Fix slow and non-deterministic behavior of isspace() and tolower()

Message ID 325f6653-a7ae-1242-53f6-b004f6edca37@redhat.com
State New, archived
Headers

Commit Message

Pedro Alves June 21, 2019, 8:12 p.m. UTC
  On 6/21/19 8:19 PM, Shawn Landden wrote:
> I was getting 8% and 6% cpu usage in tolower() and isspace(),
> respectively, waiting for a breakpoint on ppc64el.
> 
> Also, gdb doesn't want non-deterministic behavior here.
> 
> v2: do not touch C namespace
> v3: Turns out there are two places using these in performance-critical
> parts.
>     Follow GNU coding standards.

This doesn't address my comments about
<https://sourceware.org/ml/gdb-patches/2019-06/msg00442.html>.

Something like this.  How does this compare?  Any significant
difference in your benchmark?  And again, how are you benchmarking
this?

From 1bd3203a26f989ba146376842f5b6a78bdfae181 Mon Sep 17 00:00:00 2001
From: Pedro Alves <palves@redhat.com>
Date: Fri, 21 Jun 2019 19:12:30 +0100
Subject: [PATCH] Use ISSPACE etc

---
 gdb/common/gdb-safe-ctype.h | 46 ++++++++++++++++++++++++++++++++++++++++++++
 gdb/utils.c                 | 47 +++++++++++++++++++++++----------------------
 2 files changed, 70 insertions(+), 23 deletions(-)
 create mode 100644 gdb/common/gdb-safe-ctype.h
  

Comments

Shawn Landden June 21, 2019, 8:30 p.m. UTC | #1
On Fri, Jun 21, 2019 at 3:12 PM Pedro Alves <palves@redhat.com> wrote:
>
> On 6/21/19 8:19 PM, Shawn Landden wrote:
> > I was getting 8% and 6% cpu usage in tolower() and isspace(),
> > respectively, waiting for a breakpoint on ppc64el.
> >
> > Also, gdb doesn't want non-deterministic behavior here.
> >
> > v2: do not touch C namespace
> > v3: Turns out there are two places using these in performance-critical
> > parts.
> >     Follow GNU coding standards.
>
> This doesn't address my comments about
> <https://sourceware.org/ml/gdb-patches/2019-06/msg00442.html>.
>
> Something like this.  How does this compare?  Any significant
> difference in your benchmark?  And again, how are you benchmarking
> this?
perf record. goes from like 6-8% to almost nothing (if you remove the static).
>
> From 1bd3203a26f989ba146376842f5b6a78bdfae181 Mon Sep 17 00:00:00 2001
> From: Pedro Alves <palves@redhat.com>
> Date: Fri, 21 Jun 2019 19:12:30 +0100
> Subject: [PATCH] Use ISSPACE etc
>
> ---
>  gdb/common/gdb-safe-ctype.h | 46 ++++++++++++++++++++++++++++++++++++++++++++
>  gdb/utils.c                 | 47 +++++++++++++++++++++++----------------------
>  2 files changed, 70 insertions(+), 23 deletions(-)
>  create mode 100644 gdb/common/gdb-safe-ctype.h
>
> diff --git a/gdb/common/gdb-safe-ctype.h b/gdb/common/gdb-safe-ctype.h
> new file mode 100644
> index 00000000000..01a97b7ce39
> --- /dev/null
> +++ b/gdb/common/gdb-safe-ctype.h
> @@ -0,0 +1,46 @@
> +/* Wrapper around libiberty's safe-ctype.h for GDB, the GNU debugger.
> +
> +   Copyright (C) 2019 Free Software Foundation, Inc.
> +
> +   This file is part of GDB.
> +
> +   This program is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   This program is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
> +
> +#ifndef GDB_SAFE_CTYPE_H
> +#define GDB_SAFE_CTYPE_H
> +
> +/* After safe-ctype.h is included, we can no longer use the host's
> +   ctype routines.  Trying to do so results in compile errors.  Code
> +   that uses safe-ctype.h that wants to refer to the locale-dependent
> +   ctype functions must call these wrapper versions instead.  */
> +
> +static inline int
> +gdb_isprint (int ch)
> +{
> +  return isprint (ch);
> +}
> +
> +/* readline.h defines these symbols too, but we want libiberty's
> +   versions.  */
> +#undef ISALPHA
> +#undef ISALNUM
> +#undef ISDIGIT
> +#undef ISLOWER
> +#undef ISPRINT
> +#undef ISUPPER
> +#undef ISXDIGIT
> +
> +#include "safe-ctype.h"
> +
> +#endif
> diff --git a/gdb/utils.c b/gdb/utils.c
> index c7922cf7f56..367ea277c7d 100644
> --- a/gdb/utils.c
> +++ b/gdb/utils.c
> @@ -73,6 +73,7 @@
>  #include "common/pathstuff.h"
>  #include "cli/cli-style.h"
>  #include "common/scope-exit.h"
> +#include "common/gdb-safe-ctype.h"
>
>  void (*deprecated_error_begin_hook) (void);
>
> @@ -1057,7 +1058,7 @@ parse_escape (struct gdbarch *gdbarch, const char **string_ptr)
>           while (++count < 3)
>             {
>               c = (**string_ptr);
> -             if (isdigit (c) && c != '8' && c != '9')
> +             if (ISDIGIT (c) && c != '8' && c != '9')
>                 {
>                   (*string_ptr)++;
>                   i *= 8;
> @@ -1985,7 +1986,7 @@ puts_debug (char *prefix, char *string, char *suffix)
>        switch (ch)
>         {
>         default:
> -         if (isprint (ch))
> +         if (gdb_isprint (ch))
>             fputc_unfiltered (ch, gdb_stdlog);
>
>           else
> @@ -2268,7 +2269,7 @@ fprintf_symbol_filtered (struct ui_file *stream, const char *name,
>  static bool
>  valid_identifier_name_char (int ch)
>  {
> -  return (isalnum (ch) || ch == '_');
> +  return (ISALNUM (ch) || ch == '_');
>  }
>
>  /* Skip to end of token, or to END, whatever comes first.  Input is
> @@ -2278,7 +2279,7 @@ static const char *
>  cp_skip_operator_token (const char *token, const char *end)
>  {
>    const char *p = token;
> -  while (p != end && !isspace (*p) && *p != '(')
> +  while (p != end && !ISSPACE (*p) && *p != '(')
>      {
>        if (valid_identifier_name_char (*p))
>         {
> @@ -2332,9 +2333,9 @@ cp_skip_operator_token (const char *token, const char *end)
>  static void
>  skip_ws (const char *&string1, const char *&string2, const char *end_str2)
>  {
> -  while (isspace (*string1))
> +  while (ISSPACE (*string1))
>      string1++;
> -  while (string2 < end_str2 && isspace (*string2))
> +  while (string2 < end_str2 && ISSPACE (*string2))
>      string2++;
>  }
>
> @@ -2396,8 +2397,8 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
>    while (1)
>      {
>        if (skip_spaces
> -         || ((isspace (*string1) && !valid_identifier_name_char (*string2))
> -             || (isspace (*string2) && !valid_identifier_name_char (*string1))))
> +         || ((ISSPACE (*string1) && !valid_identifier_name_char (*string2))
> +             || (ISSPACE (*string2) && !valid_identifier_name_char (*string1))))
>         {
>           skip_ws (string1, string2, end_str2);
>           skip_spaces = false;
> @@ -2430,7 +2431,7 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
>           if (match_for_lcd != NULL && abi_start != string1)
>             match_for_lcd->mark_ignored_range (abi_start, string1);
>
> -         while (isspace (*string1))
> +         while (ISSPACE (*string1))
>             string1++;
>         }
>
> @@ -2455,9 +2456,9 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
>           string1++;
>           string2++;
>
> -         while (isspace (*string1))
> +         while (ISSPACE (*string1))
>             string1++;
> -         while (string2 < end_str2 && isspace (*string2))
> +         while (string2 < end_str2 && ISSPACE (*string2))
>             string2++;
>           continue;
>         }
> @@ -2551,14 +2552,14 @@ strncmp_iw_with_mode (const char *string1, const char *string2,
>        if (case_sensitivity == case_sensitive_on && *string1 != *string2)
>         break;
>        if (case_sensitivity == case_sensitive_off
> -         && (tolower ((unsigned char) *string1)
> -             != tolower ((unsigned char) *string2)))
> +         && (TOLOWER ((unsigned char) *string1)
> +             != TOLOWER ((unsigned char) *string2)))
>         break;
>
>        /* If we see any non-whitespace, non-identifier-name character
>          (any of "()<>*&" etc.), then skip spaces the next time
>          around.  */
> -      if (!isspace (*string1) && !valid_identifier_name_char (*string1))
> +      if (!ISSPACE (*string1) && !valid_identifier_name_char (*string1))
>         skip_spaces = true;
>
>        string1++;
> @@ -2679,16 +2680,16 @@ strcmp_iw_ordered (const char *string1, const char *string2)
>
>        while (*string1 != '\0' && *string2 != '\0')
>         {
> -         while (isspace (*string1))
> +         while (ISSPACE (*string1))
>             string1++;
> -         while (isspace (*string2))
> +         while (ISSPACE (*string2))
>             string2++;
>
>           switch (case_pass)
>           {
>             case case_sensitive_off:
> -             c1 = tolower ((unsigned char) *string1);
> -             c2 = tolower ((unsigned char) *string2);
> +             c1 = TOLOWER ((unsigned char) *string1);
> +             c2 = TOLOWER ((unsigned char) *string2);
>               break;
>             case case_sensitive_on:
>               c1 = *string1;
> @@ -2927,17 +2928,17 @@ string_to_core_addr (const char *my_string)
>  {
>    CORE_ADDR addr = 0;
>
> -  if (my_string[0] == '0' && tolower (my_string[1]) == 'x')
> +  if (my_string[0] == '0' && TOLOWER (my_string[1]) == 'x')
>      {
>        /* Assume that it is in hex.  */
>        int i;
>
>        for (i = 2; my_string[i] != '\0'; i++)
>         {
> -         if (isdigit (my_string[i]))
> +         if (ISDIGIT (my_string[i]))
>             addr = (my_string[i] - '0') + (addr * 16);
> -         else if (isxdigit (my_string[i]))
> -           addr = (tolower (my_string[i]) - 'a' + 0xa) + (addr * 16);
> +         else if (ISXDIGIT (my_string[i]))
> +           addr = (TOLOWER (my_string[i]) - 'a' + 0xa) + (addr * 16);
>           else
>             error (_("invalid hex \"%s\""), my_string);
>         }
> @@ -2949,7 +2950,7 @@ string_to_core_addr (const char *my_string)
>
>        for (i = 0; my_string[i] != '\0'; i++)
>         {
> -         if (isdigit (my_string[i]))
> +         if (ISDIGIT (my_string[i]))
>             addr = (my_string[i] - '0') + (addr * 10);
>           else
>             error (_("invalid decimal \"%s\""), my_string);
> --
> 2.14.5
>
  
Pedro Alves June 21, 2019, 9:14 p.m. UTC | #2
On 6/21/19 9:30 PM, Shawn Landden wrote:
> On Fri, Jun 21, 2019 at 3:12 PM Pedro Alves <palves@redhat.com> wrote:
>> On 6/21/19 8:19 PM, Shawn Landden wrote:
>>> I was getting 8% and 6% cpu usage in tolower() and isspace(),
>>> respectively, waiting for a breakpoint on ppc64el.
>>>
>>> Also, gdb doesn't want non-deterministic behavior here.
>>>
>>> v2: do not touch C namespace
>>> v3: Turns out there are two places using these in performance-critical
>>> parts.
>>>     Follow GNU coding standards.
>> This doesn't address my comments about
>> <https://sourceware.org/ml/gdb-patches/2019-06/msg00442.html>.
>>
>> Something like this.  How does this compare?  Any significant
>> difference in your benchmark?  And again, how are you benchmarking
>> this?
> perf record. 

What was the use case?  Did you have some speficic set of commands?
Some script?  Please be detailed/more specific, so that
others can reproduce it, either now, or in the future when we
look back into this thread.

> goes from like 6-8% to almost nothing (if you remove the static).

What do you mean by "remove the static"?

Thanks,
Pedro Alves
  

Patch

diff --git a/gdb/common/gdb-safe-ctype.h b/gdb/common/gdb-safe-ctype.h
new file mode 100644
index 00000000000..01a97b7ce39
--- /dev/null
+++ b/gdb/common/gdb-safe-ctype.h
@@ -0,0 +1,46 @@ 
+/* Wrapper around libiberty's safe-ctype.h for GDB, the GNU debugger.
+
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef GDB_SAFE_CTYPE_H
+#define GDB_SAFE_CTYPE_H
+
+/* After safe-ctype.h is included, we can no longer use the host's
+   ctype routines.  Trying to do so results in compile errors.  Code
+   that uses safe-ctype.h that wants to refer to the locale-dependent
+   ctype functions must call these wrapper versions instead.  */
+
+static inline int
+gdb_isprint (int ch)
+{
+  return isprint (ch);
+}
+
+/* readline.h defines these symbols too, but we want libiberty's
+   versions.  */
+#undef ISALPHA
+#undef ISALNUM
+#undef ISDIGIT
+#undef ISLOWER
+#undef ISPRINT
+#undef ISUPPER
+#undef ISXDIGIT
+
+#include "safe-ctype.h"
+
+#endif
diff --git a/gdb/utils.c b/gdb/utils.c
index c7922cf7f56..367ea277c7d 100644
--- a/gdb/utils.c
+++ b/gdb/utils.c
@@ -73,6 +73,7 @@ 
 #include "common/pathstuff.h"
 #include "cli/cli-style.h"
 #include "common/scope-exit.h"
+#include "common/gdb-safe-ctype.h"
 
 void (*deprecated_error_begin_hook) (void);
 
@@ -1057,7 +1058,7 @@  parse_escape (struct gdbarch *gdbarch, const char **string_ptr)
 	  while (++count < 3)
 	    {
 	      c = (**string_ptr);
-	      if (isdigit (c) && c != '8' && c != '9')
+	      if (ISDIGIT (c) && c != '8' && c != '9')
 		{
 		  (*string_ptr)++;
 		  i *= 8;
@@ -1985,7 +1986,7 @@  puts_debug (char *prefix, char *string, char *suffix)
       switch (ch)
 	{
 	default:
-	  if (isprint (ch))
+	  if (gdb_isprint (ch))
 	    fputc_unfiltered (ch, gdb_stdlog);
 
 	  else
@@ -2268,7 +2269,7 @@  fprintf_symbol_filtered (struct ui_file *stream, const char *name,
 static bool
 valid_identifier_name_char (int ch)
 {
-  return (isalnum (ch) || ch == '_');
+  return (ISALNUM (ch) || ch == '_');
 }
 
 /* Skip to end of token, or to END, whatever comes first.  Input is
@@ -2278,7 +2279,7 @@  static const char *
 cp_skip_operator_token (const char *token, const char *end)
 {
   const char *p = token;
-  while (p != end && !isspace (*p) && *p != '(')
+  while (p != end && !ISSPACE (*p) && *p != '(')
     {
       if (valid_identifier_name_char (*p))
 	{
@@ -2332,9 +2333,9 @@  cp_skip_operator_token (const char *token, const char *end)
 static void
 skip_ws (const char *&string1, const char *&string2, const char *end_str2)
 {
-  while (isspace (*string1))
+  while (ISSPACE (*string1))
     string1++;
-  while (string2 < end_str2 && isspace (*string2))
+  while (string2 < end_str2 && ISSPACE (*string2))
     string2++;
 }
 
@@ -2396,8 +2397,8 @@  strncmp_iw_with_mode (const char *string1, const char *string2,
   while (1)
     {
       if (skip_spaces
-	  || ((isspace (*string1) && !valid_identifier_name_char (*string2))
-	      || (isspace (*string2) && !valid_identifier_name_char (*string1))))
+	  || ((ISSPACE (*string1) && !valid_identifier_name_char (*string2))
+	      || (ISSPACE (*string2) && !valid_identifier_name_char (*string1))))
 	{
 	  skip_ws (string1, string2, end_str2);
 	  skip_spaces = false;
@@ -2430,7 +2431,7 @@  strncmp_iw_with_mode (const char *string1, const char *string2,
 	  if (match_for_lcd != NULL && abi_start != string1)
 	    match_for_lcd->mark_ignored_range (abi_start, string1);
 
-	  while (isspace (*string1))
+	  while (ISSPACE (*string1))
 	    string1++;
 	}
 
@@ -2455,9 +2456,9 @@  strncmp_iw_with_mode (const char *string1, const char *string2,
 	  string1++;
 	  string2++;
 
-	  while (isspace (*string1))
+	  while (ISSPACE (*string1))
 	    string1++;
-	  while (string2 < end_str2 && isspace (*string2))
+	  while (string2 < end_str2 && ISSPACE (*string2))
 	    string2++;
 	  continue;
 	}
@@ -2551,14 +2552,14 @@  strncmp_iw_with_mode (const char *string1, const char *string2,
       if (case_sensitivity == case_sensitive_on && *string1 != *string2)
 	break;
       if (case_sensitivity == case_sensitive_off
-	  && (tolower ((unsigned char) *string1)
-	      != tolower ((unsigned char) *string2)))
+	  && (TOLOWER ((unsigned char) *string1)
+	      != TOLOWER ((unsigned char) *string2)))
 	break;
 
       /* If we see any non-whitespace, non-identifier-name character
 	 (any of "()<>*&" etc.), then skip spaces the next time
 	 around.  */
-      if (!isspace (*string1) && !valid_identifier_name_char (*string1))
+      if (!ISSPACE (*string1) && !valid_identifier_name_char (*string1))
 	skip_spaces = true;
 
       string1++;
@@ -2679,16 +2680,16 @@  strcmp_iw_ordered (const char *string1, const char *string2)
 
       while (*string1 != '\0' && *string2 != '\0')
 	{
-	  while (isspace (*string1))
+	  while (ISSPACE (*string1))
 	    string1++;
-	  while (isspace (*string2))
+	  while (ISSPACE (*string2))
 	    string2++;
 
 	  switch (case_pass)
 	  {
 	    case case_sensitive_off:
-	      c1 = tolower ((unsigned char) *string1);
-	      c2 = tolower ((unsigned char) *string2);
+	      c1 = TOLOWER ((unsigned char) *string1);
+	      c2 = TOLOWER ((unsigned char) *string2);
 	      break;
 	    case case_sensitive_on:
 	      c1 = *string1;
@@ -2927,17 +2928,17 @@  string_to_core_addr (const char *my_string)
 {
   CORE_ADDR addr = 0;
 
-  if (my_string[0] == '0' && tolower (my_string[1]) == 'x')
+  if (my_string[0] == '0' && TOLOWER (my_string[1]) == 'x')
     {
       /* Assume that it is in hex.  */
       int i;
 
       for (i = 2; my_string[i] != '\0'; i++)
 	{
-	  if (isdigit (my_string[i]))
+	  if (ISDIGIT (my_string[i]))
 	    addr = (my_string[i] - '0') + (addr * 16);
-	  else if (isxdigit (my_string[i]))
-	    addr = (tolower (my_string[i]) - 'a' + 0xa) + (addr * 16);
+	  else if (ISXDIGIT (my_string[i]))
+	    addr = (TOLOWER (my_string[i]) - 'a' + 0xa) + (addr * 16);
 	  else
 	    error (_("invalid hex \"%s\""), my_string);
 	}
@@ -2949,7 +2950,7 @@  string_to_core_addr (const char *my_string)
 
       for (i = 0; my_string[i] != '\0'; i++)
 	{
-	  if (isdigit (my_string[i]))
+	  if (ISDIGIT (my_string[i]))
 	    addr = (my_string[i] - '0') + (addr * 10);
 	  else
 	    error (_("invalid decimal \"%s\""), my_string);