[RFC,v5,1/9] Convert substitute_path_component to C++

Message ID 20180312153115.47321-2-prudo@linux.vnet.ibm.com
State New, archived
Headers

Commit Message

Philipp Rudo March 12, 2018, 3:31 p.m. UTC
  Simplify the code of utils.c:substiute_path_component by converting it to C++.

gdb/ChangeLog:

	* utils.c (substitute_path_component): Convert to C++.
	* utils.h (substitute_path_component): Adjust declaration.
	* auto-load.c (auto_load_expand_dir_vars): Adjust.
---
 gdb/auto-load.c | 19 ++++++++-----------
 gdb/utils.c     | 47 +++++++++++------------------------------------
 gdb/utils.h     | 10 ++++++++--
 3 files changed, 27 insertions(+), 49 deletions(-)
  

Comments

Simon Marchi March 16, 2018, 2:15 a.m. UTC | #1
Hi Philipp,

Just a few comments, but in general I think this patch is good and could
go in separate from the rest.

On 2018-03-12 11:31 AM, Philipp Rudo wrote:
> Simplify the code of utils.c:substiute_path_component by converting it to C++.

s/substiute/substitute/
> 
> gdb/ChangeLog:
> 
> 	* utils.c (substitute_path_component): Convert to C++.
> 	* utils.h (substitute_path_componetn): Adjust declatation.
> 	* auto-load.c (auto_load_expand_dir_vars): Adjust.
> ---
>  gdb/auto-load.c | 19 ++++++++-----------
>  gdb/utils.c     | 47 +++++++++++------------------------------------
>  gdb/utils.h     | 10 ++++++++--
>  3 files changed, 27 insertions(+), 49 deletions(-)
> 
> diff --git a/gdb/auto-load.c b/gdb/auto-load.c
> index 70bddbc862..a7f9635252 100644
> --- a/gdb/auto-load.c
> +++ b/gdb/auto-load.c
> @@ -175,21 +175,18 @@ std::vector<gdb::unique_xmalloc_ptr<char>> auto_load_safe_path_vec;
>     substitute_path_component.  */
>  
>  static std::vector<gdb::unique_xmalloc_ptr<char>>
> -auto_load_expand_dir_vars (const char *string)
> +auto_load_expand_dir_vars (const std::string &string)

All the usages of auto_load_expand_dir_vars pass in a char pointer.  This means that
a temporary std::string is created for the duration of the call (one copy), and another
copy is made just below when S is constructed from it.  I would suggest leaving the
parameter as const char * to avoid that extra copy.

>  {
> -  char *s = xstrdup (string);
> -  substitute_path_component (&s, "$datadir", gdb_datadir);
> -  substitute_path_component (&s, "$debugdir", debug_file_directory);
> +  std::string s (string);
> +  substitute_path_component (s, "$datadir", gdb_datadir);
> +  substitute_path_component (s, "$debugdir", debug_file_directory);
>  
> -  if (debug_auto_load && strcmp (s, string) != 0)
> +  if (debug_auto_load && s.compare (string) != 0)

s != string

>      fprintf_unfiltered (gdb_stdlog,
> -			_("auto-load: Expanded $-variables to \"%s\".\n"), s);
> +			_("auto-load: Expanded $-variables to \"%s\".\n"),
> +			s.c_str ());
>  
> -  std::vector<gdb::unique_xmalloc_ptr<char>> dir_vec
> -    = dirnames_to_char_ptr_vec (s);
> -  xfree(s);
> -
> -  return dir_vec;
> +  return dirnames_to_char_ptr_vec (s.c_str ());
>  }
>  
>  /* Update auto_load_safe_path_vec from current AUTO_LOAD_SAFE_PATH.  */
> diff --git a/gdb/utils.c b/gdb/utils.c
> index b99d444a6e..d4f1398d14 100644
> --- a/gdb/utils.c
> +++ b/gdb/utils.c
> @@ -3052,49 +3052,24 @@ make_bpstat_clear_actions_cleanup (void)
>    return make_cleanup (do_bpstat_clear_actions_cleanup, NULL);
>  }
>  
> -/* Substitute all occurences of string FROM by string TO in *STRINGP.  *STRINGP
> -   must come from xrealloc-compatible allocator and it may be updated.  FROM
> -   needs to be delimited by IS_DIR_SEPARATOR or DIRNAME_SEPARATOR (or be
> -   located at the start or end of *STRINGP.  */
> +/* See utils.h.  */
>  
>  void
> -substitute_path_component (char **stringp, const char *from, const char *to)
> +substitute_path_component (std::string &str, const std::string &from,
> +			   const std::string &to)
>  {
> -  char *string = *stringp, *s;
> -  const size_t from_len = strlen (from);
> -  const size_t to_len = strlen (to);
> -
> -  for (s = string;;)
> +  for (size_t pos = str.find (from); pos != std::string::npos;
> +       pos = str.find (from, pos + to.length ()))
>      {
> -      s = strstr (s, from);
> -      if (s == NULL)
> -	break;
> -
> -      if ((s == string || IS_DIR_SEPARATOR (s[-1])
> -	   || s[-1] == DIRNAME_SEPARATOR)
> -          && (s[from_len] == '\0' || IS_DIR_SEPARATOR (s[from_len])
> -	      || s[from_len] == DIRNAME_SEPARATOR))
> +      char start = pos == 0 ? str[0] : str[pos - 1];

I think it would be safe to just not set start if pos == 0, given the condition
below, instead of setting it to an unrelated character.

> +      char end = str[pos + from.length ()];
> +      if ((pos == 0 || IS_DIR_SEPARATOR (start) || start == DIRNAME_SEPARATOR)
> +	  && (end == '\0' || IS_DIR_SEPARATOR (end)
> +	      || end == DIRNAME_SEPARATOR))
>  	{
> -	  char *string_new;
> -
> -	  string_new
> -	    = (char *) xrealloc (string, (strlen (string) + to_len + 1));
> -
> -	  /* Relocate the current S pointer.  */
> -	  s = s - string + string_new;
> -	  string = string_new;
> -
> -	  /* Replace from by to.  */
> -	  memmove (&s[to_len], &s[from_len], strlen (&s[from_len]) + 1);
> -	  memcpy (s, to, to_len);
> -
> -	  s += to_len;
> +	  str.replace (pos, from.length (), to);
>  	}
> -      else
> -	s++;
>      }
> -
> -  *stringp = string;
>  }
>  
>  #ifdef HAVE_WAITPID
> diff --git a/gdb/utils.h b/gdb/utils.h
> index 8ca3eb0369..7e6a39ee82 100644
> --- a/gdb/utils.h
> +++ b/gdb/utils.h
> @@ -298,8 +298,14 @@ extern struct cleanup *make_bpstat_clear_actions_cleanup (void);
>  extern int gdb_filename_fnmatch (const char *pattern, const char *string,
>  				 int flags);
>  
> -extern void substitute_path_component (char **stringp, const char *from,
> -				       const char *to);
> +/* Substitute all occurences of string FROM by string TO in STR.  STR
> +   must come from xrealloc-compatible allocator and it may be updated.  FROM
> +   needs to be delimited by IS_DIR_SEPARATOR or DIRNAME_SEPARATOR (or be
> +   located at the start or end of STR).  */

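This comment would need to be updated: STR is now a std::string, so the requirement
that it come from an xrealloc-compatible allocator no longer applies.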

> +
> +extern void substitute_path_component (std::string &str,
> +				       const std::string &from,
> +				       const std::string &to);

I sent a proposal to the list that we avoid using non-const references, and use pointers
instead.  See:

https://sourceware.org/ml/gdb-patches/2018-03/msg00145.html

Here, it would mean using a pointer for the first parameter.  I haven't received any
feedback, so no formal decision has been made, but I would still suggest changing it to
a pointer for the reasons outlined in that message.

Also, I would suggest keeping the from and to parameters as const char *.  All the usages
pass in char pointers, so having the parameters as string only makes unnecessary copies.

>  
>  std::string ldirname (const char *filename);
>  
> 

I wrote and pushed some unit tests for the function while reviewing; you'll have to fix up
the call in that new test like this:

  auto test = [] (std::string s, const char *from, const char *to,
		  const char *expected)
    {
      substitute_path_component (s, from, to);
      SELF_CHECK (s == expected);
    };

Thanks,

Simon
  
Simon Marchi March 17, 2018, 8:10 p.m. UTC | #2
On 2018-03-15 10:15 PM, Simon Marchi wrote:
>> diff --git a/gdb/auto-load.c b/gdb/auto-load.c
>> index 70bddbc862..a7f9635252 100644
>> --- a/gdb/auto-load.c
>> +++ b/gdb/auto-load.c
>> @@ -175,21 +175,18 @@ std::vector<gdb::unique_xmalloc_ptr<char>> auto_load_safe_path_vec;
>>     substitute_path_component.  */
>>  
>>  static std::vector<gdb::unique_xmalloc_ptr<char>>
>> -auto_load_expand_dir_vars (const char *string)
>> +auto_load_expand_dir_vars (const std::string &string)
> 
> All the usages of auto_load_expand_dir_vars pass in a char pointer.  This means that
> a temporary std::string is created for the duration of the call (one copy) and another
> one is done lower.  I would suggest either to leave the parameter as const char * to
> avoid that copy.

I stumbled on the discussion you had with Pedro on the v1 patchset:

  https://sourceware.org/ml/gdb-patches/2017-01/msg00210.html

so I thought it would be good to expand a little bit.  A quote from that thread:

> Thus you should only use one kind of string through out GDB, either char * or
> std::string. And as GDB decided to move to C++ for me std::string is the way you
> should go.

I don't think we should be so dogmatic.  There are times where a "const char *" is
appropriate, others where std::string is appropriate.  A const std::string& is not
appropriate for a function that is potentially called with string literals, since it
will always require a copy.  As mentioned in that thread, backporting std::string_view
would be the way to go.  But until then, I think we should keep using const char *
in those cases.

Simon
  

Patch

diff --git a/gdb/auto-load.c b/gdb/auto-load.c
index 70bddbc862..a7f9635252 100644
--- a/gdb/auto-load.c
+++ b/gdb/auto-load.c
@@ -175,21 +175,18 @@  std::vector<gdb::unique_xmalloc_ptr<char>> auto_load_safe_path_vec;
    substitute_path_component.  */
 
 static std::vector<gdb::unique_xmalloc_ptr<char>>
-auto_load_expand_dir_vars (const char *string)
+auto_load_expand_dir_vars (const std::string &string)
 {
-  char *s = xstrdup (string);
-  substitute_path_component (&s, "$datadir", gdb_datadir);
-  substitute_path_component (&s, "$debugdir", debug_file_directory);
+  std::string s (string);
+  substitute_path_component (s, "$datadir", gdb_datadir);
+  substitute_path_component (s, "$debugdir", debug_file_directory);
 
-  if (debug_auto_load && strcmp (s, string) != 0)
+  if (debug_auto_load && s.compare (string) != 0)
     fprintf_unfiltered (gdb_stdlog,
-			_("auto-load: Expanded $-variables to \"%s\".\n"), s);
+			_("auto-load: Expanded $-variables to \"%s\".\n"),
+			s.c_str ());
 
-  std::vector<gdb::unique_xmalloc_ptr<char>> dir_vec
-    = dirnames_to_char_ptr_vec (s);
-  xfree(s);
-
-  return dir_vec;
+  return dirnames_to_char_ptr_vec (s.c_str ());
 }
 
 /* Update auto_load_safe_path_vec from current AUTO_LOAD_SAFE_PATH.  */
diff --git a/gdb/utils.c b/gdb/utils.c
index b99d444a6e..d4f1398d14 100644
--- a/gdb/utils.c
+++ b/gdb/utils.c
@@ -3052,49 +3052,24 @@  make_bpstat_clear_actions_cleanup (void)
   return make_cleanup (do_bpstat_clear_actions_cleanup, NULL);
 }
 
-/* Substitute all occurences of string FROM by string TO in *STRINGP.  *STRINGP
-   must come from xrealloc-compatible allocator and it may be updated.  FROM
-   needs to be delimited by IS_DIR_SEPARATOR or DIRNAME_SEPARATOR (or be
-   located at the start or end of *STRINGP.  */
+/* See utils.h.  */
 
 void
-substitute_path_component (char **stringp, const char *from, const char *to)
+substitute_path_component (std::string &str, const std::string &from,
+			   const std::string &to)
 {
-  char *string = *stringp, *s;
-  const size_t from_len = strlen (from);
-  const size_t to_len = strlen (to);
-
-  for (s = string;;)
+  for (size_t pos = str.find (from); pos != std::string::npos;
+       pos = str.find (from, pos + to.length ()))
     {
-      s = strstr (s, from);
-      if (s == NULL)
-	break;
-
-      if ((s == string || IS_DIR_SEPARATOR (s[-1])
-	   || s[-1] == DIRNAME_SEPARATOR)
-          && (s[from_len] == '\0' || IS_DIR_SEPARATOR (s[from_len])
-	      || s[from_len] == DIRNAME_SEPARATOR))
+      char start = pos == 0 ? str[0] : str[pos - 1];
+      char end = str[pos + from.length ()];
+      if ((pos == 0 || IS_DIR_SEPARATOR (start) || start == DIRNAME_SEPARATOR)
+	  && (end == '\0' || IS_DIR_SEPARATOR (end)
+	      || end == DIRNAME_SEPARATOR))
 	{
-	  char *string_new;
-
-	  string_new
-	    = (char *) xrealloc (string, (strlen (string) + to_len + 1));
-
-	  /* Relocate the current S pointer.  */
-	  s = s - string + string_new;
-	  string = string_new;
-
-	  /* Replace from by to.  */
-	  memmove (&s[to_len], &s[from_len], strlen (&s[from_len]) + 1);
-	  memcpy (s, to, to_len);
-
-	  s += to_len;
+	  str.replace (pos, from.length (), to);
 	}
-      else
-	s++;
     }
-
-  *stringp = string;
 }
 
 #ifdef HAVE_WAITPID
diff --git a/gdb/utils.h b/gdb/utils.h
index 8ca3eb0369..7e6a39ee82 100644
--- a/gdb/utils.h
+++ b/gdb/utils.h
@@ -298,8 +298,14 @@  extern struct cleanup *make_bpstat_clear_actions_cleanup (void);
 extern int gdb_filename_fnmatch (const char *pattern, const char *string,
 				 int flags);
 
-extern void substitute_path_component (char **stringp, const char *from,
-				       const char *to);
+/* Substitute all occurences of string FROM by string TO in STR.  STR
+   must come from xrealloc-compatible allocator and it may be updated.  FROM
+   needs to be delimited by IS_DIR_SEPARATOR or DIRNAME_SEPARATOR (or be
+   located at the start or end of STR).  */
+
+extern void substitute_path_component (std::string &str,
+				       const std::string &from,
+				       const std::string &to);
 
 std::string ldirname (const char *filename);