[05/10] Fix C++ name canonicalizations of character literals

Message ID 20240421-canon-fixes-v1-5-4dc4791d270d@tromey.com
State New
Headers
Series Fix some C++ name canonicalizer problems

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 fail Patch failed to apply
linaro-tcwg-bot/tcwg_gdb_build--master-arm fail Patch failed to apply

Commit Message

Tom Tromey April 21, 2024, 5 p.m. UTC
  The names "void C<(char)1>::m()" and "void C<'\001'>::m()" should
canonicalize to the same string, but currently they do not -- the
former remains unchanged and the latter is transformed to
"void C<(char)'\001'>::m()".

This patch fixes the bug and also adds some unit tests.

Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=16843
---
 gdb/cp-name-parser.y | 49 +++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 6 deletions(-)
  

Comments

John Baldwin April 22, 2024, 5:19 p.m. UTC | #1
On 4/21/24 10:00 AM, Tom Tromey wrote:
> The names "void C<(char)1>::m()" and "void C<'\001'>::m()" should
> canonicalize to the same string, but currently they do not -- the
> former remains unchanged and the latter is transformed to
> "void C<(char)'\001'>::m()".
> 
> This patch fixes the bug and also adds some unit tests.
> 
> Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=16843

Approved-By: John Baldwin <jhb@FreeBSD.org>
  

Patch

diff --git a/gdb/cp-name-parser.y b/gdb/cp-name-parser.y
index 6003ed0b01e..6590194545f 100644
--- a/gdb/cp-name-parser.y
+++ b/gdb/cp-name-parser.y
@@ -44,6 +44,7 @@ 
 #include "cp-support.h"
 #include "c-support.h"
 #include "parser-defs.h"
+#include "gdbsupport/selftest.h"
 
 #define GDB_YY_REMAP_PREFIX cpname
 #include "yy-remap.h"
@@ -1514,6 +1515,7 @@  yylex (YYSTYPE *lvalp, cpname_state *state)
   int c;
   int namelen;
   const char *tokstart;
+  char *copy;
 
  retry:
   state->prev_lexptr = state->lexptr;
@@ -1544,6 +1546,10 @@  yylex (YYSTYPE *lvalp, cpname_state *state)
 	  return ERROR;
 	}
 
+      /* We over-allocate here, but it doesn't really matter.  */
+      copy = (char *) obstack_alloc (&state->demangle_info->obstack, 30);
+      xsnprintf (copy, 30, "%d", c);
+
       c = *state->lexptr++;
       if (c != '\'')
 	{
@@ -1551,15 +1557,10 @@  yylex (YYSTYPE *lvalp, cpname_state *state)
 	  return ERROR;
 	}
 
-      /* FIXME: We should refer to a canonical form of the character,
-	 presumably the same one that appears in manglings - the decimal
-	 representation.  But if that isn't in our input then we have to
-	 allocate memory for it somewhere.  */
       lvalp->comp
 	= state->fill_comp (DEMANGLE_COMPONENT_LITERAL,
 			    state->make_builtin_type ("char"),
-			    state->make_name (tokstart,
-					      state->lexptr - tokstart));
+			    state->make_name (copy, strlen (copy)));
 
       return INT;
 
@@ -1970,3 +1971,39 @@  cp_demangled_name_to_comp (const char *demangled_name,
 
   return result;
 }
+
+#if GDB_SELF_TEST
+
+/* Self-test helper: canonicalize ONE and TWO and check that the results
+   are the same string.  An input whose canonicalization comes back null
+   is compared exactly as it was passed in.  */
+static void
+should_be_the_same (const char *one, const char *two)
+{
+  gdb::unique_xmalloc_ptr<char> canon_one = cp_canonicalize_string (one);
+  gdb::unique_xmalloc_ptr<char> canon_two = cp_canonicalize_string (two);
+  one = canon_one != nullptr ? canon_one.get () : one;
+  two = canon_two != nullptr ? canon_two.get () : two;
+
+  SELF_CHECK (strcmp (one, two) == 0);
+}
+
+static void
+canonicalize_tests ()
+{
+  should_be_the_same ("short int", "short");
+  should_be_the_same ("int short", "short");
+  /* Integer versus octal-escaped character literal, both cast to char.  */
+  should_be_the_same ("C<(char) 1>::m()", "C<(char) '\\001'>::m()");
+}
+
+#endif
+
+void _initialize_cp_name_parser ();
+void
+_initialize_cp_name_parser ()
+{
+#if GDB_SELF_TEST  /* Register the test only in self-test builds.  */
+  selftests::register_test ("canonicalize", canonicalize_tests);
+#endif  /* GDB_SELF_TEST */
+}