[02/28] Change ada_decode to preserve upper-case in some situations

Message ID 20250311-search-in-psyms-v1-2-d73d9be20983@tromey.com
State New
Headers
Series Search symbols via quick API |

Commit Message

Tom Tromey March 11, 2025, 2:12 p.m. UTC
  This patch is needed to avoid regressions later in the series.

The issue here is that ada_decode, when called with wide=false, would
act as though the input needed verbatim quoting.  That would happen
because the 'W' character would be passed through, and then a later
loop would reject the result because that character is upper-case.

Similarly, with operators=false the upper-case-checking loop would be
skipped, so some names that did need verbatim quoting would
incorrectly pass through.

Furthermore I noticed that there isn't a need to distinguish between
the "wide" and "operators" cases -- all callers pass identical values
to both.

This patch cleans up the above, consolidating the parameters and
changing how upper-case detection is handled, so that both the
operator and wide cases pass through without issue.  I've added new
unit tests for this.
---
 gdb/ada-lang.c            | 83 ++++++++++++++++++++++++++++++++++-------------
 gdb/ada-lang.h            | 15 ++++-----
 gdb/dwarf2/cooked-index.c |  2 +-
 gdb/symtab.h              |  2 +-
 4 files changed, 68 insertions(+), 34 deletions(-)
  

Patch

diff --git a/gdb/ada-lang.c b/gdb/ada-lang.c
index a55ee12ce70d02082e64d85634b87dd27f5a0670..4bb6a808fd8c1a7f8e4b2344fdf935f94c602ed1 100644
--- a/gdb/ada-lang.c
+++ b/gdb/ada-lang.c
@@ -1308,7 +1308,7 @@  convert_from_hex_encoded (std::string &out, const char *str, int n)
 /* See ada-lang.h.  */
 
 std::string
-ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
+ada_decode (const char *encoded, bool wrap, bool translate)
 {
   int i;
   int len0;
@@ -1403,7 +1403,7 @@  ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
   while (i < len0)
     {
       /* Is this a symbol function?  */
-      if (operators && at_start_name && encoded[i] == 'O')
+      if (at_start_name && encoded[i] == 'O')
 	{
 	  int k;
 
@@ -1414,7 +1414,10 @@  ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
 			    op_len - 1) == 0)
 		  && !isalnum (encoded[i + op_len]))
 		{
-		  decoded.append (ada_opname_table[k].decoded);
+		  if (translate)
+		    decoded.append (ada_opname_table[k].decoded);
+		  else
+		    decoded.append (ada_opname_table[k].encoded);
 		  at_start_name = 0;
 		  i += op_len;
 		  break;
@@ -1502,28 +1505,59 @@  ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
 	    i++;
 	}
 
-      if (wide && i < len0 + 3 && encoded[i] == 'U' && isxdigit (encoded[i + 1]))
+      /* Handle wide characters while respecting the arguments to the
+	 function: we may want to copy them verbatim, but in this case
+	 we do not want to register that we've copied an upper-case
+	 character.  */
+      if (i < len0 + 3 && encoded[i] == 'U' && isxdigit (encoded[i + 1]))
 	{
-	  if (convert_from_hex_encoded (decoded, &encoded[i + 1], 2))
+	  if (translate)
 	    {
-	      i += 3;
+	      if (convert_from_hex_encoded (decoded, &encoded[i + 1], 2))
+		{
+		  i += 3;
+		  continue;
+		}
+	    }
+	  else
+	    {
+	      decoded.push_back (encoded[i]);
+	      ++i;
 	      continue;
 	    }
 	}
-      else if (wide && i < len0 + 5 && encoded[i] == 'W' && isxdigit (encoded[i + 1]))
+      else if (i < len0 + 5 && encoded[i] == 'W' && isxdigit (encoded[i + 1]))
 	{
-	  if (convert_from_hex_encoded (decoded, &encoded[i + 1], 4))
+	  if (translate)
+	    {
+	      if (convert_from_hex_encoded (decoded, &encoded[i + 1], 4))
+		{
+		  i += 5;
+		  continue;
+		}
+	    }
+	  else
 	    {
-	      i += 5;
+	      decoded.push_back (encoded[i]);
+	      ++i;
 	      continue;
 	    }
 	}
-      else if (wide && i < len0 + 10 && encoded[i] == 'W' && encoded[i + 1] == 'W'
+      else if (i < len0 + 10 && encoded[i] == 'W' && encoded[i + 1] == 'W'
 	       && isxdigit (encoded[i + 2]))
 	{
-	  if (convert_from_hex_encoded (decoded, &encoded[i + 2], 8))
+	  if (translate)
 	    {
-	      i += 10;
+	      if (convert_from_hex_encoded (decoded, &encoded[i + 2], 8))
+		{
+		  i += 10;
+		  continue;
+		}
+	    }
+	  else
+	    {
+	      decoded.push_back (encoded[i]);
+	      ++i;
 	      continue;
 	    }
 	}
@@ -1550,6 +1584,12 @@  ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
 	  at_start_name = 1;
 	  i += 2;
 	}
+      else if (isupper (encoded[i]) || encoded[i] == ' ')
+	{
+	  /* Decoded names should never contain any uppercase
+	     character.  */
+	  goto Suppress;
+	}
       else
 	{
 	  /* It's a character part of the decoded name, so just copy it
@@ -1559,16 +1599,6 @@  ada_decode (const char *encoded, bool wrap, bool operators, bool wide)
 	}
     }
 
-  /* Decoded names should never contain any uppercase character.
-     Double-check this, and abort the decoding if we find one.  */
-
-  if (operators)
-    {
-      for (i = 0; i < decoded.length(); ++i)
-	if (isupper (decoded[i]) || decoded[i] == ' ')
-	  goto Suppress;
-    }
-
   /* If the compiler added a suffix, append it now.  */
   if (suffix >= 0)
     decoded = decoded + "[" + &encoded[suffix] + "]";
@@ -1594,6 +1624,13 @@  ada_decode_tests ()
   /* This isn't valid, but used to cause a crash.  PR gdb/30639.  The
      result does not really matter very much.  */
   SELF_CHECK (ada_decode ("44") == "44");
+
+  /* Check that the settings used by the DWARF reader have the desired
+     effect.  */
+  SELF_CHECK (ada_decode ("symada__cS", false, false) == "");
+  SELF_CHECK (ada_decode ("pkg__Oxor", false, false) == "pkg.Oxor");
+  SELF_CHECK (ada_decode ("pack__func_W017b", false, false)
+	      == "pack.func_W017b");
 }
 
 #endif
@@ -13311,7 +13348,7 @@  ada_lookup_name_info::ada_lookup_name_info (const lookup_name_info &lookup_name)
       else
 	m_standard_p = false;
 
-      m_decoded_name = ada_decode (m_encoded_name.c_str (), true, false, false);
+      m_decoded_name = ada_decode (m_encoded_name.c_str (), true, false);
 
       /* If the name contains a ".", then the user is entering a fully
 	 qualified entity name, and the match must not be done in wild
diff --git a/gdb/ada-lang.h b/gdb/ada-lang.h
index 3582082a1a1b702595b803072ff9c345b7f3e0f7..a96a1f6e01737b03c6e6dea5024fbdd253647201 100644
--- a/gdb/ada-lang.h
+++ b/gdb/ada-lang.h
@@ -218,16 +218,13 @@  extern const char *ada_decode_symbol (const struct general_symbol_info *);
    simply wrapped in <...>.  If WRAP is false, then the empty string
    will be returned.
 
-   When OPERATORS is false, operator names will not be decoded.  By
-   default, they are decoded, e.g., 'Oadd' will be transformed to
-   '"+"'.
-
-   When WIDE is false, wide characters will be left as-is.  By
-   default, they converted from their hex encoding to the host
-   charset.  */
+   TRANSLATE has two effects.  When true (the default), operator names
+   and wide characters will be decoded.  E.g., 'Oadd' will be
+   transformed to '"+"', and wide characters converted from their hex
+   encoding to the host charset.  When false, these will be left
+   alone.  */
 extern std::string ada_decode (const char *name, bool wrap = true,
-			       bool operators = true,
-			       bool wide = true);
+			       bool translate = true);
 
 extern std::vector<struct block_symbol> ada_lookup_symbol_list
      (const char *, const struct block *, domain_search_flags);
diff --git a/gdb/dwarf2/cooked-index.c b/gdb/dwarf2/cooked-index.c
index 9533a20e6c48cd164f1de853f1071ce5cb00ca88..427b9bbb2f6ce7a9e1339a729b58d2f64286677d 100644
--- a/gdb/dwarf2/cooked-index.c
+++ b/gdb/dwarf2/cooked-index.c
@@ -359,7 +359,7 @@  cooked_index_shard::handle_gnat_encoded_entry
      characters are left as-is.  This is done to make name matching a
      bit simpler; and for wide characters, it means the choice of Ada
      source charset does not affect the indexer directly.  */
-  std::string canonical = ada_decode (entry->name, false, false, false);
+  std::string canonical = ada_decode (entry->name, false, false);
   if (canonical.empty ())
     {
       entry->canonical = entry->name;
diff --git a/gdb/symtab.h b/gdb/symtab.h
index 7927380fca3f115fd43ecdaf683ecc07a0ff22e0..83913b1806f4a5fe39987978bb7059efc606a594 100644
--- a/gdb/symtab.h
+++ b/gdb/symtab.h
@@ -145,7 +145,7 @@  class ada_lookup_name_info final
   std::string m_encoded_name;
 
   /* The decoded lookup name.  This is formed by calling ada_decode
-     with both 'operators' and 'wide' set to false.  */
+     with 'translate' set to false.  */
   std::string m_decoded_name;
 
   /* Whether the user-provided lookup name was Ada encoded.  If so,