[17/40] Linespec lexing and C++ operators

Message ID 1496406158-12663-18-git-send-email-palves@redhat.com
State New, archived
Headers

Commit Message

Pedro Alves June 2, 2017, 12:22 p.m. UTC
  There's some lexing code in linespec that isn't handling C++ operators
correctly.  It's the usual confusion with operator< / operator<<, in
code that wants to skip past template parameters.

The linespec_lexer_lex_string change is necessary otherwise we get
this (with current master):

 (gdb) break 'operator<'
 unmatched quote

The need for the find_toplevel_char change was exposed by the use of
that function in the explicit location completer.  Without the fix,
that completer is not able to "see" past operator< symbols, without
quoting, like:

 (gdb) b -function operator<(int, int) -labe[TAB]    # nothing happens

gdb incorrectly thinks "-labe" is part of the "unclosed" template
parameter list started with "<".

gdb/ChangeLog:
yyyy-mm-dd  Pedro Alves  <palves@redhat.com>

	* linespec.c (linespec_lexer_lex_string, find_toplevel_char):
	Handle 'operator<' / 'operator<<'.
---
 gdb/linespec.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 82 insertions(+), 6 deletions(-)
  

Comments

Keith Seitz July 14, 2017, 9:45 p.m. UTC | #1
On 06/02/2017 05:22 AM, Pedro Alves wrote:
> There's some lexing code in linespec that isn't handling C++ operators
> correctly.  It's the usual confusion with operator< / operator<<, in
> code that wants to skip past template parameters.
> 
> The linespec_lexer_lex_string change is necessary otherwise we get
> this (with current master):
> 
>  (gdb) break 'operator<'
>  unmatched quote
> 
> The need for the find_toplevel_char change was exposed by the use of
> that function in the explicit location completer.  Without the fix,
> that completer is not able to "see" past operator< symbols, without
> quoting, like:
> 
>  (gdb) b -function operator<(int, int) -labe[TAB]    # nothing happens
> 
> gdb incorrectly thinks "-labe" is part of the "unclosed" template
> parameter list started with "<".

Ouch. The best laid plans...

Just a few trivial things.

> diff --git a/gdb/linespec.c b/gdb/linespec.c
> index 0216bf1..f24cca2 100644
> --- a/gdb/linespec.c
> +++ b/gdb/linespec.c
> @@ -674,14 +674,49 @@ linespec_lexer_lex_string (linespec_parser *parser)
>  	  else if (*PARSER_STREAM (parser) == '<'
>  		   || *PARSER_STREAM (parser) == '(')
>  	    {
> -	      const char *p;
> +	      /* Don't interpret 'operator<' / 'operator<<' as a
> +		 template parameter list though.  */
> +	      if (*PARSER_STREAM (parser) == '<'
> +		  && (PARSER_STATE (parser)->language->la_language
> +		      == language_cplus)
> +		  && (PARSER_STREAM (parser) - start) >= CP_OPERATOR_LEN)
> +		{
> +		  const char *p = PARSER_STREAM (parser);
> +
> +		  while (p > start && isspace (p[-1]))
> +		    p--;
> +		  if (p - start >= CP_OPERATOR_LEN)
> +		    {
> +		      p-= CP_OPERATOR_LEN;

This is a cut-n-paste-o (that's a typo propagated via cut-n-paste). Missing a space before the operator.

> +		      if (strncmp (p, CP_OPERATOR_STR, CP_OPERATOR_LEN) == 0
> +			  && (p == start
> +			      || !(isalnum (p[-1]) || p[-1] == '_')))
> +			{
> +			  /* This is an operator name.  Keep going.  */
> +			  ++(PARSER_STREAM (parser));
> +			  if (*PARSER_STREAM (parser) == '<')
> +			    ++(PARSER_STREAM (parser));
> +			  continue;
> +			}
> +		    }
> +		}
>  
> -	      p = find_parameter_list_end (PARSER_STREAM (parser));
> -	      if (p != NULL)
> +	      const char *p = find_parameter_list_end (PARSER_STREAM (parser));
> +	      PARSER_STREAM (parser) = p;
> +
> +	      /* Don't loop around to the normal \0 case above because
> +		 we don't want to misinterpret a potential keyword at
> +		 the end of the token when the string isn't
> +		 "()<>"-balanced.  This handles "b
> +		 function(thread<tab>" in completion mode.  */
> +	      if (*p == '\0')
>  		{
> -		  PARSER_STREAM (parser) = p;
> -		  continue;
> +		  LS_TOKEN_STOKEN (token).ptr = start;
> +		  LS_TOKEN_STOKEN (token).length = PARSER_STREAM (parser) - start;

Line length > 80?

> +		  return token;
>  		}
> +	      else
> +		continue;
>  	    }
>  	  /* Commas are terminators, but not if they are part of an
>  	     operator name.  */

Keith
  
Pedro Alves July 17, 2017, 7:33 p.m. UTC | #2
On 07/14/2017 10:45 PM, Keith Seitz wrote:
> On 06/02/2017 05:22 AM, Pedro Alves wrote:

>>
>>  (gdb) b -function operator<(int, int) -labe[TAB]    # nothing happens
>>
>> gdb incorrectly thinks "-labe" is part of the "unclosed" template
>> parameter list started with "<".
> 
> Ouch. The best laid plans...

:-)

>> +		    p--;
>> +		  if (p - start >= CP_OPERATOR_LEN)
>> +		    {
>> +		      p-= CP_OPERATOR_LEN;
> 
> This is a cut-n-paste-o (that's a typo propagated via cut-n-paste). Missing a space before the operator.

Whoops.

>> +		  LS_TOKEN_STOKEN (token).ptr = start;
>> +		  LS_TOKEN_STOKEN (token).length = PARSER_STREAM (parser) - start;
> 
> Line length > 80?

Indeed.

Pushed with the obvious fixes.

Thanks,
Pedro Alves
  

Patch

diff --git a/gdb/linespec.c b/gdb/linespec.c
index 0216bf1..f24cca2 100644
--- a/gdb/linespec.c
+++ b/gdb/linespec.c
@@ -674,14 +674,49 @@  linespec_lexer_lex_string (linespec_parser *parser)
 	  else if (*PARSER_STREAM (parser) == '<'
 		   || *PARSER_STREAM (parser) == '(')
 	    {
-	      const char *p;
+	      /* Don't interpret 'operator<' / 'operator<<' as a
+		 template parameter list though.  */
+	      if (*PARSER_STREAM (parser) == '<'
+		  && (PARSER_STATE (parser)->language->la_language
+		      == language_cplus)
+		  && (PARSER_STREAM (parser) - start) >= CP_OPERATOR_LEN)
+		{
+		  const char *p = PARSER_STREAM (parser);
+
+		  while (p > start && isspace (p[-1]))
+		    p--;
+		  if (p - start >= CP_OPERATOR_LEN)
+		    {
+		      p-= CP_OPERATOR_LEN;
+		      if (strncmp (p, CP_OPERATOR_STR, CP_OPERATOR_LEN) == 0
+			  && (p == start
+			      || !(isalnum (p[-1]) || p[-1] == '_')))
+			{
+			  /* This is an operator name.  Keep going.  */
+			  ++(PARSER_STREAM (parser));
+			  if (*PARSER_STREAM (parser) == '<')
+			    ++(PARSER_STREAM (parser));
+			  continue;
+			}
+		    }
+		}
 
-	      p = find_parameter_list_end (PARSER_STREAM (parser));
-	      if (p != NULL)
+	      const char *p = find_parameter_list_end (PARSER_STREAM (parser));
+	      PARSER_STREAM (parser) = p;
+
+	      /* Don't loop around to the normal \0 case above because
+		 we don't want to misinterpret a potential keyword at
+		 the end of the token when the string isn't
+		 "()<>"-balanced.  This handles "b
+		 function(thread<tab>" in completion mode.  */
+	      if (*p == '\0')
 		{
-		  PARSER_STREAM (parser) = p;
-		  continue;
+		  LS_TOKEN_STOKEN (token).ptr = start;
+		  LS_TOKEN_STOKEN (token).length = PARSER_STREAM (parser) - start;
+		  return token;
 		}
+	      else
+		continue;
 	    }
 	  /* Commas are terminators, but not if they are part of an
 	     operator name.  */
@@ -1112,7 +1147,7 @@  find_methods (struct type *t, const char *name,
 /* Find an instance of the character C in the string S that is outside
    of all parenthesis pairs, single-quoted strings, and double-quoted
    strings.  Also, ignore the char within a template name, like a ','
-   within foo<int, int>.  */
+   within foo<int, int>, while considering C++ operator</operator<<.  */
 
 const char *
 find_toplevel_char (const char *s, char c)
@@ -1140,6 +1175,47 @@  find_toplevel_char (const char *s, char c)
 	depth++;
       else if ((*scan == ')' || *scan == '>') && depth > 0)
 	depth--;
+      else if (*scan == 'o' && !quoted && depth == 0)
+	{
+	  /* Handle C++ operator names.  */
+	  if (strncmp (scan, CP_OPERATOR_STR, CP_OPERATOR_LEN) == 0)
+	    {
+	      scan += CP_OPERATOR_LEN;
+	      if (*scan == c)
+		return scan;
+	      while (isspace (*scan))
+		{
+		  ++scan;
+		  if (*scan == c)
+		    return scan;
+		}
+	      if (*scan == '\0')
+		break;
+
+	      switch (*scan)
+		{
+		  /* Skip over one less than the appropriate number of
+		     characters: the for loop will skip over the last
+		     one.  */
+		case '<':
+		  if (scan[1] == '<')
+		    {
+		      scan++;
+		      if (*scan == c)
+			return scan;
+		    }
+		  break;
+		case '>':
+		  if (scan[1] == '>')
+		    {
+		      scan++;
+		      if (*scan == c)
+			return scan;
+		    }
+		  break;
+		}
+	    }
+	}
     }
 
   return 0;