libcpp: Fix valgrind errors on pr88974.c [PR112956]

Message ID ZXltpRf/CIlKnbxD@tucnak
State New
Headers
Series libcpp: Fix valgrind errors on pr88974.c [PR112956] |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply

Commit Message

Jakub Jelinek Dec. 13, 2023, 8:39 a.m. UTC
  Hi!

On the c-c++-common/cpp/pr88974.c testcase I'm seeing
==600549== Conditional jump or move depends on uninitialised value(s)
==600549==    at 0x1DD3A05: cpp_get_token_1(cpp_reader*, unsigned int*) (macro.cc:3050)
==600549==    by 0x1DBFC7F: _cpp_parse_expr (expr.cc:1392)
==600549==    by 0x1DB9471: do_if(cpp_reader*) (directives.cc:2087)
==600549==    by 0x1DBB4D8: _cpp_handle_directive (directives.cc:572)
==600549==    by 0x1DCD488: _cpp_lex_token (lex.cc:3682)
==600549==    by 0x1DD3A97: cpp_get_token_1(cpp_reader*, unsigned int*) (macro.cc:2936)
==600549==    by 0x7F7EE4: scan_translation_unit (c-ppoutput.cc:350)
==600549==    by 0x7F7EE4: preprocess_file(cpp_reader*) (c-ppoutput.cc:106)
==600549==    by 0x7F6235: c_common_init() (c-opts.cc:1280)
==600549==    by 0x704C8B: lang_dependent_init (toplev.cc:1837)
==600549==    by 0x704C8B: do_compile (toplev.cc:2135)
==600549==    by 0x704C8B: toplev::main(int, char**) (toplev.cc:2306)
==600549==    by 0x7064BA: main (main.cc:39)
error.  The problem is that _cpp_lex_direct can leave result->src_loc
uninitialized in some cases and later on we use that location_t.

_cpp_lex_direct essentially does:
  cppchar_t c;
...
  cpp_token *result = pfile->cur_token++;

 fresh_line:
  result->flags = 0;
...
  if (buffer->need_line)
    {
      if (pfile->state.in_deferred_pragma)
	{
	  result->type = CPP_PRAGMA_EOL;
	  ... // keeps result->src_loc uninitialized;
	  return result;
	}
      if (!_cpp_get_fresh_line (pfile))
        {
          result->type = CPP_EOF;
          if (!pfile->state.in_directive && !pfile->state.parsing_args)
	    {
	      result->src_loc = pfile->line_table->highest_line;
	      ...
	    }
	  ... // otherwise result->src_loc is sometimes uninitialized here
	  return result;
	}
      ...
    }
...
  result->src_loc = pfile->line_table->highest_line;
...
  c = *buffer->cur++;
  switch (c)
    {
...
    case '\n':
...
      buffer->need_line = true;
      if (pfile->state.in_deferred_pragma)
        {
          result->type = CPP_PRAGMA_EOL;
...
	  return result;
	}
      goto fresh_line;
...
    }
...
So, if _cpp_lex_direct is called without buffer->need_line initially set,
result->src_loc is always initialized (and actually hundreds of tests rely
on that exact value it has), even when c == '\n' and we set that flag later
on and goto fresh_line.  For CPP_PRAGMA_EOL case we have in that case
separate handling and don't goto.
But if _cpp_lex_direct is called with buffer->need_line initially set and
either decide to return a CPP_PRAGMA_EOL token or if getting a new line fails
for some reason and we return an CPP_ERROR token and we are in directive
or parsing args state, it is kept uninitialized and can be whatever the
allocation left it there as.

The following patch attempts to keep the status quo, use value that was
returned previously if it was initialized (i.e. we went through the
goto fresh_line; statement in c == '\n' handling) and only initialize
result->src_loc if it was uninitialized before.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-12-13  Jakub Jelinek  <jakub@redhat.com>

	PR preprocessor/112956
	* lex.cc (_cpp_lex_direct): Initialize c to 0.
	For CPP_PRAGMA_EOL tokens and if c == 0 also for CPP_EOF
	set result->src_loc to highest locus.


	Jakub
  

Comments

Jason Merrill Dec. 13, 2023, 8:10 p.m. UTC | #1
On 12/13/23 03:39, Jakub Jelinek wrote:
> Hi!
> 
> On the c-c++-common/cpp/pr88974.c testcase I'm seeing
> ==600549== Conditional jump or move depends on uninitialised value(s)
> ==600549==    at 0x1DD3A05: cpp_get_token_1(cpp_reader*, unsigned int*) (macro.cc:3050)
> ==600549==    by 0x1DBFC7F: _cpp_parse_expr (expr.cc:1392)
> ==600549==    by 0x1DB9471: do_if(cpp_reader*) (directives.cc:2087)
> ==600549==    by 0x1DBB4D8: _cpp_handle_directive (directives.cc:572)
> ==600549==    by 0x1DCD488: _cpp_lex_token (lex.cc:3682)
> ==600549==    by 0x1DD3A97: cpp_get_token_1(cpp_reader*, unsigned int*) (macro.cc:2936)
> ==600549==    by 0x7F7EE4: scan_translation_unit (c-ppoutput.cc:350)
> ==600549==    by 0x7F7EE4: preprocess_file(cpp_reader*) (c-ppoutput.cc:106)
> ==600549==    by 0x7F6235: c_common_init() (c-opts.cc:1280)
> ==600549==    by 0x704C8B: lang_dependent_init (toplev.cc:1837)
> ==600549==    by 0x704C8B: do_compile (toplev.cc:2135)
> ==600549==    by 0x704C8B: toplev::main(int, char**) (toplev.cc:2306)
> ==600549==    by 0x7064BA: main (main.cc:39)
> error.  The problem is that _cpp_lex_direct can leave result->src_loc
> uninitialized in some cases and later on we use that location_t.
> 
> _cpp_lex_direct essentially does:
>    cppchar_t c;
> ...
>    cpp_token *result = pfile->cur_token++;
> 
>   fresh_line:
>    result->flags = 0;
> ...
>    if (buffer->need_line)
>      {
>        if (pfile->state.in_deferred_pragma)
> 	{
> 	  result->type = CPP_PRAGMA_EOL;
> 	  ... // keeps result->src_loc uninitialized;
> 	  return result;
> 	}
>        if (!_cpp_get_fresh_line (pfile))
>          {
>            result->type = CPP_EOF;
>            if (!pfile->state.in_directive && !pfile->state.parsing_args)
> 	    {
> 	      result->src_loc = pfile->line_table->highest_line;
> 	      ...
> 	    }
> 	  ... // otherwise result->src_loc is sometimes uninitialized here
> 	  return result;
> 	}
>        ...
>      }
> ...
>    result->src_loc = pfile->line_table->highest_line;
> ...
>    c = *buffer->cur++;
>    switch (c)
>      {
> ...
>      case '\n':
> ...
>        buffer->need_line = true;
>        if (pfile->state.in_deferred_pragma)
>          {
>            result->type = CPP_PRAGMA_EOL;
> ...
> 	  return result;
> 	}
>        goto fresh_line;
> ...
>      }
> ...
> So, if _cpp_lex_direct is called without buffer->need_line initially set,
> result->src_loc is always initialized (and actually hundreds of tests rely
> on that exact value it has), even when c == '\n' and we set that flag later
> on and goto fresh_line.  For CPP_PRAGMA_EOL case we have in that case
> separate handling and don't goto.
> But if _cpp_lex_direct is called with buffer->need_line initially set and
> either decide to return a CPP_PRAGMA_EOL token or if getting a new line fails
> for some reason and we return an CPP_ERROR token and we are in directive
> or parsing args state, it is kept uninitialized and can be whatever the
> allocation left it there as.
> 
> The following patch attempts to keep the status quo, use value that was
> returned previously if it was initialized (i.e. we went through the
> goto fresh_line; statement in c == '\n' handling) and only initialize
> result->src_loc if it was uninitialized before.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

> 2023-12-13  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR preprocessor/112956
> 	* lex.cc (_cpp_lex_direct): Initialize c to 0.
> 	For CPP_PRAGMA_EOL tokens and if c == 0 also for CPP_EOF
> 	set result->src_loc to highest locus.
> 
> --- libcpp/lex.cc.jj	2023-12-11 12:39:23.353442196 +0100
> +++ libcpp/lex.cc	2023-12-12 13:15:07.154019695 +0100
> @@ -3809,7 +3809,7 @@ _cpp_get_fresh_line (cpp_reader *pfile)
>   cpp_token *
>   _cpp_lex_direct (cpp_reader *pfile)
>   {
> -  cppchar_t c;
> +  cppchar_t c = 0;
>     cpp_buffer *buffer;
>     const unsigned char *comment_start;
>     bool fallthrough_comment = false;
> @@ -3833,6 +3833,7 @@ _cpp_lex_direct (cpp_reader *pfile)
>   	  pfile->state.in_deferred_pragma = false;
>   	  if (!pfile->state.pragma_allow_expansion)
>   	    pfile->state.prevent_expansion--;
> +	  result->src_loc = pfile->line_table->highest_line;
>   	  return result;
>   	}
>         if (!_cpp_get_fresh_line (pfile))
> @@ -3849,6 +3850,8 @@ _cpp_lex_direct (cpp_reader *pfile)
>   	      /* Now pop the buffer that _cpp_get_fresh_line did not.  */
>   	      _cpp_pop_buffer (pfile);
>   	    }
> +	  else if (c == 0)
> +	    result->src_loc = pfile->line_table->highest_line;
>   	  return result;
>   	}
>         if (buffer != pfile->buffer)
> 
> 	Jakub
>
  

Patch

--- libcpp/lex.cc.jj	2023-12-11 12:39:23.353442196 +0100
+++ libcpp/lex.cc	2023-12-12 13:15:07.154019695 +0100
@@ -3809,7 +3809,7 @@  _cpp_get_fresh_line (cpp_reader *pfile)
 cpp_token *
 _cpp_lex_direct (cpp_reader *pfile)
 {
-  cppchar_t c;
+  cppchar_t c = 0;
   cpp_buffer *buffer;
   const unsigned char *comment_start;
   bool fallthrough_comment = false;
@@ -3833,6 +3833,7 @@  _cpp_lex_direct (cpp_reader *pfile)
 	  pfile->state.in_deferred_pragma = false;
 	  if (!pfile->state.pragma_allow_expansion)
 	    pfile->state.prevent_expansion--;
+	  result->src_loc = pfile->line_table->highest_line;
 	  return result;
 	}
       if (!_cpp_get_fresh_line (pfile))
@@ -3849,6 +3850,8 @@  _cpp_lex_direct (cpp_reader *pfile)
 	      /* Now pop the buffer that _cpp_get_fresh_line did not.  */
 	      _cpp_pop_buffer (pfile);
 	    }
+	  else if (c == 0)
+	    result->src_loc = pfile->line_table->highest_line;
 	  return result;
 	}
       if (buffer != pfile->buffer)