fix bug with command `printf "%s\n", $_as_string($pc)`

Message ID 1cbe8b68-b592-825a-c662-56096ef0f795@gmail.com
State New, archived
Headers

Commit Message

Matthew Malcomson Feb. 25, 2017, 11:45 a.m. UTC
  On 20/02/17 18:19, Simon Marchi wrote:
> On 2017-02-19 12:32, Matthew Malcomson wrote:
>> diff --git a/gdb/python/py-value.c b/gdb/python/py-value.c
>> index eb3d307b19..c786f68865 100644
>> --- a/gdb/python/py-value.c
>> +++ b/gdb/python/py-value.c
>> @@ -1615,7 +1615,7 @@ convert_value_from_python (PyObject *obj)
>>        gdb::unique_xmalloc_ptr<char> s
>>          = python_string_to_target_string (obj);
>>        if (s != NULL)
>> -        value = value_cstring (s.get (), strlen (s.get ()),
>> +        value = value_cstring (s.get (), strlen (s.get ()) + 1,
>>                     builtin_type_pychar);
>>      }
>>        else if (PyObject_TypeCheck (obj, &value_object_type))
>
> This fix looks good to me.
>
> One test (py-mi.exp) needs to be updated though.  You can run all the 
> Python-related tests using:
>
>   $ make check TESTS="gdb.python/*.exp"
>
> Normally, the Python tests all pass reliably, unlike some other parts 
> of the testsuite.
>
> It might also be good to improve gdb.python/py-as-string.exp to 
> include a test for this bug.
>
> Thanks,
>
> Simon

Thanks -- I had mistakenly ignored the py-mi.exp failure assuming it was 
nothing to do with me (my apologies).

I've included the test fixes you suggested below.

------------------------------------------------

CHANGELOG:

2017-02-19  Matthew Malcomson <hardenedapple@gmail.com>

     * python/py-value.c (convert_value_from_python): Include NULL 
terminator in result.
     testsuite/gdb.python/py-as-string.c, 
testsuite/gdb.python/py-as-string.exp: Update tests
     to account for NULL terminator from python string values.
     doc/gdb.texinfo ($trace_func): Mention this value can't be used 
with printf.

-------------------------------------------------

PATCH:


commit 44fd8bd7af5cf4c6b32846dd78ebfecb7b8d9fa5
Author: Matthew Malcomson <hardenedapple@gmail.com>
Date:   Sun Feb 19 14:35:09 2017 +0000

     convert_value_from_python include terminating NULL

     When converting python strings to internal gdb Value strings, the NULL
     byte was initially left out; this can result in extra data from the
     inferior being printed when the resulting value is used with
     printf "%s\n", value
  

Comments

Simon Marchi Feb. 25, 2017, 6:33 p.m. UTC | #1
Hi Matthew,

I noted mostly some minor formatting issues, in general it looks good to 
me.  One comment about malloc.

On 2017-02-25 06:45, Matthew Malcomson wrote:
> CHANGELOG:
> 
> 2017-02-19  Matthew Malcomson <hardenedapple@gmail.com>
> 
>     * python/py-value.c (convert_value_from_python): Include NULL
> terminator in result.
>     testsuite/gdb.python/py-as-string.c,
> testsuite/gdb.python/py-as-string.exp: Update tests
>     to account for NULL terminator from python string values.
>     doc/gdb.texinfo ($trace_func): Mention this value can't be used 
> with printf.

There is a ChangeLog in the doc and testsuite directories, so you should 
place these entries in the relevant ChangeLogs.  Also, look at this 
section of the GDB wiki for more info on the proper format.

https://sourceware.org/gdb/wiki/ContributionChecklist#Properly_Formatted_GNU_ChangeLog

> +static char arena[51] = 
> "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
> +
> +/* Override malloc() so value_coerce_to_target() gets a known pointer, 
> and we
> +   know we'll see an error if $_as_string() returns a string that 
> isn't NULL
> +   terminated. */

IIUC, the goal of overriding malloc is to ensure that the memory returned 
by malloc is not all zeroes, which would potentially hide the bug?  If 
that's right, you could instead write a wrapper for malloc instead of a 
replacement.  The wrapper would memset the allocated buffer to 'x'es, 
for example.  This way, it will be safer in case there are many calls to 
malloc or calls with size > 51.

See option #2 of this answer: http://stackoverflow.com/a/262481

> +void *malloc(size_t size)

We try to respect the GNU/GDB coding style even in tests, so please put 
the return type on its own line and put a space after the function name:

void *
malloc (size_t size)
{
   ...
}

> +{
> +    if (size > sizeof(arena))

Space after sizeof.

> +        return NULL;
> +
> +    return arena;
> +}

The indentation in C/C++ code should be two spaces per indent, until you 
have 8 spaces, it then becomes a tab.

> +
>  static enum EnumType enum_valid = ENUM_VALUE_B;
>  static enum EnumType enum_invalid = 20;
> 
> diff --git a/gdb/testsuite/gdb.python/py-as-string.exp
> b/gdb/testsuite/gdb.python/py-as-string.exp
> index 0c44d5f174..819442834c 100644
> --- a/gdb/testsuite/gdb.python/py-as-string.exp
> +++ b/gdb/testsuite/gdb.python/py-as-string.exp
> @@ -35,6 +35,12 @@ proc test_as_string { } {
>      gdb_test "p \$_as_string(2)" "\"2\""
>      gdb_test "p \$_as_string(enum_valid)" "\"ENUM_VALUE_B\""
>      gdb_test "p \$_as_string(enum_invalid)" "\"20\""
> +    # Test that the NULL character is included in the returned value.
> +    gdb_test "printf \"%s\\n\", \$_as_string(\"hi\")" "\"hi\""
> +    # Quote once to define the string, and once for the regexp.
> +    gdb_test "interpreter-exec mi '-var-create test *
> \$_as_string(\"Hello\")'" \
> + "\\^done,name=\"test\",numchild=\"8\",value=\"\\\[8]\",type=\"char
> \\\[8]\",has_more=\"0\""

Indent this with a leading tab.

If you want to avoid massive escaping, you can use {} strings instead of 
"" strings.  {} strings are treated literally, so there's no $variable 
substitution, no [proc invocation], no need to escape a literal 
backslash, etc.  You still need to escape characters that have a special 
meaning in regex though.

   "\\^done,name=\"test\",numchild=\"8\",value=\"\\\[8]\",type=\"char 
\\\[8]\",has_more=\"0\""

would become (I think, I have not tested)

   {\^done,name="test",numchild="8",value="\[8]",type="char 
\[8]",has_more="0"}

Finally, feel free to add newlines between logical groups of lines to 
make the code more readable.

Thanks,

Simon
  
Matthew Malcomson Feb. 25, 2017, 7:11 p.m. UTC | #2
On 25/02/17 18:33, Simon Marchi wrote:
> Hi Matthew,
>
> I noted mostly some minor formatting issues, in general it looks good 
> to me.  One comment about malloc.


Sure, I have just a few questions


> On 2017-02-25 06:45, Matthew Malcomson wrote:
>> CHANGELOG:
>>
>> 2017-02-19  Matthew Malcomson <hardenedapple@gmail.com>
>>
>>     * python/py-value.c (convert_value_from_python): Include NULL
>> terminator in result.
>>     testsuite/gdb.python/py-as-string.c,
>> testsuite/gdb.python/py-as-string.exp: Update tests
>>     to account for NULL terminator from python string values.
>>     doc/gdb.texinfo ($trace_func): Mention this value can't be used 
>> with printf.
>
> There is a ChangeLog in the doc and testsuite directories, so you 
> should place these entries in the relevant ChangeLogs.  Also, look at 
> this section of the GDB wiki for more info on the proper format.


So I should include the changelog entry as part of the patch? (I just 
sent it in the email based on how I read the CONTRIBUTE file)


>
> https://sourceware.org/gdb/wiki/ContributionChecklist#Properly_Formatted_GNU_ChangeLog 
>
>
>> +static char arena[51] = 
>> "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
>> +
>> +/* Override malloc() so value_coerce_to_target() gets a known 
>> pointer, and we
>> +   know we'll see an error if $_as_string() returns a string that 
>> isn't NULL
>> +   terminated. */
>
> IIUC, the goal of overriding malloc is to ensure that the memory 
> returned by malloc is not all zeroes, which would potentially hide the 
> bug?  If that's right, you could instead write a wrapper for malloc 
> instead of a replacement.  The wrapper would memset the allocated 
> buffer to 'x'es, for example.  This way, it will be safer in case 
> there are many calls to malloc or calls with size > 51.
>
> See option #2 of this answer: http://stackoverflow.com/a/262481
>



Yes, that was the reason. I used this way because I read that gdb also 
worked on non-POSIX systems (windows especially) and thought having a 
working test on all systems would be preferred (though I didn't check 
that all systems support the testing framework).
I believe that no other calls to malloc are made in the inferior for 
this test, and that this program isn't used anywhere else, so this limit 
of 51 bytes is never hit.
I agree this is a bug waiting to happen, so I can accept if the 
alternate would be preferred, but I thought I'd mention my reasoning.



>> +void *malloc(size_t size)
>
> We try to respect the GNU/GDB coding style even in tests, so please 
> put the return type on its own line and put a space after the function 
> name:


My apologies


> void *
> malloc (size_t size)
> {
>   ...
> }
>
>> +{
>> +    if (size > sizeof(arena))
>
> Space after sizeof.
>
>> +        return NULL;
>> +
>> +    return arena;
>> +}
>
> The indentation in C/C++ code should be two spaces per indent, until 
> you have 8 spaces, it then becomes a tab.
>
>> +
>>  static enum EnumType enum_valid = ENUM_VALUE_B;
>>  static enum EnumType enum_invalid = 20;
>>
>> diff --git a/gdb/testsuite/gdb.python/py-as-string.exp
>> b/gdb/testsuite/gdb.python/py-as-string.exp
>> index 0c44d5f174..819442834c 100644
>> --- a/gdb/testsuite/gdb.python/py-as-string.exp
>> +++ b/gdb/testsuite/gdb.python/py-as-string.exp
>> @@ -35,6 +35,12 @@ proc test_as_string { } {
>>      gdb_test "p \$_as_string(2)" "\"2\""
>>      gdb_test "p \$_as_string(enum_valid)" "\"ENUM_VALUE_B\""
>>      gdb_test "p \$_as_string(enum_invalid)" "\"20\""
>> +    # Test that the NULL character is included in the returned value.
>> +    gdb_test "printf \"%s\\n\", \$_as_string(\"hi\")" "\"hi\""
>> +    # Quote once to define the string, and once for the regexp.
>> +    gdb_test "interpreter-exec mi '-var-create test *
>> \$_as_string(\"Hello\")'" \
>> + "\\^done,name=\"test\",numchild=\"8\",value=\"\\\[8]\",type=\"char
>> \\\[8]\",has_more=\"0\""
>
> Indent this with a leading tab.
>
> If you want to avoid massive escaping, you can use {} strings instead 
> of "" strings.  {} strings are treated literally, so there's no 
> $variable substitution, no [proc invocation], no need to escape a 
> literal backslash, etc.  You still need to escape characters that have 
> a special meaning in regex though.
>
> "\\^done,name=\"test\",numchild=\"8\",value=\"\\\[8]\",type=\"char 
> \\\[8]\",has_more=\"0\""
>
> would become (I think, I have not tested)
>
>   {\^done,name="test",numchild="8",value="\[8]",type="char 
> \[8]",has_more="0"}



Yes, this does work, I had chosen "" strings to match the previous lines 
(I figured I'd have a comment either mentioning why this string used 
different delimiters or why there was extra backslashing, and it looked 
neater to me this way).
Would you prefer using {} strings? or was that just a heads-up in case I 
didn't know?


>
> Finally, feel free to add newlines between logical groups of lines to 
> make the code more readable.
>
> Thanks,
>
> Simon
>
  
Simon Marchi Feb. 25, 2017, 9:33 p.m. UTC | #3
On 2017-02-25 14:11, Matthew Malcomson wrote:
>> On 2017-02-25 06:45, Matthew Malcomson wrote:
>>> CHANGELOG:
>>> 
>>> 2017-02-19  Matthew Malcomson <hardenedapple@gmail.com>
>>> 
>>>     * python/py-value.c (convert_value_from_python): Include NULL
>>> terminator in result.
>>>     testsuite/gdb.python/py-as-string.c,
>>> testsuite/gdb.python/py-as-string.exp: Update tests
>>>     to account for NULL terminator from python string values.
>>>     doc/gdb.texinfo ($trace_func): Mention this value can't be used 
>>> with printf.
>> 
>> There is a ChangeLog in the doc and testsuite directories, so you 
>> should place these entries in the relevant ChangeLogs.  Also, look at 
>> this section of the GDB wiki for more info on the proper format.
> 
> 
> So I should include the changelog entry as part of the patch? (I just
> sent it in the email based on how I read the CONTRIBUTE file)

You did it right, the changes only go in the actual ChangeLog files just 
before pushing the patch.

Just make sure to put each change in the relevant ChangeLog, the one 
"closest" to the change in the directory structure.  For example, for 
your change, I would do:

gdb/ChangeLog:

	* python/py-value.c (convert_value_from_python): Consider terminating
	NULL byte in string length.

gdb/doc/ChangeLog:

	* gdb.texinfo (Convenience Variables for Tracepoints): Mention that
	trace_func should be used with output and not printf.

gdb/testsuite/ChangeLog:

	* gdb.python/py-as-string.c (malloc): New function.
	* gdb.python/py-as-string.exp (test_as_string): Test $_as_string on
	a string with printf.
	* gdb.python/py-mi.exp: Adjust array length.

>> IIUC, the goal of overriding malloc is to ensure that the memory 
>> returned by malloc is not all zeroes, which would potentially hide the 
>> bug?  If that's right, you could instead write a wrapper for malloc 
>> instead of a replacement.  The wrapper would memset the allocated 
>> buffer to 'x'es, for example.  This way, it will be safer in case 
>> there are many calls to malloc or calls with size > 51.
>> 
>> See option #2 of this answer: http://stackoverflow.com/a/262481
> 
> Yes, that was the reason. I used this way because I read that gdb also
> worked on non-POSIX systems (windows especially) and thought having a
> working test on all systems would be preferred (though I didn't check
> that all systems support the testing framework).
> I believe that no other calls to malloc are made in the inferior for
> this test, and that this program isn't used anywhere else, so this
> limit of 51 bytes is never hit.
> I agree this is a bug waiting to happen, so I can accept if the
> alternate would be preferred, but I thought I'd mention my reasoning.

That's a good justification too, I'm ok with either.

>>> +void *malloc(size_t size)
>> 
>> We try to respect the GNU/GDB coding style even in tests, so please 
>> put the return type on its own line and put a space after the function 
>> name:
> 
> 
> My apologies

Apologies accepted :)

>> void *
>> malloc (size_t size)
>> {
>>   ...
>> }
>> 
>>> +{
>>> +    if (size > sizeof(arena))
>> 
>> Space after sizeof.
>> 
>>> +        return NULL;
>>> +
>>> +    return arena;
>>> +}
>> 
>> The indentation in C/C++ code should be two spaces per indent, until 
>> you have 8 spaces, it then becomes a tab.
>> 
>>> +
>>>  static enum EnumType enum_valid = ENUM_VALUE_B;
>>>  static enum EnumType enum_invalid = 20;
>>> 
>>> diff --git a/gdb/testsuite/gdb.python/py-as-string.exp
>>> b/gdb/testsuite/gdb.python/py-as-string.exp
>>> index 0c44d5f174..819442834c 100644
>>> --- a/gdb/testsuite/gdb.python/py-as-string.exp
>>> +++ b/gdb/testsuite/gdb.python/py-as-string.exp
>>> @@ -35,6 +35,12 @@ proc test_as_string { } {
>>>      gdb_test "p \$_as_string(2)" "\"2\""
>>>      gdb_test "p \$_as_string(enum_valid)" "\"ENUM_VALUE_B\""
>>>      gdb_test "p \$_as_string(enum_invalid)" "\"20\""
>>> +    # Test that the NULL character is included in the returned 
>>> value.
>>> +    gdb_test "printf \"%s\\n\", \$_as_string(\"hi\")" "\"hi\""
>>> +    # Quote once to define the string, and once for the regexp.
>>> +    gdb_test "interpreter-exec mi '-var-create test *
>>> \$_as_string(\"Hello\")'" \
>>> + "\\^done,name=\"test\",numchild=\"8\",value=\"\\\[8]\",type=\"char
>>> \\\[8]\",has_more=\"0\""
>> 
>> Indent this with a leading tab.
>> 
>> If you want to avoid massive escaping, you can use {} strings instead 
>> of "" strings.  {} strings are treated literally, so there's no 
>> $variable substitution, no [proc invocation], no need to escape a 
>> literal backslash, etc.  You still need to escape characters that have 
>> a special meaning in regex though.
>> 
>> "\\^done,name=\"test\",numchild=\"8\",value=\"\\\[8]\",type=\"char 
>> \\\[8]\",has_more=\"0\""
>> 
>> would become (I think, I have not tested)
>> 
>>   {\^done,name="test",numchild="8",value="\[8]",type="char 
>> \[8]",has_more="0"}
> 
> 
> 
> Yes, this does work, I had chosen "" strings to match the previous
> lines (I figured I'd have a comment either mentioning why this string
> used different delimiters or why there was extra backslashing, and it
> looked neater to me this way).
> Would you prefer using {} strings? or was that just a heads-up in case
> I didn't know?

It was just a heads up, since most people are not very literate in TCL 
(me included), feel free to use whichever you want.

Thanks,

Simon
  

Patch

diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index c465dc2f9f..5fb34853f1 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -13645,8 +13645,8 @@  The source file for the current trace snapshot.
  The name of the function containing @code{$tracepoint}.
  @end table

-Note: @code{$trace_file} is not suitable for use in @code{printf},
-use @code{output} instead.
+Note: @code{$trace_file} and @code{$trace_func} are not suitable for use in
+@code{printf}, use @code{output} instead.

  Here's a simple example of using these convenience variables for
  stepping through all the trace snapshots and printing some of their
diff --git a/gdb/python/py-value.c b/gdb/python/py-value.c
index eb3d307b19..c786f68865 100644
--- a/gdb/python/py-value.c
+++ b/gdb/python/py-value.c
@@ -1615,7 +1615,7 @@  convert_value_from_python (PyObject *obj)
        gdb::unique_xmalloc_ptr<char> s
          = python_string_to_target_string (obj);
        if (s != NULL)
-        value = value_cstring (s.get (), strlen (s.get ()),
+        value = value_cstring (s.get (), strlen (s.get ()) + 1,
                     builtin_type_pychar);
      }
        else if (PyObject_TypeCheck (obj, &value_object_type))
diff --git a/gdb/testsuite/gdb.python/py-as-string.c 
b/gdb/testsuite/gdb.python/py-as-string.c
index de2e8a1951..c35a692712 100644
--- a/gdb/testsuite/gdb.python/py-as-string.c
+++ b/gdb/testsuite/gdb.python/py-as-string.c
@@ -15,6 +15,8 @@ 
     You should have received a copy of the GNU General Public License
     along with this program.  If not, see 
<http://www.gnu.org/licenses/>. */

+#include <stddef.h>
+
  enum EnumType {
    ENUM_VALUE_A,
    ENUM_VALUE_B,
@@ -22,6 +24,19 @@  enum EnumType {
    ENUM_VALUE_D,
  };

+static char arena[51] = 
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+
+/* Override malloc() so value_coerce_to_target() gets a known pointer, 
and we
+   know we'll see an error if $_as_string() returns a string that isn't 
NULL
+   terminated. */
+void *malloc(size_t size)
+{
+    if (size > sizeof(arena))
+        return NULL;
+
+    return arena;
+}
+
  static enum EnumType enum_valid = ENUM_VALUE_B;
  static enum EnumType enum_invalid = 20;

diff --git a/gdb/testsuite/gdb.python/py-as-string.exp 
b/gdb/testsuite/gdb.python/py-as-string.exp
index 0c44d5f174..819442834c 100644
--- a/gdb/testsuite/gdb.python/py-as-string.exp
+++ b/gdb/testsuite/gdb.python/py-as-string.exp
@@ -35,6 +35,12 @@  proc test_as_string { } {
      gdb_test "p \$_as_string(2)" "\"2\""
      gdb_test "p \$_as_string(enum_valid)" "\"ENUM_VALUE_B\""
      gdb_test "p \$_as_string(enum_invalid)" "\"20\""
+    # Test that the NULL character is included in the returned value.
+    gdb_test "printf \"%s\\n\", \$_as_string(\"hi\")" "\"hi\""
+    # Quote once to define the string, and once for the regexp.
+    gdb_test "interpreter-exec mi '-var-create test * 
\$_as_string(\"Hello\")'" \
+ "\\^done,name=\"test\",numchild=\"8\",value=\"\\\[8]\",type=\"char 
\\\[8]\",has_more=\"0\""
+    gdb_test "interpreter-exec mi '-var-delete test'" 
"\\^done,ndeleted=\"1\""
  }

  test_as_string
diff --git a/gdb/testsuite/gdb.python/py-mi.exp 
b/gdb/testsuite/gdb.python/py-mi.exp
index 736dc7a0d6..a5ad3f0f44 100644
--- a/gdb/testsuite/gdb.python/py-mi.exp
+++ b/gdb/testsuite/gdb.python/py-mi.exp
@@ -281,7 +281,7 @@  mi_create_dynamic_varobj nstype2 nstype2 1 \
    "create nstype2 varobj"

  mi_list_varobj_children nstype2 {
-    { {nstype2.<error at 0>} {<error at 0>} 6 {char \[6\]} }
+    { {nstype2.<error at 0>} {<error at 0>} 7 {char \[7\]} }
  } "list children after setting exception flag"

  mi_create_varobj me me \