gdb/python: Add new gdb.Value.bytes attribute

Message ID 9cefc82f3c5d6619bca1f04a783e48ea5639a841.1697641024.git.aburgess@redhat.com
State New
Headers
Series gdb/python: Add new gdb.Value.bytes attribute |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-arm success Testing passed

Commit Message

Andrew Burgess Oct. 18, 2023, 2:57 p.m. UTC
  Add a gdb.Value.bytes attribute.  This attribute contains the bytes of
the value (assuming the complete bytes of the value are available).

If the bytes of the gdb.Value are not available then accessing this
attribute raises an exception.

The bytes object returned from gdb.Value.bytes is cached within GDB so
that the same bytes object is returned each time.  The bytes object is
only created on demand, however, to avoid unnecessary work.

For some values we can of course obtain the same information by
reading inferior memory based on gdb.Value.address and
gdb.Value.type.sizeof, however, not every value is in memory, so we
don't always have an address.

The gdb.Value.bytes attribute will convert any value to a bytes
object, so long as the contents are available.  The value can be one
created purely in Python code, the value could be in a register,
or (of course) the value could be in memory.
---
 gdb/NEWS                              |  3 ++
 gdb/doc/python.texi                   |  7 ++++
 gdb/python/py-value.c                 | 35 +++++++++++++++++
 gdb/testsuite/gdb.python/py-value.exp | 56 ++++++++++++++++++++++++++-
 4 files changed, 100 insertions(+), 1 deletion(-)


base-commit: 29736fc507c7a9c6e797b7f83e8df4be73d37767
  

Comments

Eli Zaretskii Oct. 18, 2023, 3:40 p.m. UTC | #1
> From: Andrew Burgess <aburgess@redhat.com>
> Cc: Andrew Burgess <aburgess@redhat.com>
> Date: Wed, 18 Oct 2023 15:57:15 +0100
> 
>  gdb/NEWS                              |  3 ++
>  gdb/doc/python.texi                   |  7 ++++
>  gdb/python/py-value.c                 | 35 +++++++++++++++++
>  gdb/testsuite/gdb.python/py-value.exp | 56 ++++++++++++++++++++++++++-
>  4 files changed, 100 insertions(+), 1 deletion(-)
> 
> diff --git a/gdb/NEWS b/gdb/NEWS
> index 08d779010f0..d89df5dbb45 100644
> --- a/gdb/NEWS
> +++ b/gdb/NEWS
> @@ -11,6 +11,9 @@
>    ** New function gdb.notify_mi(NAME, DATA), that emits custom
>       GDB/MI async notification.
>  
> +  ** New attribute gdb.Value.bytes that contains a bytes object
> +     holding the contents of this value.
> +
>  *** Changes in GDB 14
>  
>  * GDB now supports the AArch64 Scalable Matrix Extension 2 (SME2), which
> diff --git a/gdb/doc/python.texi b/gdb/doc/python.texi
> index 546b4d4b962..a2db6c1b863 100644
> --- a/gdb/doc/python.texi
> +++ b/gdb/doc/python.texi
> @@ -916,6 +916,13 @@
>  method is invoked.  
>  @end defvar
>  
> +@defvar Value.bytes
> +The value of this read-only attribute is a @code{bytes} object
> +containing the bytes that make up this Value's value.  If the complete
                                          ^^^^^
I think "Value" should be in @code.

> +contents of this value are not available then accessing this attribute
> +will raise an exception.
> +@end defvar
> +

The documentation parts are okay, but I wonder: is it enough to say
"the bytes that make up this Value"?  Shouldn't we say something about
endianness, for example?  Or what are "the bytes" of a string with
non-ASCII characters in it?
  
Tom Tromey Oct. 20, 2023, 5:21 p.m. UTC | #2
>>>>> "Andrew" == Andrew Burgess <aburgess@redhat.com> writes:

Andrew> Add a gdb.Value.bytes attribute.  This attribute contains the bytes of
Andrew> the value (assuming the complete bytes of the value are available).

Thanks for doing this.  I think we've wanted this for a long time.
Also, it's half of https://sourceware.org/bugzilla/show_bug.cgi?id=13267

Andrew> +  value_obj->content_bytes
Andrew> +    =  PyBytes_FromStringAndSize ((const char *) contents.data (),
Andrew> +				  contents.size ());
Andrew> +  Py_XINCREF (value_obj->content_bytes);
Andrew> +  return value_obj->content_bytes;

I think Value.assign should clear the cache.

Tom
  

Patch

diff --git a/gdb/NEWS b/gdb/NEWS
index 08d779010f0..d89df5dbb45 100644
--- a/gdb/NEWS
+++ b/gdb/NEWS
@@ -11,6 +11,9 @@ 
   ** New function gdb.notify_mi(NAME, DATA), that emits custom
      GDB/MI async notification.
 
+  ** New attribute gdb.Value.bytes that contains a bytes object
+     holding the contents of this value.
+
 *** Changes in GDB 14
 
 * GDB now supports the AArch64 Scalable Matrix Extension 2 (SME2), which
diff --git a/gdb/doc/python.texi b/gdb/doc/python.texi
index 546b4d4b962..a2db6c1b863 100644
--- a/gdb/doc/python.texi
+++ b/gdb/doc/python.texi
@@ -916,6 +916,13 @@ 
 method is invoked.  
 @end defvar
 
+@defvar Value.bytes
+The value of this read-only attribute is a @code{bytes} object
+containing the bytes that make up this Value's value.  If the complete
+contents of this value are not available then accessing this attribute
+will raise an exception.
+@end defvar
+
 The following methods are provided:
 
 @defun Value.__init__ (val)
diff --git a/gdb/python/py-value.c b/gdb/python/py-value.c
index 0bf1d6e0dae..87287cd9c7f 100644
--- a/gdb/python/py-value.c
+++ b/gdb/python/py-value.c
@@ -63,6 +63,7 @@  struct value_object {
   PyObject *address;
   PyObject *type;
   PyObject *dynamic_type;
+  PyObject *content_bytes;
 };
 
 /* List of all values which are currently exposed to Python. It is
@@ -86,6 +87,7 @@  valpy_clear_value (value_object *self)
   Py_CLEAR (self->address);
   Py_CLEAR (self->type);
   Py_CLEAR (self->dynamic_type);
+  Py_CLEAR (self->content_bytes);
 }
 
 /* Called by the Python interpreter when deallocating a value object.  */
@@ -1304,6 +1306,36 @@  valpy_get_is_lazy (PyObject *self, void *closure)
   Py_RETURN_FALSE;
 }
 
+/* Getter for the gdb.Value.bytes attribute; caches its result in SELF.  */
+static PyObject *
+valpy_get_bytes (PyObject *self, void *closure)
+{
+  value_object *value_obj = (value_object *) self;
+  struct value *value = value_obj->value;
+
+  if (value_obj->content_bytes != nullptr)
+    {
+      Py_INCREF (value_obj->content_bytes);
+      return value_obj->content_bytes;
+    }
+  /* Not cached yet.  Fetching the contents may throw a GDB exception.  */
+  gdb::array_view<const gdb_byte> contents;
+  try
+    {
+      contents = value->contents ();
+    }
+  catch (const gdb_exception &except)
+    {
+      GDB_PY_HANDLE_EXCEPTION (except);
+    }
+  /* Cache the new object.  Py_XINCREF tolerates a failed (null) result.  */
+  value_obj->content_bytes
+    =  PyBytes_FromStringAndSize ((const char *) contents.data (),
+				  contents.size ());
+  Py_XINCREF (value_obj->content_bytes);
+  return value_obj->content_bytes;
+}
+
 /* Implements gdb.Value.fetch_lazy ().  */
 static PyObject *
 valpy_fetch_lazy (PyObject *self, PyObject *args)
@@ -1865,6 +1897,7 @@  value_to_value_object (struct value *val)
       val_obj->address = NULL;
       val_obj->type = NULL;
       val_obj->dynamic_type = NULL;
+      val_obj->content_bytes = nullptr;
       note_value (val_obj);
     }
 
@@ -2152,6 +2185,8 @@  static gdb_PyGetSetDef value_object_getset[] = {
     "Boolean telling whether the value is lazy (not fetched yet\n\
 from the inferior).  A lazy value is fetched when needed, or when\n\
 the \"fetch_lazy()\" method is called.", NULL },
+  { "bytes", valpy_get_bytes, nullptr,
+    "Return a bytes object containing the bytes of this value.", nullptr },
   {NULL}  /* Sentinel */
 };
 
diff --git a/gdb/testsuite/gdb.python/py-value.exp b/gdb/testsuite/gdb.python/py-value.exp
index cdfcd414cd4..89bfbfd8153 100644
--- a/gdb/testsuite/gdb.python/py-value.exp
+++ b/gdb/testsuite/gdb.python/py-value.exp
@@ -66,7 +66,14 @@  proc test_value_creation {} {
   gdb_test "python print ('result = %s' % i.address)" "= None" "test address attribute in non-addressable value"
 
   # Test creating / printing an optimized out value
-  gdb_test "python print(gdb.Value(gdb.Value(5).type.optimized_out()))"
+  gdb_test "python print(gdb.Value(gdb.Value(5).type.optimized_out()))" \
+      "<optimized out>"
+
+  # Test accessing the bytes of an optimized out value.
+  gdb_test "python print(gdb.Value(gdb.Value(5).type.optimized_out()).bytes)" \
+      [multi_line \
+	   "gdb\\.error: value has been optimized out" \
+	   "Error while executing Python code\\."]
 }
 
 # Check that we can call gdb.Value.__init__ to change a value.
@@ -485,6 +492,34 @@  proc test_subscript_regression {exefile lang} {
  gdb_py_test_silent_cmd "print {\"fu \",\"foo\",\"bar\"}" "Build array" 1
  gdb_py_test_silent_cmd "python marray = gdb.history(0)" "fetch marray" 0
  gdb_test "python print (marray\[1\]\[2\])" "o." "test multiple subscript"
+
+ # A Python helper function.  Fetch VAR_NAME from the inferior as a
+ # gdb.Value.  Read the bytes of the value based on its address, and
+ # the size of its type.  Then compare these bytes to the value
+ # obtained from gdb.Value.bytes.  Assert that the two bytes objects
+ # match.
+ gdb_test_multiline "Create a function to check Value.bytes" \
+     "python" "" \
+     "def check_value_bytes(var_name):" "" \
+     "   val = gdb.parse_and_eval(var_name)" "" \
+     "   addr = val.address" "" \
+     "   len = val.type.sizeof" "" \
+     "   mem = gdb.selected_inferior().read_memory(addr, len)" "" \
+     "   mem_bytes = mem.tobytes()" "" \
+     "   val_bytes = val.bytes" "" \
+     "   assert mem_bytes == val_bytes" "" \
+     "end" ""
+
+ gdb_test_no_output { python check_value_bytes("a") }
+ gdb_test_no_output { python check_value_bytes("p") }
+ gdb_test_no_output { python check_value_bytes("i") }
+ gdb_test_no_output { python check_value_bytes("ptr_i") }
+ gdb_test_no_output { python check_value_bytes("embed") }
+ gdb_test_no_output { python check_value_bytes("fp1") }
+ gdb_test_no_output { python check_value_bytes("nullst") }
+ gdb_test_no_output { python check_value_bytes("st") }
+ gdb_test_no_output { python check_value_bytes("s") }
+ gdb_test_no_output { python check_value_bytes("u") }
 }
 
 # A few tests of gdb.parse_and_eval.
@@ -542,13 +577,30 @@  proc prepare_type_and_buffer {} {
 proc test_value_from_buffer {} {
   global gdb_prompt
 
+  # A Python helper function.  Create a bytes object from inferior
+  # memory LEN bytes starting at ADDR, and compare this to the bytes
+  # obtained from VAL.bytes.  Assert that the two bytes objects match.
+  gdb_test_multiline "Create another function to check Value.bytes" \
+     "python" "" \
+     "def compare_value_bytes_to_mem(val, addr, len):" "" \
+     "   mem = gdb.selected_inferior().read_memory(addr, len)" "" \
+     "   mem_bytes = mem.tobytes()" "" \
+     "   val_bytes = val.bytes" "" \
+     "   assert mem_bytes == val_bytes" "" \
+     "end" ""
+
   prepare_type_and_buffer
   gdb_test "python v=gdb.Value(b,tp); print(v)" "1" \
             "construct value from buffer"
+  gdb_test_no_output { python compare_value_bytes_to_mem(v, addr, size_a0) }
   gdb_test "python v=gdb.Value(b\[size_a0:\],tp); print(v)" "2" \
             "convert 2nd elem of buffer to value"
+  gdb_test_no_output \
+      { python compare_value_bytes_to_mem(v, (int(addr) + size_a0), size_a0) }
   gdb_test "python v=gdb.Value(b\[2*size_a0:\],tp); print(v)" "3" \
            "convert 3rd elem of buffer to value"
+  gdb_test_no_output \
+      { python compare_value_bytes_to_mem(v, (int(addr) + (2 * size_a0)), size_a0) }
   gdb_test "python v=gdb.Value(b\[2*size_a0+1:\],tp); print(v)" \
            "ValueError: Size of type is larger than that of buffer object\..*" \
 	   "attempt to convert smaller buffer than size of type"
@@ -556,6 +608,8 @@  proc test_value_from_buffer {} {
                          "make array type" 0
   gdb_py_test_silent_cmd "python va=gdb.Value(b,atp)" \
                          "construct array value from buffer" 0
+  gdb_test_no_output \
+      { python compare_value_bytes_to_mem(va, addr, size_a0 * 3) }
   gdb_test "python print(va)" "\\{1, 2, 3\\}" "print array value"
   gdb_test "python print(va\[0\])" "1" "print first array element"
   gdb_test "python print(va\[1\])" "2" "print second array element"