Rewrite Rust slice type handling

Message ID 20240202182017.1731887-1-tromey@adacore.com
State New
Headers
Series Rewrite Rust slice type handling |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-arm success Testing passed

Commit Message

Tom Tromey Feb. 2, 2024, 6:20 p.m. UTC
  This patch rewrites the handling of slice types in Rust.

More recent versions of the Rust compiler changed how unsized types
were emitted, letting gdb inspect them more nicely.  However, gdb did
not do this, and in fact treated all such types as if they were slices
of arrays, which is incorrect.

This patch rewrites this handling and removes the restriction that
unsized types must be array slices.  I've added a comment explaining
how unsized types are represented to rust-lang.c as well.

I looked into a different approach, namely changing the DWARF reader
to fix up slice types to have a dynamic type.  However, the approach
taken here turned out to be simpler.

Tested on x86-64 Fedora 38 with a variety of Rust compiler versions.

Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=30330
---
 gdb/rust-lang.c                     | 239 ++++++++++++++++++++--------
 gdb/rust-lang.h                     |  13 +-
 gdb/testsuite/gdb.rust/simple.exp   |   2 +-
 gdb/testsuite/gdb.rust/unsized.exp  |   2 +-
 gdb/testsuite/gdb.rust/unsized2.exp |  59 +++++++
 gdb/testsuite/gdb.rust/unsized2.rs  |  67 ++++++++
 6 files changed, 310 insertions(+), 72 deletions(-)
 create mode 100644 gdb/testsuite/gdb.rust/unsized2.exp
 create mode 100644 gdb/testsuite/gdb.rust/unsized2.rs
  

Comments

Tom Tromey Feb. 20, 2024, 8:52 p.m. UTC | #1
>>>>> "Tom" == Tom Tromey <tromey@adacore.com> writes:

Tom> This patch rewrites the handling of slice types in Rust.
Tom> More recent versions of the Rust compiler changed how unsized types
Tom> were emitted, letting gdb inspect them more nicely.  However, gdb did
Tom> not do this, and in fact treated all such types as if they were slices
Tom> of arrays, which is incorrect.

Tom> This patch rewrites this handling and removes the restriction that
Tom> unsized types must be array slices.  I've added a comment explaining
Tom> how unsized types are represented to rust-lang.c as well.

Tom> I looked into a different approach, namely changing the DWARF reader
Tom> to fix up slice types to have a dynamic type.  However, the approach
Tom> taken here turned out to be simpler.

Tom> Tested on x86-64 Fedora 38 with a variety of Rust compiler versions.

I'm checking this in now.

Tom
  

Patch

diff --git a/gdb/rust-lang.c b/gdb/rust-lang.c
index e0b5a887a23..ab537cc9752 100644
--- a/gdb/rust-lang.c
+++ b/gdb/rust-lang.c
@@ -153,9 +153,9 @@  rust_tuple_struct_type_p (struct type *type)
   return type->num_fields () > 0 && rust_underscore_fields (type);
 }
 
-/* See rust-lang.h.  */
+/* Return true if TYPE is "slice-like"; false otherwise.  */
 
-bool
+static bool
 rust_slice_type_p (const struct type *type)
 {
   if (type->code () == TYPE_CODE_STRUCT
@@ -269,6 +269,125 @@  rust_get_trait_object_pointer (struct value *value)
   return value_cast (pointer_type, value_field (value, 1 - vtable_field));
 }
 
+/* Find and possibly rewrite the unsized part of a slice-like type.
+
+   This function has two modes.  If the out parameters are both NULL,
+   it will return true if an unsized member of IN_TYPE is found.
+
+   If the out parameters are both non-NULL, it will do the same, but
+   will also rewrite the unsized member's type to be an array of the
+   appropriate type.  BOUND is the upper bound of the new array.
+
+   See convert_slice to understand the different kinds of unsized type
+   and how they are represented.
+*/
+static bool
+rewrite_slice_type (struct type *in_type, struct type **new_type,
+		    LONGEST bound, ULONGEST *additional_length)
+{
+  if (in_type->code () != TYPE_CODE_STRUCT)
+    return false;
+
+  unsigned nfields = in_type->num_fields ();
+  if (nfields == 0)
+    return false;
+
+  struct type *rewritten;
+  const field &field = in_type->field (nfields - 1);
+  struct type *field_type = field.type ();
+  if (field.loc_kind () == FIELD_LOC_KIND_BITPOS
+      && field.loc_bitpos () == 8 * in_type->length ())
+    {
+      if (additional_length == nullptr)
+	return true;
+      rewritten = lookup_array_range_type (field_type, 0, bound);
+      *additional_length = rewritten->length ();
+    }
+  else
+    {
+    if (!rewrite_slice_type (field_type, &rewritten, bound,
+			     additional_length))
+	return false;
+      if (additional_length == nullptr)
+	return true;
+    }
+
+  struct type *result = copy_type (in_type);
+  result->copy_fields (in_type);
+  result->field (nfields - 1).set_type (rewritten);
+  result->set_length (result->length () + *additional_length);
+
+  *new_type = result;
+  return true;
+}
+
+/* Convert a Rust slice to its "true" representation.
+
+   The Rust compiler emits slices as "fat" pointers like:
+
+   struct { payload *data_ptr; usize length }
+
+   Any sort of unsized type is emitted this way.
+
+   If 'payload' is a struct type, then it must be searched to see if
+   the trailing field is unsized.  This has to be done recursively (as
+   in, if the final field in the struct type itself has struct type,
+   then that type must be searched).  In this scenario, the unsized
+   field can be recognized because it does not contribute to the
+   type's size.
+
+   If 'payload' does not have a trailing unsized type, or if it is not
+   of struct type, then this slice is "array-like".  In this case
+   rewriting will return an array.
+*/
+static struct value *
+convert_slice (struct value *val)
+{
+  struct type *type = check_typedef (val->type ());
+  /* This must have been checked by the caller.  */
+  gdb_assert (rust_slice_type_p (type));
+
+  struct value *len = value_struct_elt (&val, {}, "length", nullptr,
+					"slice");
+  LONGEST llen = value_as_long (len);
+
+  struct value *ptr = value_struct_elt (&val, {}, "data_ptr", nullptr,
+					"slice");
+  struct type *original_type = ptr->type ()->target_type ();
+  ULONGEST new_length_storage = 0;
+  struct type *new_type = nullptr;
+  if (!rewrite_slice_type (original_type, &new_type, llen - 1,
+			   &new_length_storage))
+    new_type = lookup_array_range_type (original_type, 0, llen - 1);
+
+  struct value *result = value::allocate_lazy (new_type);
+  result->set_lval (lval_memory);
+  result->set_address (value_as_address (ptr));
+  result->fetch_lazy ();
+
+  return result;
+}
+
+/* If TYPE is an array-like slice, return the element type; otherwise
+   return NULL.  */
+static struct type *
+rust_array_like_element_type (struct type *type)
+{
+  /* Caller must check this.  */
+  gdb_assert (rust_slice_type_p (type));
+  for (int i = 0; i < type->num_fields (); ++i)
+    {
+      if (strcmp (type->field (i).name (), "data_ptr") == 0)
+	{
+	  struct type *base_type = type->field (i).type ()->target_type ();
+	  if (rewrite_slice_type (base_type, nullptr, 0, nullptr))
+	    return nullptr;
+	  return base_type;
+	}
+    }
+  return nullptr;
+}
+
 
 
 /* See language.h.  */
@@ -324,57 +443,40 @@  static const struct generic_val_print_decorations rust_decorations =
 struct value *
 rust_slice_to_array (struct value *val)
 {
-  struct type *type = check_typedef (val->type ());
-  /* This must have been checked by the caller.  */
-  gdb_assert (rust_slice_type_p (type));
-
-  struct value *base = value_struct_elt (&val, {}, "data_ptr", NULL,
-					 "slice");
-  struct value *len = value_struct_elt (&val, {}, "length", NULL, "slice");
-  LONGEST llen = value_as_long (len);
-
-  struct type *elt_type = base->type ()->target_type ();
-  struct type *array_type = lookup_array_range_type (elt_type, 0,
-						     llen - 1);
-  struct value *array = value::allocate_lazy (array_type);
-  array->set_lval (lval_memory);
-  array->set_address (value_as_address (base));
-
-  return array;
+  val = convert_slice (val);
+  if (val->type ()->code () != TYPE_CODE_ARRAY)
+    return nullptr;
+  return val;
 }
 
 /* Helper function to print a slice.  */
 
-static void
-rust_val_print_slice (struct value *val, struct ui_file *stream, int recurse,
-		      const struct value_print_options *options)
+void
+rust_language::val_print_slice
+     (struct value *val, struct ui_file *stream, int recurse,
+      const struct value_print_options *options) const
 {
-  struct value *base = value_struct_elt (&val, {}, "data_ptr", NULL,
-					 "slice");
-  struct value *len = value_struct_elt (&val, {}, "length", NULL, "slice");
+  struct type *orig_type = check_typedef (val->type ());
 
+  val = convert_slice (val);
   struct type *type = check_typedef (val->type ());
-  if (strcmp (type->name (), "&str") == 0)
-    val_print_string (base->type ()->target_type (), "UTF-8",
-		      value_as_address (base), value_as_long (len), stream,
-		      options);
-  else
-    {
-      LONGEST llen = value_as_long (len);
 
-      type_print (val->type (), "", stream, -1);
-      gdb_printf (stream, " ");
-
-      if (llen == 0)
-	gdb_printf (stream, "[]");
-      else
+  /* &str is handled here; but for all other slice types it is fine to
+     simply print the contents.  */
+  if (orig_type->name () != nullptr
+      && strcmp (orig_type->name (), "&str") == 0)
+    {
+      LONGEST low_bound, high_bound;
+      if (get_array_bounds (type, &low_bound, &high_bound))
 	{
-	  struct value *array = rust_slice_to_array (val);
-	  array->fetch_lazy ();
-	  generic_value_print (array, stream, recurse, options,
-			       &rust_decorations);
+	  val_print_string (type->target_type (), "UTF-8",
+			    val->address (), high_bound - low_bound + 1,
+			    stream, options);
+	  return;
 	}
     }
+
+  value_print_inner (val, stream, recurse, options);
 }
 
 /* See rust-lang.h.  */
@@ -390,7 +492,7 @@  rust_language::val_print_struct
 
   if (rust_slice_type_p (type))
     {
-      rust_val_print_slice (val, stream, recurse, options);
+      val_print_slice (val, stream, recurse, options);
       return;
     }
 
@@ -1180,6 +1282,7 @@  rust_subscript (struct type *expect_type, struct expression *exp,
     low = value_as_long (rhs);
 
   struct type *type = check_typedef (lhs->type ());
+  struct type *orig_type = type;
   if (noside == EVAL_AVOID_SIDE_EFFECTS)
     {
       struct type *base_type = nullptr;
@@ -1187,16 +1290,9 @@  rust_subscript (struct type *expect_type, struct expression *exp,
 	base_type = type->target_type ();
       else if (rust_slice_type_p (type))
 	{
-	  for (int i = 0; i < type->num_fields (); ++i)
-	    {
-	      if (strcmp (type->field (i).name (), "data_ptr") == 0)
-		{
-		  base_type = type->field (i).type ()->target_type ();
-		  break;
-		}
-	    }
+	  base_type = rust_array_like_element_type (type);
 	  if (base_type == nullptr)
-	    error (_("Could not find 'data_ptr' in slice type"));
+	    error (_("Cannot subscript non-array-like slice"));
 	}
       else if (type->code () == TYPE_CODE_PTR)
 	base_type = type->target_type ();
@@ -1227,6 +1323,12 @@  rust_subscript (struct type *expect_type, struct expression *exp,
       LONGEST low_bound;
       struct value *base;
 
+      if (rust_slice_type_p (type))
+	{
+	  lhs = convert_slice (lhs);
+	  type = check_typedef (lhs->type ());
+	}
+
       if (type->code () == TYPE_CODE_ARRAY)
 	{
 	  base = lhs;
@@ -1236,15 +1338,6 @@  rust_subscript (struct type *expect_type, struct expression *exp,
 	    error (_("Found array with non-zero lower bound"));
 	  ++high_bound;
 	}
-      else if (rust_slice_type_p (type))
-	{
-	  struct value *len;
-
-	  base = value_struct_elt (&lhs, {}, "data_ptr", NULL, "slice");
-	  len = value_struct_elt (&lhs, {}, "length", NULL, "slice");
-	  low_bound = 0;
-	  high_bound = value_as_long (len);
-	}
       else if (type->code () == TYPE_CODE_PTR)
 	{
 	  base = lhs;
@@ -1284,9 +1377,11 @@  rust_subscript (struct type *expect_type, struct expression *exp,
 	  usize = language_lookup_primitive_type (exp->language_defn,
 						  exp->gdbarch,
 						  "usize");
-	  const char *new_name = ((type != nullptr
-				   && rust_slice_type_p (type))
-				  ? type->name () : "&[*gdb*]");
+	  /* Preserve the name for slice-of-slice; this lets
+	     string-printing work a bit more nicely.  */
+	  const char *new_name = ((orig_type != nullptr
+				   && rust_slice_type_p (orig_type))
+				  ? orig_type->name () : "&[*gdb*]");
 
 	  slice = rust_slice_type (new_name, result->type (), usize);
 
@@ -1477,7 +1572,11 @@  rust_structop::evaluate (struct type *expect_type,
 	}
     }
   else
-    result = value_struct_elt (&lhs, {}, field_name, NULL, "structure");
+    {
+      if (rust_slice_type_p (type))
+	lhs = convert_slice (lhs);
+      result = value_struct_elt (&lhs, {}, field_name, NULL, "structure");
+    }
   if (noside == EVAL_AVOID_SIDE_EFFECTS)
     result = value::zero (result->type (), result->lval ());
   return result;
@@ -1677,6 +1776,16 @@  rust_language::emitchar (int ch, struct type *chtype,
 
 /* See language.h.  */
 
+bool
+rust_language::is_array_like (struct type *type) const
+{
+  if (!rust_slice_type_p (type))
+    return false;
+  return rust_array_like_element_type (type) != nullptr;
+}
+
+/* See language.h.  */
+
 bool
 rust_language::is_string_type_p (struct type *type) const
 {
diff --git a/gdb/rust-lang.h b/gdb/rust-lang.h
index e76a63ee037..9ae5961e9ac 100644
--- a/gdb/rust-lang.h
+++ b/gdb/rust-lang.h
@@ -34,9 +34,6 @@  extern bool rust_tuple_type_p (struct type *type);
 /* Return true if TYPE is a tuple struct type; otherwise false.  */
 extern bool rust_tuple_struct_type_p (struct type *type);
 
-/* Return true if TYPE is a slice type, otherwise false.  */
-extern bool rust_slice_type_p (const struct type *type);
-
 /* Given a block, find the name of the block's crate. Returns an empty
    stringif no crate name can be found.  */
 extern std::string rust_crate_for_block (const struct block *block);
@@ -196,8 +193,7 @@  class rust_language : public language_defn
 
   /* See language.h.  */
 
-  bool is_array_like (struct type *type) const override
-  { return rust_slice_type_p (type); }
+  bool is_array_like (struct type *type) const override;
 
   /* See language.h.  */
 
@@ -211,6 +207,13 @@  class rust_language : public language_defn
 
 private:
 
+  /* Helper for value_print_inner, arguments are as for that function.
+     Prints a slice.  */
+
+  void val_print_slice (struct value *val, struct ui_file *stream,
+			int recurse,
+			const struct value_print_options *options) const;
+
   /* Helper for value_print_inner, arguments are as for that function.
      Prints structs and untagged unions.  */
 
diff --git a/gdb/testsuite/gdb.rust/simple.exp b/gdb/testsuite/gdb.rust/simple.exp
index 1e6fc94400e..7f5fbad7a3f 100644
--- a/gdb/testsuite/gdb.rust/simple.exp
+++ b/gdb/testsuite/gdb.rust/simple.exp
@@ -317,7 +317,7 @@  proc test_one_slice {svar length base range} {
 	global hex
 
 	# Just accept any array here.
-	set result " = &\\\[.*\\\] \\\[.*\\\]"
+	set result " = \\\[.*\\\]"
 
 	gdb_test "print $svar" $result
 	gdb_test "print &${base}\[${range}\]" $result
diff --git a/gdb/testsuite/gdb.rust/unsized.exp b/gdb/testsuite/gdb.rust/unsized.exp
index 94750896288..fab655790e6 100644
--- a/gdb/testsuite/gdb.rust/unsized.exp
+++ b/gdb/testsuite/gdb.rust/unsized.exp
@@ -33,6 +33,6 @@  if {![runto ${srcfile}:$line]} {
 gdb_test "ptype us" " = .*V<\\\[u8\\\]>.*"
 
 if {[rust_at_least 1.61]} {
-    gdb_test "print us2" " = .*Box<.*> \\\[1, 2, 3\\\]"
+    gdb_test "print us2" " = \\\[1, 2, 3\\\]"
     gdb_test "ptype us2" "type = .*"
 }
diff --git a/gdb/testsuite/gdb.rust/unsized2.exp b/gdb/testsuite/gdb.rust/unsized2.exp
new file mode 100644
index 00000000000..5b7be45d8ce
--- /dev/null
+++ b/gdb/testsuite/gdb.rust/unsized2.exp
@@ -0,0 +1,59 @@ 
+# Copyright (C) 2024 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Test the handling of unsized types.
+
+load_lib rust-support.exp
+require allow_rust_tests
+require {can_compile rust}
+require {rust_at_least 1.61}
+
+standard_testfile .rs
+if {[prepare_for_testing "failed to prepare" $testfile $srcfile {debug rust}]} {
+    return -1
+}
+
+set line [gdb_get_line_number "set breakpoint here"]
+if {![runto ${srcfile}:$line]} {
+    untested "could not run to breakpoint"
+    return -1
+}
+
+set base_value \
+    [string cat \
+	 "MaybeUnsizedStruct<.*?>" \
+	 [string_to_regexp " {regular: 23, rest: \[5, 6, 7\]}"]]
+
+gdb_test "print *sized_struct" \
+    " = .*$base_value"
+gdb_test "print *nested_sized_struct" \
+    " = .*MaybeUnsizedStruct<.*?> {regular: 91, rest: .*$base_value}"
+
+gdb_test "print unsized_struct" \
+    " = .*$base_value"
+gdb_test "print *reference" \
+    " = .*$base_value"
+
+gdb_test "print nested_unsized_struct" \
+    " = .*MaybeUnsizedStruct<.*?> {regular: 91, rest: .*$base_value}"
+
+gdb_test "print alpha" \
+    " = .*MaybeUnsizedStruct2<.*?> {value: \\\[97, 98, 99, 0\\\]}"
+gdb_test "print beta" \
+    " = .*MaybeUnsizedStruct2<.*?> {value: \\\[97, 98, 99, 0\\\]}"
+
+gdb_test "print sized_struct.regular" " = 23"
+gdb_test "print nested_unsized_struct.regular" " = 91"
+gdb_test "print unsized_struct.rest\[1\]" " = 6"
diff --git a/gdb/testsuite/gdb.rust/unsized2.rs b/gdb/testsuite/gdb.rust/unsized2.rs
new file mode 100644
index 00000000000..980a5fe92f3
--- /dev/null
+++ b/gdb/testsuite/gdb.rust/unsized2.rs
@@ -0,0 +1,67 @@ 
+// Copyright (C) 2024 Free Software Foundation, Inc.
+
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#![allow(dead_code)]
+#![allow(unused_variables)]
+#![allow(unused_assignments)]
+
+fn ignore<T>(x: T) { }
+
+// A generic struct that is unsized if T is unsized.
+pub struct MaybeUnsizedStruct<T: ?Sized> {
+    pub regular: u32,
+    pub rest: T,
+}
+
+// Same but without an ordinary part.
+pub struct MaybeUnsizedStruct2<T: ?Sized> {
+    value: T,
+}
+
+fn main() {
+    // This struct is still sized because T is a fixed-length array
+    let sized_struct = &MaybeUnsizedStruct {
+        regular: 23,
+        rest: [5, 6, 7],
+    };
+
+    // This struct is still sized because T is sized
+    let nested_sized_struct = &MaybeUnsizedStruct {
+        regular: 91,
+        rest: MaybeUnsizedStruct {
+            regular: 23,
+            rest: [5, 6, 7],
+        },
+    };
+
+    // This will be a fat pointer, containing the length of the final
+    // field.
+    let unsized_struct: &MaybeUnsizedStruct<[u32]> = sized_struct;
+
+    // This will also be a fat pointer, containing the length of the
+    // final field.
+    let nested_unsized_struct:
+        &MaybeUnsizedStruct<MaybeUnsizedStruct<[u32]>> = nested_sized_struct;
+
+    let alpha: MaybeUnsizedStruct2<[u8; 4]> = MaybeUnsizedStruct2 { value: *b"abc\0" };
+    let beta: &MaybeUnsizedStruct2<[u8]> = &alpha;
+
+    let reference = &unsized_struct;
+
+    ignore(sized_struct); // set breakpoint here
+    ignore(nested_sized_struct);
+    ignore(unsized_struct);
+    ignore(nested_unsized_struct);
+}