[048/125] gccrs: format-args: Start storing string in Rust memory

Message ID 20240801145809.366388-50-arthur.cohen@embecosm.com
State New
Headers
Series [001/125] Rust: Make 'tree'-level 'MAIN_NAME_P' work |

Commit Message

Arthur Cohen Aug. 1, 2024, 2:56 p.m. UTC
  gcc/rust/ChangeLog:

	* ast/rust-fmt.cc (ffi::RustHamster::to_string): New.
	(Pieces::collect): Adapt to use new handle API.
	(Pieces::~Pieces): Likewise.
	(Pieces::Pieces): Likewise.
	(Pieces::operator=): Likewise.
	* ast/rust-fmt.h (struct RustString): Add members.
	(struct FormatArgsHandle): New.
	(clone_pieces): Adapt for new FFI API.
	(destroy_pieces): Likewise.
	(struct Pieces): Store new FormatArgsHandle type.
	* expand/rust-expand-format-args.cc (expand_format_args): Use proper
	namespace.
	* resolve/rust-ast-resolve-base.cc (ResolverBase::visit): FormatArgs
	nodes are already resolved, so do nothing.

libgrust/ChangeLog:

	* libformat_parser/src/lib.rs: Use new Handle struct and expose it.
---
 gcc/rust/ast/rust-fmt.cc                   |  32 ++++---
 gcc/rust/ast/rust-fmt.h                    |  46 ++++++---
 gcc/rust/expand/rust-expand-format-args.cc |   4 +-
 gcc/rust/resolve/rust-ast-resolve-base.cc  |   5 +-
 libgrust/libformat_parser/src/lib.rs       | 105 ++++++++++++++++-----
 5 files changed, 133 insertions(+), 59 deletions(-)
  

Patch

diff --git a/gcc/rust/ast/rust-fmt.cc b/gcc/rust/ast/rust-fmt.cc
index b82e089fc41..fda02e530fd 100644
--- a/gcc/rust/ast/rust-fmt.cc
+++ b/gcc/rust/ast/rust-fmt.cc
@@ -22,44 +22,48 @@ 
 namespace Rust {
 namespace Fmt {
 
+std::string
+ffi::RustHamster::to_string () const
+{
+  return std::string (ptr, len);
+}
+
 Pieces
-Pieces::collect (std::string &&to_parse, bool append_newline)
+Pieces::collect (const std::string &to_parse, bool append_newline)
 {
-  auto piece_slice = collect_pieces (to_parse.c_str (), append_newline);
+  auto handle = ffi::collect_pieces (to_parse.c_str (), append_newline);
 
   // this performs multiple copies, can we avoid them maybe?
   // TODO: Instead of just creating a vec of, basically, `ffi::Piece`s, we
   // should transform them into the proper C++ type which we can work with. so
   // transform all the strings into C++ strings? all the Option<T> into
   // tl::optional<T>?
-  auto pieces = std::vector<Piece> (piece_slice.base_ptr,
-				    piece_slice.base_ptr + piece_slice.len);
+  auto pieces_vector = std::vector<ffi::Piece> (handle.piece_slice.base_ptr,
+						handle.piece_slice.base_ptr
+						  + handle.piece_slice.len);
 
-  return Pieces (std::move (pieces), piece_slice, std::move (to_parse));
+  return Pieces (handle, std::move (pieces_vector));
 }
 
-Pieces::~Pieces () { destroy_pieces (slice); }
+Pieces::~Pieces () { ffi::destroy_pieces (handle); }
 
-Pieces::Pieces (const Pieces &other)
-  : pieces_vector (other.pieces_vector), to_parse (other.to_parse)
+Pieces::Pieces (const Pieces &other) : pieces_vector (other.pieces_vector)
 {
-  slice = clone_pieces (other.slice.base_ptr, other.slice.len, other.slice.cap);
+  handle = ffi::clone_pieces (other.handle);
 }
 
 Pieces &
 Pieces::operator= (const Pieces &other)
 {
-  slice = clone_pieces (other.slice.base_ptr, other.slice.len, other.slice.cap);
-  to_parse = other.to_parse;
+  handle = ffi::clone_pieces (other.handle);
+  pieces_vector = other.pieces_vector;
 
   return *this;
 }
 
 Pieces::Pieces (Pieces &&other)
   : pieces_vector (std::move (other.pieces_vector)),
-    slice (
-      clone_pieces (other.slice.base_ptr, other.slice.len, other.slice.cap)),
-    to_parse (std::move (other.to_parse))
+    handle (clone_pieces (other.handle))
 {}
 
 } // namespace Fmt
diff --git a/gcc/rust/ast/rust-fmt.h b/gcc/rust/ast/rust-fmt.h
index ba412f9958c..6a0c116b841 100644
--- a/gcc/rust/ast/rust-fmt.h
+++ b/gcc/rust/ast/rust-fmt.h
@@ -20,15 +20,21 @@ 
 #define RUST_FMT_H
 
 #include "rust-system.h"
+#include <cstddef>
 
 // FIXME: How to encode Option?
 
 namespace Rust {
 namespace Fmt {
 
+namespace ffi {
+
 struct RustHamster
 {
-  // hehe
+  const char *ptr;
+  size_t len;
+
+  std::string to_string () const;
 };
 
 /// Enum of alignments which are supported.
@@ -240,21 +246,36 @@  struct PieceSlice
   size_t cap;
 };
 
+struct RustString
+{
+  const unsigned char *ptr;
+  size_t len;
+  size_t cap;
+};
+
+struct FormatArgsHandle
+{
+  PieceSlice piece_slice;
+  RustString rust_string;
+};
+
 extern "C" {
 
-PieceSlice
+FormatArgsHandle
 collect_pieces (const char *input, bool append_newline);
 
-PieceSlice
-clone_pieces (const Piece *base_ptr, size_t len, size_t cap);
+FormatArgsHandle
+clone_pieces (const FormatArgsHandle &);
 
-void destroy_pieces (PieceSlice);
+void destroy_pieces (FormatArgsHandle);
 
 } // extern "C"
 
+} // namespace ffi
+
 struct Pieces
 {
-  static Pieces collect (std::string &&to_parse, bool append_newline);
+  static Pieces collect (const std::string &to_parse, bool append_newline);
   ~Pieces ();
 
   Pieces (const Pieces &other);
@@ -262,7 +283,7 @@  struct Pieces
 
   Pieces (Pieces &&other);
 
-  const std::vector<Piece> &get_pieces () const { return pieces_vector; }
+  const std::vector<ffi::Piece> &get_pieces () const { return pieces_vector; }
 
   // {
   //   slice = clone_pieces (&other.slice);
@@ -272,19 +293,16 @@  struct Pieces
   // }
 
 private:
-  Pieces (std::vector<Piece> &&pieces_vector, PieceSlice slice,
-	  std::string &&to_parse)
-    : pieces_vector (std::move (pieces_vector)), slice (slice),
-      to_parse (std::move (to_parse))
+  Pieces (ffi::FormatArgsHandle handle, std::vector<ffi::Piece> &&pieces_vector)
+    : pieces_vector (std::move (pieces_vector)), handle (handle)
   {}
 
-  std::vector<Piece> pieces_vector;
+  std::vector<ffi::Piece> pieces_vector;
 
   // this memory is held for FFI reasons - it needs to be released and cloned
   // precisely, so try to not access it/modify it if possible. you should
   // instead work with `pieces_vector`
-  PieceSlice slice;
-  std::string to_parse;
+  ffi::FormatArgsHandle handle;
 };
 
 } // namespace Fmt
diff --git a/gcc/rust/expand/rust-expand-format-args.cc b/gcc/rust/expand/rust-expand-format-args.cc
index 52249cb17e3..276ffd58c50 100644
--- a/gcc/rust/expand/rust-expand-format-args.cc
+++ b/gcc/rust/expand/rust-expand-format-args.cc
@@ -28,10 +28,10 @@  expand_format_args (AST::FormatArgs &fmt)
     {
       switch (node.tag)
 	{
-	case Fmt::Piece::Tag::String:
+	case Fmt::ffi::Piece::Tag::String:
 	  // rust_debug ("[ARTHUR]: %s", node.string._0.c_str ());
 
-	case Fmt::Piece::Tag::NextArgument:
+	case Fmt::ffi::Piece::Tag::NextArgument:
 	  rust_debug ("[ARTHUR]: NextArgument");
 	  break;
 	}
diff --git a/gcc/rust/resolve/rust-ast-resolve-base.cc b/gcc/rust/resolve/rust-ast-resolve-base.cc
index 1939a2056e4..fa1be197941 100644
--- a/gcc/rust/resolve/rust-ast-resolve-base.cc
+++ b/gcc/rust/resolve/rust-ast-resolve-base.cc
@@ -648,10 +648,7 @@  ResolverBase::visit (AST::FunctionParam &)
 
 void
 ResolverBase::visit (AST::FormatArgs &fmt)
-{
-  rust_sorry_at (fmt.get_locus (), "%s:%u: unimplemented FormatArgs visitor",
-		 __FILE__, __LINE__);
-}
+{}
 
 } // namespace Resolver
 } // namespace Rust
diff --git a/libgrust/libformat_parser/src/lib.rs b/libgrust/libformat_parser/src/lib.rs
index 6bf78c4f2a8..84fac38e224 100644
--- a/libgrust/libformat_parser/src/lib.rs
+++ b/libgrust/libformat_parser/src/lib.rs
@@ -27,6 +27,23 @@  where
 mod ffi {
     use super::IntoFFI;
 
+    // FIXME: We need to ensure we deal with memory properly - whether it's owned by the C++ side or the Rust side
+    #[derive(Copy, Clone, PartialEq, Eq, Debug)]
+    #[repr(C)]
+    pub struct RustHamster {
+        ptr: *const u8,
+        len: usize,
+    }
+
+    impl<'a> From<&'a str> for RustHamster {
+        fn from(s: &'a str) -> RustHamster {
+            RustHamster {
+                ptr: s.as_ptr(),
+                len: s.len(),
+            }
+        }
+    }
+
     // Note: copied from rustc_span
     /// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
     #[derive(Copy, Clone, PartialEq, Eq, Debug)]
@@ -81,7 +98,7 @@  mod ffi {
     #[repr(C)]
     pub enum Piece<'a> {
         /// A literal string which should directly be emitted
-        String(&'a str),
+        String(RustHamster),
         /// This describes that formatting should process the next argument (as
         /// specified inside) for emission.
         // do we need a pointer here? we're doing big cloning anyway
@@ -201,7 +218,7 @@  mod ffi {
     impl<'a> From<generic_format_parser::Piece<'a>> for Piece<'a> {
         fn from(old: generic_format_parser::Piece<'a>) -> Self {
             match old {
-                generic_format_parser::Piece::String(x) => Piece::String(x),
+                generic_format_parser::Piece::String(x) => Piece::String(x.into()),
                 generic_format_parser::Piece::NextArgument(x) => {
                     // FIXME: This is problematic - if we do this, then we probably run into the issue that the Box
                     // is freed at the end of the call to collect_pieces. if we just .leak() it, then we have
@@ -336,53 +353,91 @@  pub struct PieceSlice {
     cap: usize,
 }
 
-#[no_mangle]
-pub extern "C" fn collect_pieces(input: *const libc::c_char, append_newline: bool) -> PieceSlice {
-    dbg!(input);
+#[repr(C)]
+// FIXME: we should probably use FFIString here
+pub struct RustString {
+    ptr: *const u8,
+    len: usize,
+    cap: usize,
+}
+
+#[repr(C)]
+pub struct FormatArgsHandle(PieceSlice, RustString);
 
+#[no_mangle]
+pub extern "C" fn collect_pieces(
+    input: *const libc::c_char,
+    append_newline: bool,
+) -> FormatArgsHandle {
     // FIXME: Add comment
     let str = unsafe { CStr::from_ptr(input) };
+    let str = str.to_str().unwrap().to_owned();
 
-    // FIXME: No unwrap
-    let pieces: Vec<ffi::Piece<'_>> =
-        rust::collect_pieces(str.to_str().unwrap(), None, None, append_newline)
-            .into_iter()
-            .map(Into::into)
-            .collect();
+    // we are never going to free this string here (we leak it later on), so we can extend its lifetime
+    // to send it across an FFI boundary.
+    // FIXME: Is that correct?
+    let s = &str;
+    let s = unsafe { std::mem::transmute::<&'_ str, &'static str>(s) };
 
-    println!("[ARTHUR]: debug: {:?}, {:?}", pieces.as_ptr(), pieces.len());
+    // FIXME: No unwrap
+    let pieces: Vec<ffi::Piece<'_>> = rust::collect_pieces(s, None, None, append_newline)
+        .into_iter()
+        .map(Into::into)
+        .collect();
 
-    PieceSlice {
+    let piece_slice = PieceSlice {
         len: pieces.len(),
         cap: pieces.capacity(),
         base_ptr: pieces.leak().as_mut_ptr(),
-    }
+    };
+    let rust_string = RustString {
+        len: str.len(),
+        cap: str.capacity(),
+        ptr: str.leak().as_ptr(),
+    };
+
+    FormatArgsHandle(piece_slice, rust_string)
 }
 
 #[no_mangle]
-pub unsafe extern "C" fn destroy_pieces(PieceSlice { base_ptr, len, cap }: PieceSlice) {
-    eprintln!("[ARTHUR] destroying pieces: {base_ptr:?} {len} {cap}");
+pub unsafe extern "C" fn destroy_pieces(FormatArgsHandle(piece_slice, s): FormatArgsHandle) {
+    let PieceSlice { base_ptr, len, cap } = piece_slice;
     drop(Vec::from_raw_parts(base_ptr, len, cap));
+
+    let RustString { ptr, len, cap } = s;
+    drop(String::from_raw_parts(ptr as *mut u8, len, cap));
 }
 
 #[no_mangle]
 pub extern "C" fn clone_pieces(
-    base_ptr: *mut ffi::Piece<'static>,
-    len: usize,
-    cap: usize,
-) -> PieceSlice {
-    eprintln!("[ARTHUR] cloning pieces: {base_ptr:?} {len} {cap}");
+    FormatArgsHandle(piece_slice, s): &FormatArgsHandle,
+) -> FormatArgsHandle {
+    let PieceSlice { base_ptr, len, cap } = *piece_slice;
 
     let v = unsafe { Vec::from_raw_parts(base_ptr, len, cap) };
-
     let cloned_v = v.clone();
 
     // FIXME: Add documentation
     v.leak();
 
-    PieceSlice {
+    let piece_slice = PieceSlice {
         len: cloned_v.len(),
         cap: cloned_v.capacity(),
-        base_ptr: dbg!(cloned_v.leak().as_mut_ptr()),
-    }
+        base_ptr: cloned_v.leak().as_mut_ptr(),
+    };
+
+    let RustString { ptr, len, cap } = *s;
+    let s = unsafe { String::from_raw_parts(ptr as *mut u8, len, cap) };
+    let cloned_s = s.clone();
+
+    // FIXME: Documentation
+    s.leak();
+
+    let rust_string = RustString {
+        len: cloned_s.len(),
+        cap: cloned_s.capacity(),
+        ptr: cloned_s.leak().as_ptr(),
+    };
+
+    FormatArgsHandle(piece_slice, rust_string)
 }