[013/125] gccrs: libformat_parser: Update header and remove old interface

Message ID 20240801145809.366388-15-arthur.cohen@embecosm.com
State New
Headers
Series [001/125] Rust: Make 'tree'-level 'MAIN_NAME_P' work

Commit Message

Arthur Cohen Aug. 1, 2024, 2:56 p.m. UTC
  gcc/rust/ChangeLog:

	* ast/rust-fmt.cc (Pieces::collect): Use new Pieces API.
	* ast/rust-fmt.h: Update interface with new FFI bindings.

libgrust/ChangeLog:

	* libformat_parser/src/lib.rs: Add IntoFFI trait.
	* libformat_parser/libformat-parser.h: Removed.
---
 gcc/rust/ast/rust-fmt.cc                     |  10 +-
 gcc/rust/ast/rust-fmt.h                      | 199 ++++++++++++----
 libgrust/libformat_parser/libformat-parser.h | 224 -------------------
 libgrust/libformat_parser/src/lib.rs         |  56 +++--
 4 files changed, 200 insertions(+), 289 deletions(-)
 delete mode 100644 libgrust/libformat_parser/libformat-parser.h
  

Patch

diff --git a/gcc/rust/ast/rust-fmt.cc b/gcc/rust/ast/rust-fmt.cc
index 559b1c8b579..a7c4341c52d 100644
--- a/gcc/rust/ast/rust-fmt.cc
+++ b/gcc/rust/ast/rust-fmt.cc
@@ -17,6 +17,7 @@ 
 // <http://www.gnu.org/licenses/>.
 
 #include "rust-fmt.h"
+#include "rust-diagnostics.h"
 
 namespace Rust {
 namespace Fmt {
@@ -26,13 +27,12 @@  Pieces::collect (const std::string &to_parse)
 {
   auto piece_slice = collect_pieces (to_parse.c_str ());
 
-  rust_debug ("[ARTHUR] %p, %lu", (void *) piece_slice.ptr, piece_slice.len);
+  rust_debug ("[ARTHUR] %p, %lu", (const void *) piece_slice.base_ptr,
+	      piece_slice.len);
 
   // this performs multiple copies, can we avoid them maybe?
-  auto pieces
-    = std::vector (piece_slice.ptr, piece_slice.ptr + piece_slice.len);
-
-  rust_debug ("[ARTHUR] %p, %lu", (void *) pieces.data (), pieces.size ());
+  // auto pieces = std::vector<Piece> (piece_slice.base_ptr,
+  // 	     piece_slice.base_ptr + piece_slice.len);
 
   return Pieces{};
 }
diff --git a/gcc/rust/ast/rust-fmt.h b/gcc/rust/ast/rust-fmt.h
index 27c1c3625d3..7ec9a2a199d 100644
--- a/gcc/rust/ast/rust-fmt.h
+++ b/gcc/rust/ast/rust-fmt.h
@@ -1,4 +1,4 @@ 
-// Copyright (C) 2020-2023 Free Software Foundation, Inc.
+// Copyright (C) 2023-2024 Free Software Foundation, Inc.
 
 // This file is part of GCC.
 
@@ -19,9 +19,10 @@ 
 #ifndef RUST_FMT_H
 #define RUST_FMT_H
 
-#include "rust-diagnostics.h"
 #include "rust-system.h"
 
+// FIXME: How to encode Option?
+
 namespace Rust {
 namespace Fmt {
 
@@ -30,116 +31,220 @@  struct RustHamster
   // hehe
 };
 
-struct InnerSpan
+/// Enum of alignments which are supported.
+enum class Alignment
 {
+  /// The value will be aligned to the left.
+  AlignLeft,
+  /// The value will be aligned to the right.
+  AlignRight,
+  /// The value will be aligned in the center.
+  AlignCenter,
+  /// The value will take on a default alignment.
+  AlignUnknown,
 };
 
-struct Count
+/// Enum for the debug hex flags.
+enum class DebugHex
 {
-  enum class Kind
-  {
-    Is,
-    IsName,
-    IsParam,
-    IsStar,
-    Implied
-  } kind;
-
-  union
-  {
-    size_t is;
-    std::pair<RustHamster, InnerSpan> is_name;
-    size_t is_param;
-    size_t is_star;
-  } data;
+  /// The `x` flag in `{:x?}`.
+  Lower,
+  /// The `X` flag in `{:X?}`.
+  Upper,
 };
 
-struct DebugHex
+/// Enum for the sign flags.
+enum class Sign
 {
+  /// The `+` flag.
+  Plus,
+  /// The `-` flag.
+  Minus,
 };
 
-struct Sign
+/// Enum describing where an argument for a format can be located.
+struct Position
 {
-};
+  enum class Tag
+  {
+    /// The argument is implied to be located at an index
+    ArgumentImplicitlyIs,
+    /// The argument is located at a specific index given in the format,
+    ArgumentIs,
+    /// The argument has a name.
+    ArgumentNamed,
+  };
 
-struct Alignment
-{
+  struct ArgumentImplicitlyIs_Body
+  {
+    size_t _0;
+  };
+
+  struct ArgumentIs_Body
+  {
+    size_t _0;
+  };
+
+  struct ArgumentNamed_Body
+  {
+    RustHamster _0;
+  };
+
+  Tag tag;
+  union
+  {
+    ArgumentImplicitlyIs_Body argument_implicitly_is;
+    ArgumentIs_Body argument_is;
+    ArgumentNamed_Body argument_named;
+  };
 };
 
-struct RustString
+/// Range inside of a `Span` used for diagnostics when we only have access to
+/// relative positions.
+struct InnerSpan
 {
-  // hehe
+  size_t start;
+  size_t end;
 };
 
-struct Position
+/// A count is used for the precision and width parameters of an integer, and
+/// can reference either an argument or a literal integer.
+struct Count
 {
+  enum class Tag
+  {
+    /// The count is specified explicitly.
+    CountIs,
+    /// The count is specified by the argument with the given name.
+    CountIsName,
+    /// The count is specified by the argument at the given index.
+    CountIsParam,
+    /// The count is specified by a star (like in `{:.*}`) that refers to the
+    /// argument at the given index.
+    CountIsStar,
+    /// The count is implied and cannot be explicitly specified.
+    CountImplied,
+  };
+
+  struct CountIs_Body
+  {
+    size_t _0;
+  };
+
+  struct CountIsName_Body
+  {
+    RustHamster _0;
+    InnerSpan _1;
+  };
+
+  struct CountIsParam_Body
+  {
+    size_t _0;
+  };
+
+  struct CountIsStar_Body
+  {
+    size_t _0;
+  };
+
+  Tag tag;
+  union
+  {
+    CountIs_Body count_is;
+    CountIsName_Body count_is_name;
+    CountIsParam_Body count_is_param;
+    CountIsStar_Body count_is_star;
+  };
 };
 
+/// Specification for the formatting of an argument in the format string.
 struct FormatSpec
 {
   /// Optionally specified character to fill alignment with.
-  tl::optional<char /* FIXME: This is a Rust char, not a C++ char - use an uint32_t instead?  */> fill;
+  const uint32_t *fill;
   /// Span of the optionally specified fill character.
-  tl::optional<InnerSpan> fill_span;
+  const InnerSpan *fill_span;
   /// Optionally specified alignment.
   Alignment align;
   /// The `+` or `-` flag.
-  tl::optional<Sign> sign;
+  const Sign *sign;
   /// The `#` flag.
   bool alternate;
   /// The `0` flag.
   bool zero_pad;
   /// The `x` or `X` flag. (Only for `Debug`.)
-  tl::optional<DebugHex> debug_hex;
+  const DebugHex *debug_hex;
   /// The integer precision to use.
   Count precision;
   /// The span of the precision formatting flag (for diagnostics).
-  tl::optional<InnerSpan> precision_span;
+  const InnerSpan *precision_span;
   /// The string width requested for the resulting format.
   Count width;
   /// The span of the width formatting flag (for diagnostics).
-  tl::optional<InnerSpan> width_span;
+  const InnerSpan *width_span;
   /// The descriptor string representing the name of the format desired for
   /// this argument, this can be empty or any number of characters, although
   /// it is required to be one word.
   RustHamster ty;
-  // &'a str ty;
   /// The span of the descriptor string (for diagnostics).
-  tl::optional<InnerSpan> ty_span;
+  const InnerSpan *ty_span;
 };
 
+/// Representation of an argument specification.
 struct Argument
 {
+  /// Where to find this argument
   Position position;
-  InnerSpan inner_span;
+  /// The span of the position indicator. Includes any whitespace in implicit
+  /// positions (`{  }`).
+  InnerSpan position_span;
+  /// How to format the argument
   FormatSpec format;
 };
 
+/// A piece is a portion of the format string which represents the next part
+/// to emit. These are emitted as a stream by the `Parser` class.
 struct Piece
 {
-  enum class Kind
+  enum class Tag
   {
+    /// A literal string which should directly be emitted
     String,
-    NextArgument
-  } kind;
+    /// This describes that formatting should process the next argument (as
+    /// specified inside) for emission.
+    NextArgument,
+  };
+
+  struct String_Body
+  {
+    RustHamster _0;
+  };
+
+  struct NextArgument_Body
+  {
+    const Argument *_0;
+  };
 
+  Tag tag;
   union
   {
-    RustString string;
-    Argument *next_argument;
-  } data;
+    String_Body string;
+    NextArgument_Body next_argument;
+  };
 };
 
 struct PieceSlice
 {
-  Piece *ptr;
+  const Piece *base_ptr;
   size_t len;
 };
 
 extern "C" {
+
 PieceSlice
-collect_pieces (const char *);
-}
+collect_pieces (const char *input);
+
+} // extern "C"
 
 struct Pieces
 {
@@ -149,4 +254,4 @@  struct Pieces
 } // namespace Fmt
 } // namespace Rust
 
-#endif // ! RUST_FMT_H
+#endif // !RUST_FMT_H
diff --git a/libgrust/libformat_parser/libformat-parser.h b/libgrust/libformat_parser/libformat-parser.h
deleted file mode 100644
index a4bc8a75494..00000000000
--- a/libgrust/libformat_parser/libformat-parser.h
+++ /dev/null
@@ -1,224 +0,0 @@ 
-#include <cstdarg>
-#include <cstdint>
-#include <cstdlib>
-#include <ostream>
-#include <new>
-
-/// Enum of alignments which are supported.
-enum class Alignment
-{
-  /// The value will be aligned to the left.
-  AlignLeft,
-  /// The value will be aligned to the right.
-  AlignRight,
-  /// The value will be aligned in the center.
-  AlignCenter,
-  /// The value will take on a default alignment.
-  AlignUnknown,
-};
-
-/// Enum for the debug hex flags.
-enum class DebugHex
-{
-  /// The `x` flag in `{:x?}`.
-  Lower,
-  /// The `X` flag in `{:X?}`.
-  Upper,
-};
-
-/// Enum for the sign flags.
-enum class Sign
-{
-  /// The `+` flag.
-  Plus,
-  /// The `-` flag.
-  Minus,
-};
-
-template <typename T = void> struct Box;
-
-template <typename T = void> struct Option;
-
-/// Enum describing where an argument for a format can be located.
-struct Position
-{
-  enum class Tag
-  {
-    /// The argument is implied to be located at an index
-    ArgumentImplicitlyIs,
-    /// The argument is located at a specific index given in the format,
-    ArgumentIs,
-    /// The argument has a name.
-    ArgumentNamed,
-  };
-
-  struct ArgumentImplicitlyIs_Body
-  {
-    uintptr_t _0;
-  };
-
-  struct ArgumentIs_Body
-  {
-    uintptr_t _0;
-  };
-
-  struct ArgumentNamed_Body
-  {
-    const str *_0;
-  };
-
-  Tag tag;
-  union
-  {
-    ArgumentImplicitlyIs_Body argument_implicitly_is;
-    ArgumentIs_Body argument_is;
-    ArgumentNamed_Body argument_named;
-  };
-};
-
-/// Range inside of a `Span` used for diagnostics when we only have access to
-/// relative positions.
-struct InnerSpan
-{
-  uintptr_t start;
-  uintptr_t end;
-};
-
-/// A count is used for the precision and width parameters of an integer, and
-/// can reference either an argument or a literal integer.
-struct Count
-{
-  enum class Tag
-  {
-    /// The count is specified explicitly.
-    CountIs,
-    /// The count is specified by the argument with the given name.
-    CountIsName,
-    /// The count is specified by the argument at the given index.
-    CountIsParam,
-    /// The count is specified by a star (like in `{:.*}`) that refers to the
-    /// argument at the given index.
-    CountIsStar,
-    /// The count is implied and cannot be explicitly specified.
-    CountImplied,
-  };
-
-  struct CountIs_Body
-  {
-    uintptr_t _0;
-  };
-
-  struct CountIsName_Body
-  {
-    const str *_0;
-    InnerSpan _1;
-  };
-
-  struct CountIsParam_Body
-  {
-    uintptr_t _0;
-  };
-
-  struct CountIsStar_Body
-  {
-    uintptr_t _0;
-  };
-
-  Tag tag;
-  union
-  {
-    CountIs_Body count_is;
-    CountIsName_Body count_is_name;
-    CountIsParam_Body count_is_param;
-    CountIsStar_Body count_is_star;
-  };
-};
-
-/// Specification for the formatting of an argument in the format string.
-struct FormatSpec
-{
-  /// Optionally specified character to fill alignment with.
-  Option<uint32_t> fill;
-  /// Span of the optionally specified fill character.
-  Option<InnerSpan> fill_span;
-  /// Optionally specified alignment.
-  Alignment align;
-  /// The `+` or `-` flag.
-  Option<Sign> sign;
-  /// The `#` flag.
-  bool alternate;
-  /// The `0` flag.
-  bool zero_pad;
-  /// The `x` or `X` flag. (Only for `Debug`.)
-  Option<DebugHex> debug_hex;
-  /// The integer precision to use.
-  Count precision;
-  /// The span of the precision formatting flag (for diagnostics).
-  Option<InnerSpan> precision_span;
-  /// The string width requested for the resulting format.
-  Count width;
-  /// The span of the width formatting flag (for diagnostics).
-  Option<InnerSpan> width_span;
-  /// The descriptor string representing the name of the format desired for
-  /// this argument, this can be empty or any number of characters, although
-  /// it is required to be one word.
-  const str *ty;
-  /// The span of the descriptor string (for diagnostics).
-  Option<InnerSpan> ty_span;
-};
-
-/// Representation of an argument specification.
-struct Argument
-{
-  /// Where to find this argument
-  Position position;
-  /// The span of the position indicator. Includes any whitespace in implicit
-  /// positions (`{  }`).
-  InnerSpan position_span;
-  /// How to format the argument
-  FormatSpec format;
-};
-
-/// A piece is a portion of the format string which represents the next part
-/// to emit. These are emitted as a stream by the `Parser` class.
-struct Piece
-{
-  enum class Tag
-  {
-    /// A literal string which should directly be emitted
-    String,
-    /// This describes that formatting should process the next argument (as
-    /// specified inside) for emission.
-    NextArgument,
-  };
-
-  struct String_Body
-  {
-    const str *_0;
-  };
-
-  struct NextArgument_Body
-  {
-    Box<Argument> _0;
-  };
-
-  Tag tag;
-  union
-  {
-    String_Body string;
-    NextArgument_Body next_argument;
-  };
-};
-
-struct PieceSlice
-{
-  const Piece *base_ptr;
-  uintptr_t len;
-};
-
-extern "C" {
-
-PieceSlice
-collect_pieces (const char *input);
-
-} // extern "C"
diff --git a/libgrust/libformat_parser/src/lib.rs b/libgrust/libformat_parser/src/lib.rs
index 49821e7cd2f..4bbc468c755 100644
--- a/libgrust/libformat_parser/src/lib.rs
+++ b/libgrust/libformat_parser/src/lib.rs
@@ -5,8 +5,31 @@ 
 
 use std::ffi::CStr;
 
+trait IntoFFI {
+    type Output;
+
+    fn into_ffi(&self) -> Self::Output;
+}
+
+impl<T> IntoFFI for Option<T>
+where
+    T: Sized,
+{
+    type Output = *const T;
+
+    fn into_ffi(&self) -> Self::Output {
+        match self.as_ref() {
+            None => std::ptr::null(),
+            Some(r) => r as *const T,
+        }
+    }
+}
+
+// FIXME: Make an ffi module in a separate file
+// FIXME: Remember to leak the boxed type somehow
+// FIXME: How to encode the Option type? As a pointer? Option<T> -> Option<&T> -> *const T could work maybe?
 mod ffi {
-    use std::ops::Deref;
+    use super::IntoFFI;
 
     // Note: copied from rustc_span
     /// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
@@ -102,31 +125,31 @@  mod ffi {
         /// Optionally specified character to fill alignment with.
         pub fill: Option<char>,
         /// Span of the optionally specified fill character.
-        pub fill_span: Option<InnerSpan>,
+        pub fill_span: *const InnerSpan,
         /// Optionally specified alignment.
         pub align: Alignment,
         /// The `+` or `-` flag.
-        pub sign: Option<Sign>,
+        pub sign: *const Sign,
         /// The `#` flag.
         pub alternate: bool,
         /// The `0` flag.
         pub zero_pad: bool,
         /// The `x` or `X` flag. (Only for `Debug`.)
-        pub debug_hex: Option<DebugHex>,
+        pub debug_hex: *const DebugHex,
         /// The integer precision to use.
         pub precision: Count<'a>,
         /// The span of the precision formatting flag (for diagnostics).
-        pub precision_span: Option<InnerSpan>,
+        pub precision_span: *const InnerSpan,
         /// The string width requested for the resulting format.
         pub width: Count<'a>,
         /// The span of the width formatting flag (for diagnostics).
-        pub width_span: Option<InnerSpan>,
+        pub width_span: *const InnerSpan,
         /// The descriptor string representing the name of the format desired for
         /// this argument, this can be empty or any number of characters, although
         /// it is required to be one word.
         pub ty: &'a str,
         /// The span of the descriptor string (for diagnostics).
-        pub ty_span: Option<InnerSpan>,
+        pub ty_span: *const InnerSpan,
     }
 
     /// Enum describing where an argument for a format can be located.
@@ -197,6 +220,11 @@  mod ffi {
             match old {
                 generic_format_parser::Piece::String(x) => Piece::String(x),
                 generic_format_parser::Piece::NextArgument(x) => {
+                    // FIXME: This is problematic - if we do this, then we probably run into the issue that the Box
+                    // is freed at the end of the call to collect_pieces. If we just .leak() it, then we have
+                    // a memory leak... Should we send the info back to the Rust lib afterwards to free it?
+                    // This is definitely the best way - store that pointer in the FFI piece and rebuild the Box
+                    // in a Rust destructor.
                     Piece::NextArgument(Box::new(Into::<Argument>::into(*x)))
                 }
             }
@@ -240,18 +268,18 @@  mod ffi {
         fn from(old: generic_format_parser::FormatSpec<'a>) -> Self {
             FormatSpec {
                 fill: old.fill,
-                fill_span: old.fill_span.map(Into::into),
+                fill_span: old.fill_span.map(Into::into).into_ffi(),
                 align: old.align.into(),
-                sign: old.sign.map(Into::into),
+                sign: old.sign.map(Into::into).into_ffi(),
                 alternate: old.alternate,
                 zero_pad: old.zero_pad,
-                debug_hex: old.debug_hex.map(Into::into),
+                debug_hex: old.debug_hex.map(Into::into).into_ffi(),
                 precision: old.precision.into(),
-                precision_span: old.precision_span.map(Into::into),
+                precision_span: old.precision_span.map(Into::into).into_ffi(),
                 width: old.width.into(),
-                width_span: old.width_span.map(Into::into),
+                width_span: old.width_span.map(Into::into).into_ffi(),
                 ty: old.ty,
-                ty_span: old.ty_span.map(Into::into),
+                ty_span: old.ty_span.map(Into::into).into_ffi(),
             }
         }
     }
@@ -327,6 +355,8 @@  pub extern "C" fn collect_pieces(input: *const libc::c_char) -> PieceSlice {
         .map(Into::into)
         .collect();
 
+    println!("debug: {:?}, {:?}", pieces.as_ptr(), pieces.len());
+
     PieceSlice {
         base_ptr: pieces.as_ptr(),
         len: pieces.len(),