[PATCHv3] libiberty rust-demangle, ignore .suffix

Message ID 20220116003534.52674-1-mark@klomp.org
State Committed
Commit d3b2ead595467166c849950ecd3710501a5094d9
Headers
Series [PATCHv3] libiberty rust-demangle, ignore .suffix |

Commit Message

Mark Wielaard Jan. 16, 2022, 12:35 a.m. UTC
  Rust symbols can have a .suffix because of compiler transformations.
These suffixes can be ignored in the demangled name, which is what this
patch implements: it stops at the first dot for v0 symbols and searches
backwards to the ending 'E' for legacy symbols.

An alternative implementation could follow what C++ does and
represent these as [clone .suffix] tagged onto the demangled name.
But this seems somewhat confusing, since it results in a demangled
name that cannot be mangled again, and it would mean trying to
decode compiler-internal naming.

https://bugs.kde.org/show_bug.cgi?id=445916
https://github.com/rust-lang/rust/issues/60705

libiberty/ChangeLog

	* rust-demangle.c (rust_demangle_callback): Ignore everything
	after '.' char in sym for v0. For legacy symbols search
	backwards to find the last 'E' before any '.'.
	* testsuite/rust-demangle-expected: Add new .suffix testcases.
---
 libiberty/rust-demangle.c                  | 21 ++++++++++++++---
 libiberty/testsuite/rust-demangle-expected | 26 ++++++++++++++++++++++
 2 files changed, 44 insertions(+), 3 deletions(-)

V3 - Add more testcases
   - Allow @ in legacy symbols (which can appear in the .suffix)
  

Comments

Mark Wielaard Feb. 17, 2022, 10:45 a.m. UTC | #1
Ping. Is this OK to commit now?
I am not sure who can approve this.

On Sun, Jan 16, 2022 at 01:35:34AM +0100, Mark Wielaard wrote:
> Rust symbols can have a .suffix because of compiler transformations.
> These can be ignored in the demangled name. Which is what this patch
> implements. By stopping at the first dot for v0 symbols and searching
> backwards to the ending 'E' for legacy symbols.
> 
> An alternative implementation could be to follow what C++ does and
> represent these as [clone .suffix] tagged onto the demangled name.
> But this seems somewhat confusing since it results in a demangled
> name that cannot be mangled again. And it would mean trying to
> decode compiler internal naming.
> 
> https://bugs.kde.org/show_bug.cgi?id=445916
> https://github.com/rust-lang/rust/issues/60705
> 
> libiberty/Changelog
> 
> 	* rust-demangle.c (rust_demangle_callback): Ignore everything
> 	after '.' char in sym for v0. For legacy symbols search
> 	backwards to find the last 'E' before any '.'.
> 	* testsuite/rust-demangle-expected: Add new .suffix testcases.
> ---
>  libiberty/rust-demangle.c                  | 21 ++++++++++++++---
>  libiberty/testsuite/rust-demangle-expected | 26 ++++++++++++++++++++++
>  2 files changed, 44 insertions(+), 3 deletions(-)
> 
> V3 - Add more testcases
>    - Allow @ in legacy symbols (which can appear in the .suffix)
> 
> diff --git a/libiberty/rust-demangle.c b/libiberty/rust-demangle.c
> index 18c760491bdc..42c88161da30 100644
> --- a/libiberty/rust-demangle.c
> +++ b/libiberty/rust-demangle.c
> @@ -1340,13 +1340,19 @@ rust_demangle_callback (const char *mangled, int options,
>    /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
>    for (p = rdm.sym; *p; p++)
>      {
> +      /* Rust v0 symbols can have '.' suffixes, ignore those.  */
> +      if (rdm.version == 0 && *p == '.')
> +        break;
> +
>        rdm.sym_len++;
>  
>        if (*p == '_' || ISALNUM (*p))
>          continue;
>  
> -      /* Legacy Rust symbols can also contain [.:$] characters. */
> -      if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
> +      /* Legacy Rust symbols can also contain [.:$] characters.
> +         Or @ in the .suffix (which will be skipped, see below). */
> +      if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'
> +                                || *p == '@'))
>          continue;
>  
>        return 0;
> @@ -1355,7 +1361,16 @@ rust_demangle_callback (const char *mangled, int options,
>    /* Legacy Rust symbols need to be handled separately. */
>    if (rdm.version == -1)
>      {
> -      /* Legacy Rust symbols always end with E. */
> +      /* Legacy Rust symbols always end with E.  But can be followed by a
> +         .suffix (which we want to ignore).  */
> +      int dot_suffix = 1;
> +      while (rdm.sym_len > 0 &&
> +             !(dot_suffix && rdm.sym[rdm.sym_len - 1] == 'E'))
> +        {
> +          dot_suffix = rdm.sym[rdm.sym_len - 1] == '.';
> +          rdm.sym_len--;
> +        }
> +
>        if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
>          return 0;
>        rdm.sym_len--;
> diff --git a/libiberty/testsuite/rust-demangle-expected b/libiberty/testsuite/rust-demangle-expected
> index 7dca315d0054..b565084cfefa 100644
> --- a/libiberty/testsuite/rust-demangle-expected
> +++ b/libiberty/testsuite/rust-demangle-expected
> @@ -295,3 +295,29 @@ _RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc2202_E
>  --format=auto
>  _RNvNvMCs4fqI2P2rA04_13const_genericINtB4_3FooKpE3foo3FOO
>  <const_generic::Foo<_>>::foo::FOO
> +#
> +# Suffixes
> +#
> +--format=rust
> +_RNvMs0_NtCs5l0EXMQXRMU_21rustc_data_structures17obligation_forestINtB5_16ObligationForestNtNtNtCsdozMG8X9FIu_21rustc_trait_selection6traits7fulfill26PendingPredicateObligationE22register_obligation_atB1v_.llvm.8517020237817239694
> +<rustc_data_structures::obligation_forest::ObligationForest<rustc_trait_selection::traits::fulfill::PendingPredicateObligation>>::register_obligation_at
> +--format=rust
> +_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h27f14859c664490dE.llvm.8091179795805947855
> +core::ptr::drop_in_place<std::rt::lang_start<()>::{{closure}}>
> +# old style rustc llvm thinlto
> +--format=rust
> +_ZN9backtrace3foo17hbb467fcdaea5d79bE.llvm.A5310EB9
> +backtrace::foo
> +--format=rust
> +_ZN9backtrace3foo17hbb467fcdaea5d79bE.llvm.A5310EB9@@16
> +backtrace::foo
> +# new style rustc llvm thinlto
> +--format=rust
> +_RC3foo.llvm.9D1C9369
> +foo
> +--format=rust
> +_RC3foo.llvm.9D1C9369@@16
> +foo
> +--format=rust
> +_RNvC9backtrace3foo.llvm.A5310EB9
> +backtrace::foo
> -- 
> 2.30.2
>
  
Ian Lance Taylor Feb. 17, 2022, 4:36 p.m. UTC | #2
On Thu, Feb 17, 2022 at 2:45 AM Mark Wielaard <mark@klomp.org> wrote:
>
> Ping. Is this OK to commit now?
> I am not sure who can approve this.
>
> On Sun, Jan 16, 2022 at 01:35:34AM +0100, Mark Wielaard wrote:
> > Rust symbols can have a .suffix because of compiler transformations.
> > These can be ignored in the demangled name. Which is what this patch
> > implements. By stopping at the first dot for v0 symbols and searching
> > backwards to the ending 'E' for legacy symbols.
> >
> > An alternative implementation could be to follow what C++ does and
> > represent these as [clone .suffix] tagged onto the demangled name.
> > But this seems somewhat confusing since it results in a demangled
> > name that cannot be mangled again. And it would mean trying to
> > decode compiler internal naming.
> >
> > https://bugs.kde.org/show_bug.cgi?id=445916
> > https://github.com/rust-lang/rust/issues/60705
> >
> > libiberty/Changelog
> >
> >       * rust-demangle.c (rust_demangle_callback): Ignore everything
> >       after '.' char in sym for v0. For legacy symbols search
> >       backwards to find the last 'E' before any '.'.
> >       * testsuite/rust-demangle-expected: Add new .suffix testcases.

This is OK.

Thanks.

Ian
  

Patch

diff --git a/libiberty/rust-demangle.c b/libiberty/rust-demangle.c
index 18c760491bdc..42c88161da30 100644
--- a/libiberty/rust-demangle.c
+++ b/libiberty/rust-demangle.c
@@ -1340,13 +1340,19 @@  rust_demangle_callback (const char *mangled, int options,
   /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
   for (p = rdm.sym; *p; p++)
     {
+      /* Rust v0 symbols can have '.' suffixes, ignore those.  */
+      if (rdm.version == 0 && *p == '.')
+        break;
+
       rdm.sym_len++;
 
       if (*p == '_' || ISALNUM (*p))
         continue;
 
-      /* Legacy Rust symbols can also contain [.:$] characters. */
-      if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
+      /* Legacy Rust symbols can also contain [.:$] characters.
+         Or @ in the .suffix (which will be skipped, see below). */
+      if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'
+                                || *p == '@'))
         continue;
 
       return 0;
@@ -1355,7 +1361,16 @@  rust_demangle_callback (const char *mangled, int options,
   /* Legacy Rust symbols need to be handled separately. */
   if (rdm.version == -1)
     {
-      /* Legacy Rust symbols always end with E. */
+      /* Legacy Rust symbols always end with E.  But can be followed by a
+         .suffix (which we want to ignore).  */
+      int dot_suffix = 1;
+      while (rdm.sym_len > 0 &&
+             !(dot_suffix && rdm.sym[rdm.sym_len - 1] == 'E'))
+        {
+          dot_suffix = rdm.sym[rdm.sym_len - 1] == '.';
+          rdm.sym_len--;
+        }
+
       if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
         return 0;
       rdm.sym_len--;
diff --git a/libiberty/testsuite/rust-demangle-expected b/libiberty/testsuite/rust-demangle-expected
index 7dca315d0054..b565084cfefa 100644
--- a/libiberty/testsuite/rust-demangle-expected
+++ b/libiberty/testsuite/rust-demangle-expected
@@ -295,3 +295,29 @@  _RMCs4fqI2P2rA04_13const_genericINtB0_4CharKc2202_E
 --format=auto
 _RNvNvMCs4fqI2P2rA04_13const_genericINtB4_3FooKpE3foo3FOO
 <const_generic::Foo<_>>::foo::FOO
+#
+# Suffixes
+#
+--format=rust
+_RNvMs0_NtCs5l0EXMQXRMU_21rustc_data_structures17obligation_forestINtB5_16ObligationForestNtNtNtCsdozMG8X9FIu_21rustc_trait_selection6traits7fulfill26PendingPredicateObligationE22register_obligation_atB1v_.llvm.8517020237817239694
+<rustc_data_structures::obligation_forest::ObligationForest<rustc_trait_selection::traits::fulfill::PendingPredicateObligation>>::register_obligation_at
+--format=rust
+_ZN4core3ptr85drop_in_place$LT$std..rt..lang_start$LT$$LP$$RP$$GT$..$u7b$$u7b$closure$u7d$$u7d$$GT$17h27f14859c664490dE.llvm.8091179795805947855
+core::ptr::drop_in_place<std::rt::lang_start<()>::{{closure}}>
+# old style rustc llvm thinlto
+--format=rust
+_ZN9backtrace3foo17hbb467fcdaea5d79bE.llvm.A5310EB9
+backtrace::foo
+--format=rust
+_ZN9backtrace3foo17hbb467fcdaea5d79bE.llvm.A5310EB9@@16
+backtrace::foo
+# new style rustc llvm thinlto
+--format=rust
+_RC3foo.llvm.9D1C9369
+foo
+--format=rust
+_RC3foo.llvm.9D1C9369@@16
+foo
+--format=rust
+_RNvC9backtrace3foo.llvm.A5310EB9
+backtrace::foo