[3/3] RISC-V: Avoid parsing arch string repeatedly for dis-assembler

Message ID 20250313023136.1930-3-nelson@rivosinc.com
State New
Headers
Series [1/3] RISC-V: Fixed riscv_update_subset1 returning wrong boolean value |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Nelson Chu March 13, 2025, 2:31 a.m. UTC
  Since we now always generate $x+isa for now, these would increase the
dis-assemble time by parsing the same architecture string repeatedly.  We
already have `arch_str' field into `subset_list' to record the current
architecture stirng, but it's only useful for assembler, since dis-assembler
and linker don't need it before.  Now for dis-assembler, we just need to
update the `arch_str' after parsing the architecture stirng, and then avoid
parsing repeatedly if the strings are the same.
---
 bfd/elfnn-riscv.c     |  3 ++-
 bfd/elfxx-riscv.c     |  9 ++++++++-
 bfd/elfxx-riscv.h     |  2 +-
 gas/config/tc-riscv.c | 23 +++++------------------
 opcodes/riscv-dis.c   | 28 +++++++++++++++++++++-------
 5 files changed, 37 insertions(+), 28 deletions(-)
  

Comments

Nelson Chu March 18, 2025, 4:23 a.m. UTC | #1
Committed, thanks.

Nelson

On Thu, Mar 13, 2025 at 10:31 AM Nelson Chu <nelson@rivosinc.com> wrote:

> Since we now always generate $x+isa for now, these would increase the
> dis-assemble time by parsing the same architecture string repeatedly.  We
> already have `arch_str' field into `subset_list' to record the current
> architecture stirng, but it's only useful for assembler, since
> dis-assembler
> and linker don't need it before.  Now for dis-assembler, we just need to
> update the `arch_str' after parsing the architecture stirng, and then avoid
> parsing repeatedly if the strings are the same.
> ---
>  bfd/elfnn-riscv.c     |  3 ++-
>  bfd/elfxx-riscv.c     |  9 ++++++++-
>  bfd/elfxx-riscv.h     |  2 +-
>  gas/config/tc-riscv.c | 23 +++++------------------
>  opcodes/riscv-dis.c   | 28 +++++++++++++++++++++-------
>  5 files changed, 37 insertions(+), 28 deletions(-)
>
> diff --git a/bfd/elfnn-riscv.c b/bfd/elfnn-riscv.c
> index 85dcb924f14..27473926fbb 100644
> --- a/bfd/elfnn-riscv.c
> +++ b/bfd/elfnn-riscv.c
> @@ -4005,7 +4005,8 @@ riscv_merge_arch_attr_info (bfd *ibfd, char
> *in_arch, char *out_arch)
>    if (merged_arch_str != NULL)
>      free ((void *) merged_arch_str);
>
> -  merged_arch_str = riscv_arch_str (ARCH_SIZE, &merged_subsets);
> +  merged_arch_str = riscv_arch_str (ARCH_SIZE, &merged_subsets,
> +                                   false/* update */);
>
>    /* Release the subset lists.  */
>    riscv_release_subset_list (&in_subsets);
> diff --git a/bfd/elfxx-riscv.c b/bfd/elfxx-riscv.c
> index ffcf32be69e..72610dc7a74 100644
> --- a/bfd/elfxx-riscv.c
> +++ b/bfd/elfxx-riscv.c
> @@ -2328,7 +2328,7 @@ riscv_arch_str1 (riscv_subset_t *subset,
>  /* Convert subset information into string with explicit versions.  */
>
>  char *
> -riscv_arch_str (unsigned xlen, const riscv_subset_list_t *subset)
> +riscv_arch_str (unsigned xlen, riscv_subset_list_t *subset, bool update)
>  {
>    size_t arch_str_len = riscv_estimate_arch_strlen (subset);
>    char *attr_str = xmalloc (arch_str_len);
> @@ -2339,6 +2339,13 @@ riscv_arch_str (unsigned xlen, const
> riscv_subset_list_t *subset)
>    riscv_arch_str1 (subset->head, attr_str, buf, arch_str_len);
>    free (buf);
>
> +  if (update)
> +    {
> +      if (subset->arch_str != NULL)
> +       free ((void *) subset->arch_str);
> +      subset->arch_str = attr_str;
> +    }
> +
>    return attr_str;
>  }
>
> diff --git a/bfd/elfxx-riscv.h b/bfd/elfxx-riscv.h
> index 19e04adfa6a..1ce682a97ac 100644
> --- a/bfd/elfxx-riscv.h
> +++ b/bfd/elfxx-riscv.h
> @@ -98,7 +98,7 @@ extern void
>  riscv_release_subset_list (riscv_subset_list_t *);
>
>  extern char *
> -riscv_arch_str (unsigned, const riscv_subset_list_t *);
> +riscv_arch_str (unsigned, riscv_subset_list_t *, bool);
>
>  extern size_t
>  riscv_estimate_digit (unsigned);
> diff --git a/gas/config/tc-riscv.c b/gas/config/tc-riscv.c
> index 31359afa902..8485ad441f5 100644
> --- a/gas/config/tc-riscv.c
> +++ b/gas/config/tc-riscv.c
> @@ -325,19 +325,6 @@ static riscv_parse_subset_t riscv_rps_as =
>    true,                        /* check_unknown_prefixed_ext.  */
>  };
>
> -/* Update file/function-level architecture string according to the
> -   subset_list.  */
> -
> -static void
> -riscv_set_arch_str (const char **arch_str_p)
> -{
> -  riscv_subset_list_t *subsets = riscv_rps_as.subset_list;
> -  const char *arch_str = *arch_str_p;
> -  if (arch_str != NULL)
> -    free ((void *) arch_str);
> -  *arch_str_p = riscv_arch_str (xlen, subsets);
> -}
> -
>  /* This structure is used to hold a stack of .option values.  */
>  struct riscv_option_stack
>  {
> @@ -369,8 +356,8 @@ riscv_set_arch (const char *s)
>      }
>    riscv_release_subset_list (riscv_rps_as.subset_list);
>    riscv_parse_subset (&riscv_rps_as, s);
> -  riscv_set_arch_str (&file_arch_str);
> -  riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
> +  riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
> +  file_arch_str = strdup (riscv_rps_as.subset_list->arch_str);
>
>    riscv_set_rvc (riscv_subset_supports (&riscv_rps_as, "c")
>                  || riscv_subset_supports (&riscv_rps_as, "zca"));
> @@ -4947,13 +4934,13 @@ s_riscv_option (int x ATTRIBUTE_UNUSED)
>    if (strcmp (name, "rvc") == 0)
>      {
>        riscv_update_subset (&riscv_rps_as, "+c");
> -      riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
> +      riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
>        riscv_set_rvc (true);
>      }
>    else if (strcmp (name, "norvc") == 0)
>      {
>        riscv_update_subset (&riscv_rps_as, "-c");
> -      riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
> +      riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
>        riscv_set_rvc (false);
>      }
>    else if (strcmp (name, "pic") == 0)
> @@ -4974,7 +4961,7 @@ s_riscv_option (int x ATTRIBUTE_UNUSED)
>        if (is_whitespace (*name) && *name != '\0')
>         name++;
>        riscv_update_subset (&riscv_rps_as, name);
> -      riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
> +      riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
>
>        riscv_set_rvc (riscv_subset_supports (&riscv_rps_as, "c")
>                      || riscv_subset_supports (&riscv_rps_as, "zca"));
> diff --git a/opcodes/riscv-dis.c b/opcodes/riscv-dis.c
> index c5e93620b7d..c341a0fe317 100644
> --- a/opcodes/riscv-dis.c
> +++ b/opcodes/riscv-dis.c
> @@ -1051,6 +1051,23 @@ riscv_disassemble_insn (bfd_vma memaddr,
>    return insnlen;
>  }
>
> +/* Decide if we need to parse the architecture string again, also record
> the
> +   string into the current subset list.  */
> +
> +static void
> +riscv_dis_parse_subset (struct disassemble_info *info, const char
> *arch_new)
> +{
> +  struct riscv_private_data *pd = info->private_data;
> +  const char *arch_subset_list = pd->riscv_rps_dis.subset_list->arch_str;
> +  if (arch_subset_list == NULL || strcmp (arch_subset_list, arch_new) !=
> 0)
> +    {
> +      riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
> +      riscv_parse_subset (&pd->riscv_rps_dis, arch_new);
> +      riscv_arch_str (pd->xlen, pd->riscv_rps_dis.subset_list,
> +                     true/* update */);
> +    }
> +}
> +
>  /* If we find the suitable mapping symbol update the STATE.
>     Otherwise, do nothing.  */
>
> @@ -1073,13 +1090,11 @@ riscv_update_map_state (int n,
>    else if (strcmp (name, "$x") == 0)
>      {
>        *state = MAP_INSN;
> -      riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
> -      riscv_parse_subset (&pd->riscv_rps_dis, pd->default_arch);
> +      riscv_dis_parse_subset (info, pd->default_arch);
>      }
>    else if (strncmp (name, "$xrv", 4) == 0)
>      {
>        *state = MAP_INSN;
> -      riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
>
>        /* ISA mapping string may be numbered, suffixed with '.n'. Do not
>          consider this as part of the ISA string.  */
> @@ -1090,11 +1105,11 @@ riscv_update_map_state (int n,
>           char *name_substr = xmalloc (suffix_index + 1);
>           strncpy (name_substr, name, suffix_index);
>           name_substr[suffix_index] = '\0';
> -         riscv_parse_subset (&pd->riscv_rps_dis, name_substr + 2);
> +         riscv_dis_parse_subset (info, name_substr + 2);
>           free (name_substr);
>         }
>        else
> -       riscv_parse_subset (&pd->riscv_rps_dis, name + 2);
> +       riscv_dis_parse_subset (info, name + 2);
>      }
>  }
>
> @@ -1409,8 +1424,6 @@ riscv_init_disasm_info (struct disassemble_info
> *info)
>             }
>         }
>      }
> -  riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
> -  riscv_parse_subset (&pd->riscv_rps_dis, pd->default_arch);
>
>    pd->last_map_symbol = -1;
>    pd->last_stop_offset = 0;
> @@ -1423,6 +1436,7 @@ riscv_init_disasm_info (struct disassemble_info
> *info)
>    pd->all_ext = false;
>
>    info->private_data = pd;
> +  riscv_dis_parse_subset (info, pd->default_arch);
>    return true;
>  }
>
> --
> 2.39.3 (Apple Git-146)
>
>
  

Patch

diff --git a/bfd/elfnn-riscv.c b/bfd/elfnn-riscv.c
index 85dcb924f14..27473926fbb 100644
--- a/bfd/elfnn-riscv.c
+++ b/bfd/elfnn-riscv.c
@@ -4005,7 +4005,8 @@  riscv_merge_arch_attr_info (bfd *ibfd, char *in_arch, char *out_arch)
   if (merged_arch_str != NULL)
     free ((void *) merged_arch_str);
 
-  merged_arch_str = riscv_arch_str (ARCH_SIZE, &merged_subsets);
+  merged_arch_str = riscv_arch_str (ARCH_SIZE, &merged_subsets,
+				    false/* update */);
 
   /* Release the subset lists.  */
   riscv_release_subset_list (&in_subsets);
diff --git a/bfd/elfxx-riscv.c b/bfd/elfxx-riscv.c
index ffcf32be69e..72610dc7a74 100644
--- a/bfd/elfxx-riscv.c
+++ b/bfd/elfxx-riscv.c
@@ -2328,7 +2328,7 @@  riscv_arch_str1 (riscv_subset_t *subset,
 /* Convert subset information into string with explicit versions.  */
 
 char *
-riscv_arch_str (unsigned xlen, const riscv_subset_list_t *subset)
+riscv_arch_str (unsigned xlen, riscv_subset_list_t *subset, bool update)
 {
   size_t arch_str_len = riscv_estimate_arch_strlen (subset);
   char *attr_str = xmalloc (arch_str_len);
@@ -2339,6 +2339,13 @@  riscv_arch_str (unsigned xlen, const riscv_subset_list_t *subset)
   riscv_arch_str1 (subset->head, attr_str, buf, arch_str_len);
   free (buf);
 
+  if (update)
+    {
+      if (subset->arch_str != NULL)
+	free ((void *) subset->arch_str);
+      subset->arch_str = attr_str;
+    }
+
   return attr_str;
 }
 
diff --git a/bfd/elfxx-riscv.h b/bfd/elfxx-riscv.h
index 19e04adfa6a..1ce682a97ac 100644
--- a/bfd/elfxx-riscv.h
+++ b/bfd/elfxx-riscv.h
@@ -98,7 +98,7 @@  extern void
 riscv_release_subset_list (riscv_subset_list_t *);
 
 extern char *
-riscv_arch_str (unsigned, const riscv_subset_list_t *);
+riscv_arch_str (unsigned, riscv_subset_list_t *, bool);
 
 extern size_t
 riscv_estimate_digit (unsigned);
diff --git a/gas/config/tc-riscv.c b/gas/config/tc-riscv.c
index 31359afa902..8485ad441f5 100644
--- a/gas/config/tc-riscv.c
+++ b/gas/config/tc-riscv.c
@@ -325,19 +325,6 @@  static riscv_parse_subset_t riscv_rps_as =
   true,			/* check_unknown_prefixed_ext.  */
 };
 
-/* Update file/function-level architecture string according to the
-   subset_list.  */
-
-static void
-riscv_set_arch_str (const char **arch_str_p)
-{
-  riscv_subset_list_t *subsets = riscv_rps_as.subset_list;
-  const char *arch_str = *arch_str_p;
-  if (arch_str != NULL)
-    free ((void *) arch_str);
-  *arch_str_p = riscv_arch_str (xlen, subsets);
-}
-
 /* This structure is used to hold a stack of .option values.  */
 struct riscv_option_stack
 {
@@ -369,8 +356,8 @@  riscv_set_arch (const char *s)
     }
   riscv_release_subset_list (riscv_rps_as.subset_list);
   riscv_parse_subset (&riscv_rps_as, s);
-  riscv_set_arch_str (&file_arch_str);
-  riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
+  riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
+  file_arch_str = strdup (riscv_rps_as.subset_list->arch_str);
 
   riscv_set_rvc (riscv_subset_supports (&riscv_rps_as, "c")
 		 || riscv_subset_supports (&riscv_rps_as, "zca"));
@@ -4947,13 +4934,13 @@  s_riscv_option (int x ATTRIBUTE_UNUSED)
   if (strcmp (name, "rvc") == 0)
     {
       riscv_update_subset (&riscv_rps_as, "+c");
-      riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
+      riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
       riscv_set_rvc (true);
     }
   else if (strcmp (name, "norvc") == 0)
     {
       riscv_update_subset (&riscv_rps_as, "-c");
-      riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
+      riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
       riscv_set_rvc (false);
     }
   else if (strcmp (name, "pic") == 0)
@@ -4974,7 +4961,7 @@  s_riscv_option (int x ATTRIBUTE_UNUSED)
       if (is_whitespace (*name) && *name != '\0')
 	name++;
       riscv_update_subset (&riscv_rps_as, name);
-      riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
+      riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
 
       riscv_set_rvc (riscv_subset_supports (&riscv_rps_as, "c")
 		     || riscv_subset_supports (&riscv_rps_as, "zca"));
diff --git a/opcodes/riscv-dis.c b/opcodes/riscv-dis.c
index c5e93620b7d..c341a0fe317 100644
--- a/opcodes/riscv-dis.c
+++ b/opcodes/riscv-dis.c
@@ -1051,6 +1051,23 @@  riscv_disassemble_insn (bfd_vma memaddr,
   return insnlen;
 }
 
+/* Decide if we need to parse the architecture string again, also record the
+   string into the current subset list.  */
+
+static void
+riscv_dis_parse_subset (struct disassemble_info *info, const char *arch_new)
+{
+  struct riscv_private_data *pd = info->private_data;
+  const char *arch_subset_list = pd->riscv_rps_dis.subset_list->arch_str;
+  if (arch_subset_list == NULL || strcmp (arch_subset_list, arch_new) != 0)
+    {
+      riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
+      riscv_parse_subset (&pd->riscv_rps_dis, arch_new);
+      riscv_arch_str (pd->xlen, pd->riscv_rps_dis.subset_list,
+		      true/* update */);
+    }
+}
+
 /* If we find the suitable mapping symbol update the STATE.
    Otherwise, do nothing.  */
 
@@ -1073,13 +1090,11 @@  riscv_update_map_state (int n,
   else if (strcmp (name, "$x") == 0)
     {
       *state = MAP_INSN;
-      riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
-      riscv_parse_subset (&pd->riscv_rps_dis, pd->default_arch);
+      riscv_dis_parse_subset (info, pd->default_arch);
     }
   else if (strncmp (name, "$xrv", 4) == 0)
     {
       *state = MAP_INSN;
-      riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
 
       /* ISA mapping string may be numbered, suffixed with '.n'. Do not
 	 consider this as part of the ISA string.  */
@@ -1090,11 +1105,11 @@  riscv_update_map_state (int n,
 	  char *name_substr = xmalloc (suffix_index + 1);
 	  strncpy (name_substr, name, suffix_index);
 	  name_substr[suffix_index] = '\0';
-	  riscv_parse_subset (&pd->riscv_rps_dis, name_substr + 2);
+	  riscv_dis_parse_subset (info, name_substr + 2);
 	  free (name_substr);
 	}
       else
-	riscv_parse_subset (&pd->riscv_rps_dis, name + 2);
+	riscv_dis_parse_subset (info, name + 2);
     }
 }
 
@@ -1409,8 +1424,6 @@  riscv_init_disasm_info (struct disassemble_info *info)
 	    }
 	}
     }
-  riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
-  riscv_parse_subset (&pd->riscv_rps_dis, pd->default_arch);
 
   pd->last_map_symbol = -1;
   pd->last_stop_offset = 0;
@@ -1423,6 +1436,7 @@  riscv_init_disasm_info (struct disassemble_info *info)
   pd->all_ext = false;
 
   info->private_data = pd;
+  riscv_dis_parse_subset (info, pd->default_arch);
   return true;
 }