[3/3] RISC-V: Avoid parsing arch string repeatedly for dis-assembler
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_binutils_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_binutils_check--master-arm |
success
|
Test passed
|
Commit Message
Since we now always generate $x+isa for now, these would increase the
dis-assemble time by parsing the same architecture string repeatedly. We
already have `arch_str' field into `subset_list' to record the current
architecture stirng, but it's only useful for assembler, since dis-assembler
and linker don't need it before. Now for dis-assembler, we just need to
update the `arch_str' after parsing the architecture stirng, and then avoid
parsing repeatedly if the strings are the same.
---
bfd/elfnn-riscv.c | 3 ++-
bfd/elfxx-riscv.c | 9 ++++++++-
bfd/elfxx-riscv.h | 2 +-
gas/config/tc-riscv.c | 23 +++++------------------
opcodes/riscv-dis.c | 28 +++++++++++++++++++++-------
5 files changed, 37 insertions(+), 28 deletions(-)
Comments
Committed, thanks.
Nelson
On Thu, Mar 13, 2025 at 10:31 AM Nelson Chu <nelson@rivosinc.com> wrote:
> Since we now always generate $x+isa for now, these would increase the
> dis-assemble time by parsing the same architecture string repeatedly. We
> already have `arch_str' field into `subset_list' to record the current
> architecture stirng, but it's only useful for assembler, since
> dis-assembler
> and linker don't need it before. Now for dis-assembler, we just need to
> update the `arch_str' after parsing the architecture stirng, and then avoid
> parsing repeatedly if the strings are the same.
> ---
> bfd/elfnn-riscv.c | 3 ++-
> bfd/elfxx-riscv.c | 9 ++++++++-
> bfd/elfxx-riscv.h | 2 +-
> gas/config/tc-riscv.c | 23 +++++------------------
> opcodes/riscv-dis.c | 28 +++++++++++++++++++++-------
> 5 files changed, 37 insertions(+), 28 deletions(-)
>
> diff --git a/bfd/elfnn-riscv.c b/bfd/elfnn-riscv.c
> index 85dcb924f14..27473926fbb 100644
> --- a/bfd/elfnn-riscv.c
> +++ b/bfd/elfnn-riscv.c
> @@ -4005,7 +4005,8 @@ riscv_merge_arch_attr_info (bfd *ibfd, char
> *in_arch, char *out_arch)
> if (merged_arch_str != NULL)
> free ((void *) merged_arch_str);
>
> - merged_arch_str = riscv_arch_str (ARCH_SIZE, &merged_subsets);
> + merged_arch_str = riscv_arch_str (ARCH_SIZE, &merged_subsets,
> + false/* update */);
>
> /* Release the subset lists. */
> riscv_release_subset_list (&in_subsets);
> diff --git a/bfd/elfxx-riscv.c b/bfd/elfxx-riscv.c
> index ffcf32be69e..72610dc7a74 100644
> --- a/bfd/elfxx-riscv.c
> +++ b/bfd/elfxx-riscv.c
> @@ -2328,7 +2328,7 @@ riscv_arch_str1 (riscv_subset_t *subset,
> /* Convert subset information into string with explicit versions. */
>
> char *
> -riscv_arch_str (unsigned xlen, const riscv_subset_list_t *subset)
> +riscv_arch_str (unsigned xlen, riscv_subset_list_t *subset, bool update)
> {
> size_t arch_str_len = riscv_estimate_arch_strlen (subset);
> char *attr_str = xmalloc (arch_str_len);
> @@ -2339,6 +2339,13 @@ riscv_arch_str (unsigned xlen, const
> riscv_subset_list_t *subset)
> riscv_arch_str1 (subset->head, attr_str, buf, arch_str_len);
> free (buf);
>
> + if (update)
> + {
> + if (subset->arch_str != NULL)
> + free ((void *) subset->arch_str);
> + subset->arch_str = attr_str;
> + }
> +
> return attr_str;
> }
>
> diff --git a/bfd/elfxx-riscv.h b/bfd/elfxx-riscv.h
> index 19e04adfa6a..1ce682a97ac 100644
> --- a/bfd/elfxx-riscv.h
> +++ b/bfd/elfxx-riscv.h
> @@ -98,7 +98,7 @@ extern void
> riscv_release_subset_list (riscv_subset_list_t *);
>
> extern char *
> -riscv_arch_str (unsigned, const riscv_subset_list_t *);
> +riscv_arch_str (unsigned, riscv_subset_list_t *, bool);
>
> extern size_t
> riscv_estimate_digit (unsigned);
> diff --git a/gas/config/tc-riscv.c b/gas/config/tc-riscv.c
> index 31359afa902..8485ad441f5 100644
> --- a/gas/config/tc-riscv.c
> +++ b/gas/config/tc-riscv.c
> @@ -325,19 +325,6 @@ static riscv_parse_subset_t riscv_rps_as =
> true, /* check_unknown_prefixed_ext. */
> };
>
> -/* Update file/function-level architecture string according to the
> - subset_list. */
> -
> -static void
> -riscv_set_arch_str (const char **arch_str_p)
> -{
> - riscv_subset_list_t *subsets = riscv_rps_as.subset_list;
> - const char *arch_str = *arch_str_p;
> - if (arch_str != NULL)
> - free ((void *) arch_str);
> - *arch_str_p = riscv_arch_str (xlen, subsets);
> -}
> -
> /* This structure is used to hold a stack of .option values. */
> struct riscv_option_stack
> {
> @@ -369,8 +356,8 @@ riscv_set_arch (const char *s)
> }
> riscv_release_subset_list (riscv_rps_as.subset_list);
> riscv_parse_subset (&riscv_rps_as, s);
> - riscv_set_arch_str (&file_arch_str);
> - riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
> + riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
> + file_arch_str = strdup (riscv_rps_as.subset_list->arch_str);
>
> riscv_set_rvc (riscv_subset_supports (&riscv_rps_as, "c")
> || riscv_subset_supports (&riscv_rps_as, "zca"));
> @@ -4947,13 +4934,13 @@ s_riscv_option (int x ATTRIBUTE_UNUSED)
> if (strcmp (name, "rvc") == 0)
> {
> riscv_update_subset (&riscv_rps_as, "+c");
> - riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
> + riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
> riscv_set_rvc (true);
> }
> else if (strcmp (name, "norvc") == 0)
> {
> riscv_update_subset (&riscv_rps_as, "-c");
> - riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
> + riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
> riscv_set_rvc (false);
> }
> else if (strcmp (name, "pic") == 0)
> @@ -4974,7 +4961,7 @@ s_riscv_option (int x ATTRIBUTE_UNUSED)
> if (is_whitespace (*name) && *name != '\0')
> name++;
> riscv_update_subset (&riscv_rps_as, name);
> - riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
> + riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
>
> riscv_set_rvc (riscv_subset_supports (&riscv_rps_as, "c")
> || riscv_subset_supports (&riscv_rps_as, "zca"));
> diff --git a/opcodes/riscv-dis.c b/opcodes/riscv-dis.c
> index c5e93620b7d..c341a0fe317 100644
> --- a/opcodes/riscv-dis.c
> +++ b/opcodes/riscv-dis.c
> @@ -1051,6 +1051,23 @@ riscv_disassemble_insn (bfd_vma memaddr,
> return insnlen;
> }
>
> +/* Decide if we need to parse the architecture string again, also record
> the
> + string into the current subset list. */
> +
> +static void
> +riscv_dis_parse_subset (struct disassemble_info *info, const char
> *arch_new)
> +{
> + struct riscv_private_data *pd = info->private_data;
> + const char *arch_subset_list = pd->riscv_rps_dis.subset_list->arch_str;
> + if (arch_subset_list == NULL || strcmp (arch_subset_list, arch_new) !=
> 0)
> + {
> + riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
> + riscv_parse_subset (&pd->riscv_rps_dis, arch_new);
> + riscv_arch_str (pd->xlen, pd->riscv_rps_dis.subset_list,
> + true/* update */);
> + }
> +}
> +
> /* If we find the suitable mapping symbol update the STATE.
> Otherwise, do nothing. */
>
> @@ -1073,13 +1090,11 @@ riscv_update_map_state (int n,
> else if (strcmp (name, "$x") == 0)
> {
> *state = MAP_INSN;
> - riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
> - riscv_parse_subset (&pd->riscv_rps_dis, pd->default_arch);
> + riscv_dis_parse_subset (info, pd->default_arch);
> }
> else if (strncmp (name, "$xrv", 4) == 0)
> {
> *state = MAP_INSN;
> - riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
>
> /* ISA mapping string may be numbered, suffixed with '.n'. Do not
> consider this as part of the ISA string. */
> @@ -1090,11 +1105,11 @@ riscv_update_map_state (int n,
> char *name_substr = xmalloc (suffix_index + 1);
> strncpy (name_substr, name, suffix_index);
> name_substr[suffix_index] = '\0';
> - riscv_parse_subset (&pd->riscv_rps_dis, name_substr + 2);
> + riscv_dis_parse_subset (info, name_substr + 2);
> free (name_substr);
> }
> else
> - riscv_parse_subset (&pd->riscv_rps_dis, name + 2);
> + riscv_dis_parse_subset (info, name + 2);
> }
> }
>
> @@ -1409,8 +1424,6 @@ riscv_init_disasm_info (struct disassemble_info
> *info)
> }
> }
> }
> - riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
> - riscv_parse_subset (&pd->riscv_rps_dis, pd->default_arch);
>
> pd->last_map_symbol = -1;
> pd->last_stop_offset = 0;
> @@ -1423,6 +1436,7 @@ riscv_init_disasm_info (struct disassemble_info
> *info)
> pd->all_ext = false;
>
> info->private_data = pd;
> + riscv_dis_parse_subset (info, pd->default_arch);
> return true;
> }
>
> --
> 2.39.3 (Apple Git-146)
>
>
@@ -4005,7 +4005,8 @@ riscv_merge_arch_attr_info (bfd *ibfd, char *in_arch, char *out_arch)
if (merged_arch_str != NULL)
free ((void *) merged_arch_str);
- merged_arch_str = riscv_arch_str (ARCH_SIZE, &merged_subsets);
+ merged_arch_str = riscv_arch_str (ARCH_SIZE, &merged_subsets,
+ false/* update */);
/* Release the subset lists. */
riscv_release_subset_list (&in_subsets);
@@ -2328,7 +2328,7 @@ riscv_arch_str1 (riscv_subset_t *subset,
/* Convert subset information into string with explicit versions. */
char *
-riscv_arch_str (unsigned xlen, const riscv_subset_list_t *subset)
+riscv_arch_str (unsigned xlen, riscv_subset_list_t *subset, bool update)
{
size_t arch_str_len = riscv_estimate_arch_strlen (subset);
char *attr_str = xmalloc (arch_str_len);
@@ -2339,6 +2339,13 @@ riscv_arch_str (unsigned xlen, const riscv_subset_list_t *subset)
riscv_arch_str1 (subset->head, attr_str, buf, arch_str_len);
free (buf);
+ if (update)
+ {
+ if (subset->arch_str != NULL)
+ free ((void *) subset->arch_str);
+ subset->arch_str = attr_str;
+ }
+
return attr_str;
}
@@ -98,7 +98,7 @@ extern void
riscv_release_subset_list (riscv_subset_list_t *);
extern char *
-riscv_arch_str (unsigned, const riscv_subset_list_t *);
+riscv_arch_str (unsigned, riscv_subset_list_t *, bool);
extern size_t
riscv_estimate_digit (unsigned);
@@ -325,19 +325,6 @@ static riscv_parse_subset_t riscv_rps_as =
true, /* check_unknown_prefixed_ext. */
};
-/* Update file/function-level architecture string according to the
- subset_list. */
-
-static void
-riscv_set_arch_str (const char **arch_str_p)
-{
- riscv_subset_list_t *subsets = riscv_rps_as.subset_list;
- const char *arch_str = *arch_str_p;
- if (arch_str != NULL)
- free ((void *) arch_str);
- *arch_str_p = riscv_arch_str (xlen, subsets);
-}
-
/* This structure is used to hold a stack of .option values. */
struct riscv_option_stack
{
@@ -369,8 +356,8 @@ riscv_set_arch (const char *s)
}
riscv_release_subset_list (riscv_rps_as.subset_list);
riscv_parse_subset (&riscv_rps_as, s);
- riscv_set_arch_str (&file_arch_str);
- riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
+ riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
+ file_arch_str = strdup (riscv_rps_as.subset_list->arch_str);
riscv_set_rvc (riscv_subset_supports (&riscv_rps_as, "c")
|| riscv_subset_supports (&riscv_rps_as, "zca"));
@@ -4947,13 +4934,13 @@ s_riscv_option (int x ATTRIBUTE_UNUSED)
if (strcmp (name, "rvc") == 0)
{
riscv_update_subset (&riscv_rps_as, "+c");
- riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
+ riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
riscv_set_rvc (true);
}
else if (strcmp (name, "norvc") == 0)
{
riscv_update_subset (&riscv_rps_as, "-c");
- riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
+ riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
riscv_set_rvc (false);
}
else if (strcmp (name, "pic") == 0)
@@ -4974,7 +4961,7 @@ s_riscv_option (int x ATTRIBUTE_UNUSED)
if (is_whitespace (*name) && *name != '\0')
name++;
riscv_update_subset (&riscv_rps_as, name);
- riscv_set_arch_str (&riscv_rps_as.subset_list->arch_str);
+ riscv_arch_str (xlen, riscv_rps_as.subset_list, true/* update */);
riscv_set_rvc (riscv_subset_supports (&riscv_rps_as, "c")
|| riscv_subset_supports (&riscv_rps_as, "zca"));
@@ -1051,6 +1051,23 @@ riscv_disassemble_insn (bfd_vma memaddr,
return insnlen;
}
+/* Decide if we need to parse the architecture string again, also record the
+ string into the current subset list. */
+
+static void
+riscv_dis_parse_subset (struct disassemble_info *info, const char *arch_new)
+{
+ struct riscv_private_data *pd = info->private_data;
+ const char *arch_subset_list = pd->riscv_rps_dis.subset_list->arch_str;
+ if (arch_subset_list == NULL || strcmp (arch_subset_list, arch_new) != 0)
+ {
+ riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
+ riscv_parse_subset (&pd->riscv_rps_dis, arch_new);
+ riscv_arch_str (pd->xlen, pd->riscv_rps_dis.subset_list,
+ true/* update */);
+ }
+}
+
/* If we find the suitable mapping symbol update the STATE.
Otherwise, do nothing. */
@@ -1073,13 +1090,11 @@ riscv_update_map_state (int n,
else if (strcmp (name, "$x") == 0)
{
*state = MAP_INSN;
- riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
- riscv_parse_subset (&pd->riscv_rps_dis, pd->default_arch);
+ riscv_dis_parse_subset (info, pd->default_arch);
}
else if (strncmp (name, "$xrv", 4) == 0)
{
*state = MAP_INSN;
- riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
/* ISA mapping string may be numbered, suffixed with '.n'. Do not
consider this as part of the ISA string. */
@@ -1090,11 +1105,11 @@ riscv_update_map_state (int n,
char *name_substr = xmalloc (suffix_index + 1);
strncpy (name_substr, name, suffix_index);
name_substr[suffix_index] = '\0';
- riscv_parse_subset (&pd->riscv_rps_dis, name_substr + 2);
+ riscv_dis_parse_subset (info, name_substr + 2);
free (name_substr);
}
else
- riscv_parse_subset (&pd->riscv_rps_dis, name + 2);
+ riscv_dis_parse_subset (info, name + 2);
}
}
@@ -1409,8 +1424,6 @@ riscv_init_disasm_info (struct disassemble_info *info)
}
}
}
- riscv_release_subset_list (pd->riscv_rps_dis.subset_list);
- riscv_parse_subset (&pd->riscv_rps_dis, pd->default_arch);
pd->last_map_symbol = -1;
pd->last_stop_offset = 0;
@@ -1423,6 +1436,7 @@ riscv_init_disasm_info (struct disassemble_info *info)
pd->all_ext = false;
info->private_data = pd;
+ riscv_dis_parse_subset (info, pd->default_arch);
return true;
}