[v2,4/4] Rewrite register asm into hard register constraints
Checks
Context |
Check |
Description |
rivoscibot/toolchain-ci-rivos-apply-patch |
success
|
Patch applied
|
rivoscibot/toolchain-ci-rivos-lint |
warning
|
Lint failed
|
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gcv-lp64d-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-build--linux-rv64gcv-lp64d-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc_zba_zbb_zbc_zbs-lp64d-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gc-lp64d-non-multilib |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc-lp64d-non-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-test |
fail
|
Testing failed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
fail
|
Test failed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
fail
|
Test failed
|
Commit Message
Currently a register asm already materializes during expand. This
means that a hard register is allocated for the very first access of a
register asm as e.g. in an assignment. As a consequence this might lead
to suboptimal register allocation if the assignment and the using asm
statement are spread far apart. Even more problematic are function
calls in between register asm assignments and its using asm statement
since hard registers may be clobbered by a call. The former may be
solved by pulling register asm assignments and asm statements close by.
However, the latter is not easily solved since sometimes function calls
are implicit. For example
int
foo (int *x)
{
register int y asm ("0") = 42;
register int z asm ("1") = *x;
asm ("bar\t%0,%1" : "+r" (z) : "r" (y));
return z;
}
If compiled with address sanitizer, then a function call is introduced
for the memory load which in turn may interfere with the initialization
of register asm y. Likewise, for some targets and configurations even
an operation like an addition may lead to an implicit library call.
In contrast hard register constraints materialize during register
allocation and therefore do not suffer from this, i.e., asm operands are
kept in pseudos until RA. This patch adds the feature of rewriting
local register asm into code which exploits hard register constraints.
For example
register int global asm ("r3");
int foo (int x0)
{
register int x asm ("r4") = x0;
register int y asm ("r5");
asm ("bar\t%0,%1,%2" : "=r" (x) : "0" (x), "r" (global));
x += 42;
asm ("baz\t%0,%1" : "=r" (y) : "r" (x));
return y;
}
is rewritten during gimplification into
register int global asm ("r3");
int foo (int x0)
{
int x = x0;
int y;
asm ("bar\t%0,%1,%2" : "={r4}" (x) : "0" (x), "r" (global));
x += 42;
asm ("baz\t%0,%1" : "={r5}" (y) : "{r4}" (x));
return y;
}
The resulting code solely relies on hard register constraints modulo
global register asm.
Since I consider this an experimental feature, it is hidden behind the new
flag -fdemote-register-asm (I'm open to other naming suggestions).
---
gcc/common.opt | 4 +
gcc/gimplify.cc | 78 +++++++++++++++++++
.../gcc.dg/asm-hard-reg-demotion-1.c | 19 +++++
.../gcc.dg/asm-hard-reg-demotion-2.c | 19 +++++
gcc/testsuite/gcc.dg/asm-hard-reg-demotion.h | 52 +++++++++++++
5 files changed, 172 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-demotion-1.c
create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-demotion-2.c
create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-demotion.h
@@ -3422,6 +3422,10 @@ fverbose-asm
Common Var(flag_verbose_asm)
Add extra commentary to assembler output.
+fdemote-register-asm
+Common Var(flag_demote_register_asm) Init(0)
+Demote local register asm and use hard register constraints instead.
+
fvisibility=
Common Joined RejectNegative Enum(symbol_visibility) Var(default_visibility) Init(VISIBILITY_DEFAULT)
-fvisibility=[default|internal|hidden|protected] Set the default symbol visibility.
@@ -7049,6 +7049,73 @@ num_alternatives (const_tree link)
return num + 1;
}
+/* Local register asm variables whose asm operand constraints have been
+   rewritten by gimplify_demote_register_asm.  gimplify_body clears
+   DECL_REGISTER and DECL_HARD_REGISTER on every member and then empties
+   the set.  */
+static hash_set<tree> demote_register_asm;
+
+/* Rewrite the constraint of asm operand LINK if the operand is a local
+   register asm variable and -fdemote-register-asm is enabled.
+
+   LINK is one element of an ASM_EXPR operand list: TREE_VALUE (LINK) is the
+   operand and TREE_VALUE (TREE_PURPOSE (LINK)) is its constraint string.
+   Every constraint which denotes a register class or an address is replaced
+   by the hard register constraint "{REGNAME}" where REGNAME is the register
+   name the variable was declared with; all other constraint characters are
+   copied unchanged.  The operand is then recorded in demote_register_asm.
+
+   Nothing is done if the flag is off, if the operand is not a local hard
+   register variable, or if its register name cannot be decoded.  */
+
+static void
+gimplify_demote_register_asm (tree link)
+{
+  if (!flag_demote_register_asm)
+    return;
+  tree op = TREE_VALUE (link);
+  if (!VAR_P (op) || !DECL_HARD_REGISTER (op) || is_global_var (op))
+    return;
+  /* Skip the first character of the assembler name; the remainder is what
+     gets decoded as register name and emitted between the braces.
+     NOTE(review): presumably the leading '*' marker -- confirm.  */
+  const char *regname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (op)) + 1;
+  int regno = decode_reg_name (regname);
+  if (regno < 0)
+    /* This indicates an error and we error out later on.  */
+    return;
+  const size_t regname_len = strlen (regname);
+  const char *constraint
+    = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
+  auto_vec<char, 64> constraint_new;
+  for (const char *p = constraint; *p; )
+    {
+      /* Set once "{REGNAME}" has been emitted in place of the constraint
+         starting at P, in which case the original characters are dropped.  */
+      bool pushed = false;
+      switch (*p)
+        {
+        case '+': case '=': case '%': case '?': case '!': case '*': case '&':
+        case '#': case '$': case '^': case '{': case 'E': case 'F': case 'G':
+        case 'H': case 's': case 'i': case 'n': case 'I': case 'J': case 'K':
+        case 'L': case 'M': case 'N': case 'O': case 'P': case ',': case '0':
+        case '1': case '2': case '3': case '4': case '5': case '6': case '7':
+        case '8': case '9': case '[': case '<': case '>': case 'g': case 'X':
+          break;
+
+        default:
+          if (!ISALPHA (*p))
+            break;
+          enum constraint_num cn = lookup_constraint (p);
+          enum reg_class rclass = reg_class_for_constraint (cn);
+          if (rclass != NO_REGS || insn_extra_address_constraint (cn))
+            {
+              /* Only a register constraint carries a class to check
+                 against.  An address constraint reaches this point with
+                 NO_REGS, of which no register's class is a subset, so an
+                 unconditional assertion could never hold for it.  */
+              if (rclass != NO_REGS)
+                gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (regno),
+                                                rclass));
+              constraint_new.safe_push ('{');
+              for (size_t i = 0; i < regname_len; ++i)
+                constraint_new.safe_push (regname[i]);
+              constraint_new.safe_push ('}');
+              pushed = true;
+            }
+          break;
+        }
+
+      /* Step over the constraint just examined, copying it unless it has
+         been replaced.  Test for the terminator before copying so that a
+         constraint which claims to be longer than the remaining string
+         does not put a NUL in the middle of the result.  */
+      for (size_t len = CONSTRAINT_LEN (*p, p); len && *p; len--, p++)
+        if (!pushed)
+          constraint_new.safe_push (*p);
+    }
+  /* build_string copies its input, so terminate the vector in place and
+     pass its storage directly instead of a separately allocated buffer
+     which would never be released.  This also keeps an empty constraint
+     from indexing an empty vector.  As before, the string length includes
+     the terminating NUL.  */
+  constraint_new.safe_push ('\0');
+  TREE_VALUE (TREE_PURPOSE (link))
+    = build_string (constraint_new.length (), constraint_new.address ());
+  demote_register_asm.add (op);
+}
+
/* Gimplify the operands of an ASM_EXPR. Input operands should be a gimple
value; output operands should be a gimple lvalue. */
@@ -7100,6 +7167,8 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
bool ok;
size_t constraint_len;
+ gimplify_demote_register_asm (link);
+
link_next = TREE_CHAIN (link);
oconstraints[i]
@@ -7285,6 +7354,8 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
int input_num = 0;
for (link = ASM_INPUTS (expr); link; ++input_num, ++i, link = link_next)
{
+ gimplify_demote_register_asm (link);
+
link_next = TREE_CHAIN (link);
constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
reg_info.operand = TREE_VALUE (link);
@@ -19525,6 +19596,13 @@ gimplify_body (tree fndecl, bool do_parms)
}
}
+ for (auto op : demote_register_asm)
+ {
+ DECL_REGISTER (op) = 0;
+ DECL_HARD_REGISTER (op) = 0;
+ }
+ demote_register_asm.empty ();
+
if ((flag_openacc || flag_openmp || flag_openmp_simd)
&& gimplify_omp_ctxp)
{
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do run { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fdemote-register-asm" } */
+
+#include "asm-hard-reg-demotion.h"
+
+/* Built with -fdemote-register-asm: abort unless both bar and baz return
+   their argument for each probed value.  */
+int
+main (void)
+{
+ if (bar (0) != 0
+ || bar (1) != 1
+ || bar (2) != 2
+ || bar (32) != 32
+ || baz (0) != 0
+ || baz (1) != 1
+ || baz (2) != 2
+ || baz (32) != 32)
+ __builtin_abort ();
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do run { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fno-demote-register-asm" } */
+
+#include "asm-hard-reg-demotion.h"
+
+/* Built with -fno-demote-register-asm: bar is expected to return 42 for
+   every probed value (the constant passed to foo, see the comment in
+   asm-hard-reg-demotion.h) whereas baz is expected to return its argument.
+   Abort otherwise.  */
+int
+main (void)
+{
+ if (bar (0) != 42
+ || bar (1) != 42
+ || bar (2) != 42
+ || bar (32) != 42
+ || baz (0) != 0
+ || baz (1) != 1
+ || baz (2) != 2
+ || baz (32) != 32)
+ __builtin_abort ();
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,52 @@
+/* Pass parameter x in the first general argument register to the assembler
+ instruction.
+
+ In function bar we fail to do so because after the function call to foo,
+ variable argreg1 does not contain the value of x but rather 42 which got
+ passed to foo. Thus, the function always returns 42. In contrast in
+ function baz, variable x is saved over the function call and materializes in
+ the asm statement and therefore is returned. */
+
+/* Per-target building blocks used by bar and baz below:
+   REG   declares argreg1 as a local register asm variable bound to a named
+         register and initializes it from x;
+   MOVE1 copies argreg1 into out through an "r" input operand;
+   MOVE2 copies x into out through a hard register constraint which names
+         the same register as REG.
+   No fallback is defined for other targets.  */
+#if defined (__aarch64__)
+# define REG register int argreg1 __asm__ ("x0") = x;
+# define MOVE1 __asm__ ("mov\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mov\t%0,%1" : "=r" (out) : "{x0}" (x));
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define REG register int argreg1 __asm__ ("r3") = x;
+# define MOVE1 __asm__ ("mr\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mr\t%0,%1" : "=r" (out) : "{r3}" (x));
+#elif defined (__riscv)
+# define REG register int argreg1 __asm__ ("a0") = x;
+# define MOVE1 __asm__ ("mv\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mv\t%0,%1" : "=r" (out) : "{a0}" (x));
+#elif defined (__s390__)
+# define REG register int argreg1 __asm__ ("r2") = x;
+# define MOVE1 __asm__ ("lr\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("lr\t%0,%1" : "=r" (out) : "{r2}" (x));
+#elif defined (__x86_64__)
+# define REG register int argreg1 __asm__ ("edi") = x;
+# define MOVE1 __asm__ ("mov\t%1,%0" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mov\t%1,%0" : "=r" (out) : "{edi}" (x));
+#endif
+
+/* Callee which bar and baz invoke with the constant 42.  The body is empty
+   and nothing is returned; both callers ignore the result.
+   NOTE(review): noipa presumably keeps the call from being inlined or
+   otherwise optimized away so that a real call is emitted -- confirm.  */
+__attribute__ ((noipa))
+int foo (int unused) { }
+
+/* Initialize the register asm variable argreg1 from X (REG), call foo (42),
+   and only afterwards read argreg1 in the asm statement (MOVE1) which sets
+   OUT.  Returns OUT.  The call sits between the assignment to the register
+   asm variable and its use.  */
+int
+bar (int x)
+{
+ int out;
+ REG
+ foo (42);
+ MOVE1
+ return out;
+}
+
+/* Call foo (42) and then pass X to the asm statement (MOVE2) through a hard
+   register constraint; no register asm variable is involved.  Returns the
+   value the asm statement stores in OUT.  */
+int
+baz (int x)
+{
+ int out;
+ foo (42);
+ MOVE2
+ return out;
+}