[v2,4/4] Rewrite register asm into hard register constraints

Message ID 20240918144332.3544018-5-stefansf@gcc.gnu.org
State New
Headers
Series Hard Register Constraints |

Checks

Context Check Description
rivoscibot/toolchain-ci-rivos-apply-patch success Patch applied
rivoscibot/toolchain-ci-rivos-lint warning Lint failed
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gcv-lp64d-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gcv-lp64d-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc_zba_zbb_zbc_zbs-lp64d-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gc-lp64d-non-multilib success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc-lp64d-non-multilib success Build passed
rivoscibot/toolchain-ci-rivos-test fail Testing failed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 fail Test failed
linaro-tcwg-bot/tcwg_gcc_check--master-arm fail Test failed

Commit Message

Stefan Schulze Frielinghaus Sept. 18, 2024, 2:43 p.m. UTC
  Currently a register asm already materializes during expand.  This
means that a hard register is allocated at the very first access of a
register asm, e.g. in an assignment.  As a consequence, this might lead
to suboptimal register allocation if the assignment and the asm
statement using it are spread far apart.  Even more problematic are
function calls between register asm assignments and the asm statement
using them, since hard registers may be clobbered by a call.  The former
may be solved by moving register asm assignments and asm statements
close together.  However, the latter is not easily solved since function
calls are sometimes implicit.  For example

int
foo (int *x)
{
  register int y asm ("0") = 42;
  register int z asm ("1") = *x;
  asm ("bar\t%0,%1" : "+r" (z) : "r" (y));
  return z;
}

If compiled with address sanitizer, then a function call is introduced
for the memory load which in turn may interfere with the initialization
of register asm y.  Likewise, for some targets and configurations even
an operation like an addition may lead to an implicit library call.

In contrast hard register constraints materialize during register
allocation and therefore do not suffer from this, i.e., asm operands are
kept in pseudos until RA.  This patch adds the feature of rewriting
local register asm into code which exploits hard register constraints.
For example

register int global asm ("r3");

int foo (int x0)
{
  register int x asm ("r4") = x0;
  register int y asm ("r5");

  asm ("bar\t%0,%1,%2" : "=r" (x) : "0" (x), "r" (global));
  x += 42;
  asm ("baz\t%0,%1" : "=r" (y) : "r" (x));

  return y;
}

is rewritten during gimplification into

register int global asm ("r3");

int foo (int x0)
{
  int x = x0;
  int y;

  asm ("bar\t%0,%1,%2" : "={r4}" (x) : "0" (x), "r" (global));
  x += 42;
  asm ("baz\t%0,%1" : "={r5}" (y) : "{r4}" (x));

  return y;
}

The resulting code solely relies on hard register constraints modulo
global register asm.

Since I consider this an experimental feature, it is hidden behind the
new flag -fdemote-register-asm (I'm open to other naming suggestions).
---
 gcc/common.opt                                |  4 +
 gcc/gimplify.cc                               | 78 +++++++++++++++++++
 .../gcc.dg/asm-hard-reg-demotion-1.c          | 19 +++++
 .../gcc.dg/asm-hard-reg-demotion-2.c          | 19 +++++
 gcc/testsuite/gcc.dg/asm-hard-reg-demotion.h  | 52 +++++++++++++
 5 files changed, 172 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-demotion-1.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-demotion-2.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-demotion.h
  

Patch

diff --git a/gcc/common.opt b/gcc/common.opt
index ea39f87ae71..859a735a0b7 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3422,6 +3422,10 @@  fverbose-asm
 Common Var(flag_verbose_asm)
 Add extra commentary to assembler output.
 
+fdemote-register-asm
+Common Var(flag_demote_register_asm) Init(0)
+Demote local register asm and use hard register constraints instead
+
 fvisibility=
 Common Joined RejectNegative Enum(symbol_visibility) Var(default_visibility) Init(VISIBILITY_DEFAULT)
 -fvisibility=[default|internal|hidden|protected]	Set the default symbol visibility.
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 08e0b5d047b..c9bd1769c28 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -7049,6 +7049,73 @@  num_alternatives (const_tree link)
   return num + 1;
 }
 
+static hash_set<tree> demote_register_asm;
+
+/* With -fdemote-register-asm: if LINK's operand is a local register asm,
+   rewrite its register constraints into {REG} form and record the operand.  */
+static void
+gimplify_demote_register_asm (tree link)
+{
+  if (!flag_demote_register_asm)
+    return;
+  tree op = TREE_VALUE (link);
+  if (!VAR_P (op) || !DECL_HARD_REGISTER (op) || is_global_var (op))
+    return;
+  tree id = DECL_ASSEMBLER_NAME (op);
+  const char *regname = IDENTIFIER_POINTER (id);
+  ++regname;
+  int regno = decode_reg_name (regname);
+  if (regno < 0)
+    /* This indicates an error and we error out later on.  */
+    return;
+  const char *constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
+  auto_vec<char, 64> constraint_new;
+  for (const char *p = constraint; *p; )
+    {
+      bool pushed = false;
+      switch (*p)
+	{
+	case '+': case '=': case '%': case '?': case '!': case '*': case '&':
+	case '#': case '$': case '^': case '{': case 'E': case 'F': case 'G':
+	case 'H': case 's': case 'i': case 'n': case 'I': case 'J': case 'K':
+	case 'L': case 'M': case 'N': case 'O': case 'P': case ',': case '0':
+	case '1': case '2': case '3': case '4': case '5': case '6': case '7':
+	case '8': case '9': case '[': case '<': case '>': case 'g': case 'X':
+	  break;
+
+	default:
+	  if (!ISALPHA (*p))
+	    break;
+	  enum constraint_num cn = lookup_constraint (p);
+	  enum reg_class rclass = reg_class_for_constraint (cn);
+	  if (rclass != NO_REGS || insn_extra_address_constraint (cn))
+	    {
+	      gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (regno), rclass));
+	      constraint_new.safe_push ('{');
+	      size_t len = strlen (regname);
+	      for (size_t i = 0; i < len; ++i)
+		constraint_new.safe_push (regname[i]);
+	      constraint_new.safe_push ('}');
+	      pushed = true;
+	    }
+	  break;
+	}
+
+      for (size_t len = CONSTRAINT_LEN (*p, p); len; len--, p++)
+	{
+	  if (!pushed)
+	    constraint_new.safe_push (*p);
+	  if (*p == '\0')
+	    break;
+	}
+    }
+  /* build_string copies its argument, so no extra heap buffer is needed.  */
+  constraint_new.safe_push ('\0');
+  tree str = build_string (constraint_new.length (), constraint_new.address ());
+  TREE_VALUE (TREE_PURPOSE (link)) = str;
+  demote_register_asm.add (op);
+}
+
 /* Gimplify the operands of an ASM_EXPR.  Input operands should be a gimple
    value; output operands should be a gimple lvalue.  */
 
@@ -7100,6 +7167,8 @@  gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
       bool ok;
       size_t constraint_len;
 
+      gimplify_demote_register_asm (link);
+
       link_next = TREE_CHAIN (link);
 
       oconstraints[i]
@@ -7285,6 +7354,8 @@  gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
   int input_num = 0;
   for (link = ASM_INPUTS (expr); link; ++input_num, ++i, link = link_next)
     {
+      gimplify_demote_register_asm (link);
+
       link_next = TREE_CHAIN (link);
       constraint = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
       reg_info.operand = TREE_VALUE (link);
@@ -19525,6 +19596,13 @@  gimplify_body (tree fndecl, bool do_parms)
 	  }
     }
 
+  for (auto op : demote_register_asm)
+    {
+      DECL_REGISTER (op) = 0;
+      DECL_HARD_REGISTER (op) = 0;
+    }
+  demote_register_asm.empty ();
+
   if ((flag_openacc || flag_openmp || flag_openmp_simd)
       && gimplify_omp_ctxp)
     {
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-1.c b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-1.c
new file mode 100644
index 00000000000..541a66a8d05
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-1.c
@@ -0,0 +1,19 @@ 
+/* { dg-do run { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fdemote-register-asm" } */
+
+#include "asm-hard-reg-demotion.h"
+
+int
+main (void)
+{
+  if (bar (0) != 0
+      || bar (1) != 1
+      || bar (2) != 2
+      || bar (32) != 32
+      || baz (0) != 0
+      || baz (1) != 1
+      || baz (2) != 2
+      || baz (32) != 32)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-2.c b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-2.c
new file mode 100644
index 00000000000..3d216d440af
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-2.c
@@ -0,0 +1,19 @@ 
+/* { dg-do run { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fno-demote-register-asm" } */
+
+#include "asm-hard-reg-demotion.h"
+
+int
+main (void)
+{
+  if (bar (0) != 42
+      || bar (1) != 42
+      || bar (2) != 42
+      || bar (32) != 42
+      || baz (0) != 0
+      || baz (1) != 1
+      || baz (2) != 2
+      || baz (32) != 32)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-demotion.h b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion.h
new file mode 100644
index 00000000000..6d72f622ce9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion.h
@@ -0,0 +1,52 @@ 
+/* Pass parameter x in the first general argument register to the assembler
+   instruction.
+
+   In function bar we fail to do so because after the function call to foo,
+   variable argreg1 does not contain the value of x but rather 42 which got
+   passed to foo.  Thus, the function always returns 42.  In contrast in
+   function baz, variable x is saved over the function call and materializes in
+   the asm statement and therefore is returned.  */
+
+#if defined (__aarch64__)
+# define REG register int argreg1 __asm__ ("x0") = x;
+# define MOVE1 __asm__ ("mov\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mov\t%0,%1" : "=r" (out) : "{x0}" (x));
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define REG register int argreg1 __asm__ ("r3") = x;
+# define MOVE1 __asm__ ("mr\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mr\t%0,%1" : "=r" (out) : "{r3}" (x));
+#elif defined (__riscv)
+# define REG register int argreg1 __asm__ ("a0") = x;
+# define MOVE1 __asm__ ("mv\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mv\t%0,%1" : "=r" (out) : "{a0}" (x));
+#elif defined (__s390__)
+# define REG register int argreg1 __asm__ ("r2") = x;
+# define MOVE1 __asm__ ("lr\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("lr\t%0,%1" : "=r" (out) : "{r2}" (x));
+#elif defined (__x86_64__)
+# define REG register int argreg1 __asm__ ("edi") = x;
+# define MOVE1 __asm__ ("mov\t%1,%0" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mov\t%1,%0" : "=r" (out) : "{edi}" (x));
+#endif
+
+__attribute__ ((noipa))
+int foo (int unused) { }
+
+int
+bar (int x)
+{
+  int out;
+  REG
+  foo (42);
+  MOVE1
+  return out;
+}
+
+int
+baz (int x)
+{
+  int out;
+  foo (42);
+  MOVE2
+  return out;
+}