[v3,4/4] Rewrite register asm into hard register constraints

Message ID 20241025095720.2990683-5-stefansf@gcc.gnu.org
State New
Headers
Series Hard Register Constraints |

Checks

Context Check Description
rivoscibot/toolchain-ci-rivos-lint warning Lint failed
rivoscibot/toolchain-ci-rivos-apply-patch success Patch applied
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gc-lp64d-non-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc-lp64d-non-multilib success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gcv-lp64d-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc_zba_zbb_zbc_zbs-lp64d-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gcv-lp64d-multilib success Build passed
rivoscibot/toolchain-ci-rivos-test fail Testing failed

Commit Message

Stefan Schulze Frielinghaus Oct. 25, 2024, 9:57 a.m. UTC
  Currently a register asm already materializes during expand.  This
means, a hard register is allocated for the very first access of a
register asm as e.g. in an assignment.  As a consequence this might lead
to suboptimal register allocation if the assignment and the using asm
statement are spread far apart.  Even more problematic are function
calls in between register asm assignments and its using asm statement
since hard registers may be clobbered by a call.  The former may be
solved by pulling register asm assignments and asm statements close by.
However, the latter is not easily solved since sometimes function calls
are implicit.  For example

int
foo (int *x)
{
  register int y asm ("0") = 42;
  register int z asm ("1") = *x;
  asm ("bar\t%0,%1" : "+r" (z) : "r" (y));
  return z;
}

If compiled with address sanitizer, then a function call is introduced
for the memory load which in turn may interfer with the initialization
of register asm y.  Likewise, for some targets and configurations even
an operation like an addition may lead to an implicit library call.

In contrast hard register constraints materialize during register
allocation and therefore do not suffer from this, i.e., asm operands are
kept in pseudos until RA.  This patch adds the feature of rewriting
local register asm into code which exploits hard register constraints.
For example

register int global asm ("r3");

int foo (int x0)
{
  register int x asm ("r4") = x0;
  register int y asm ("r5");

  asm ("bar\t%0,%1,%2" : "=r" (x) : "0" (x), "r" (global));
  x += 42;
  asm ("baz\t%0,%1" : "=r" (y) : "r" (x));

  return y;
}

is rewritten during gimplification into

register int global asm ("r3");

int foo (int x0)
{
  int x = x0;
  int y;

  asm ("bar\t%0,%1,%2" : "={r4}" (x) : "0" (x), "r" (global));
  x += 42;
  asm ("baz\t%0,%1" : "={r5}" (y) : "{r4}" (x));

  return y;
}

The resulting code solely relies on hard register constraints modulo
global register asm.

Since I consider this as an experimental feature it is hidden behind new
flag -fdemote-register-asm (I'm open for other naming suggestions).
---
 gcc/common.opt                                |  4 +
 gcc/gimplify.cc                               | 87 +++++++++++++++++++
 .../gcc.dg/asm-hard-reg-demotion-1.c          | 19 ++++
 .../gcc.dg/asm-hard-reg-demotion-2.c          | 19 ++++
 .../gcc.dg/asm-hard-reg-demotion-error-1.c    | 29 +++++++
 gcc/testsuite/gcc.dg/asm-hard-reg-demotion.h  | 52 +++++++++++
 6 files changed, 210 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-demotion-1.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-demotion-2.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-demotion-error-1.c
 create mode 100644 gcc/testsuite/gcc.dg/asm-hard-reg-demotion.h
  

Patch

diff --git a/gcc/common.opt b/gcc/common.opt
index d270e524ff4..8fcbb54018a 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3426,6 +3426,10 @@  fverbose-asm
 Common Var(flag_verbose_asm)
 Add extra commentary to assembler output.
 
+fdemote-register-asm
+Common Var(flag_demote_register_asm) Init(0)
+Demote local register asm and use hard register constraints instead.
+
 fvisibility=
 Common Joined RejectNegative Enum(symbol_visibility) Var(default_visibility) Init(VISIBILITY_DEFAULT)
 -fvisibility=[default|internal|hidden|protected]	Set the default symbol visibility.
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index e39ba2d3766..c0e2e63bda2 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -7061,6 +7061,72 @@  num_alternatives (const_tree link)
   return num + 1;
 }
 
+/* Keep track of all register asm which have been replaced by hard register
+   constraints.  After all asm statements of a function have been processed,
+   demote those to ordinary variables.  */
+static hash_set<tree> demote_register_asm;
+
+static void
+gimplify_demote_register_asm (tree link)
+{
+  tree op = TREE_VALUE (link);
+  if (!VAR_P (op) || !DECL_HARD_REGISTER (op) || is_global_var (op))
+    return;
+  tree id = DECL_ASSEMBLER_NAME (op);
+  const char *regname = IDENTIFIER_POINTER (id);
+  ++regname;
+  int regno = decode_reg_name (regname);
+  if (regno < 0)
+    /* This indicates an error and we error out later on.  */
+    return;
+  const char *constraint
+    = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link)));
+  auto_vec<char, 64> constraint_new;
+  for (const char *p = constraint; *p; )
+    {
+      bool pushed = false;
+      switch (*p)
+	{
+	case '+': case '=': case '%': case '?': case '!': case '*': case '&':
+	case '#': case '$': case '^': case '{': case 'E': case 'F': case 'G':
+	case 'H': case 's': case 'i': case 'n': case 'I': case 'J': case 'K':
+	case 'L': case 'M': case 'N': case 'O': case 'P': case ',': case '0':
+	case '1': case '2': case '3': case '4': case '5': case '6': case '7':
+	case '8': case '9': case '[': case '<': case '>': case 'g': case 'X':
+	  break;
+
+	default:
+	  if (!ISALPHA (*p))
+	    break;
+	  enum constraint_num cn = lookup_constraint (p);
+	  enum reg_class rclass = reg_class_for_constraint (cn);
+	  if (rclass != NO_REGS || insn_extra_address_constraint (cn))
+	    {
+	      constraint_new.safe_push ('{');
+	      size_t len = strlen (regname);
+	      for (size_t i = 0; i < len; ++i)
+		constraint_new.safe_push (regname[i]);
+	      constraint_new.safe_push ('}');
+	      pushed = true;
+	    }
+	  break;
+	}
+
+      for (size_t len = CONSTRAINT_LEN (*p, p); len; len--, p++)
+	{
+	  if (!pushed)
+	    constraint_new.safe_push (*p);
+	  if (*p == '\0')
+	    break;
+	}
+    }
+  constraint_new.safe_push ('\0');
+  unsigned int len = constraint_new.length ();
+  tree str = build_string (len, constraint_new.address ());
+  TREE_VALUE (TREE_PURPOSE (link)) = str;
+  demote_register_asm.add (op);
+}
+
 /* Gimplify the operands of an ASM_EXPR.  Input operands should be a gimple
    value; output operands should be a gimple lvalue.  */
 
@@ -7464,6 +7530,20 @@  gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
   /* Do not add ASMs with errors to the gimple IL stream.  */
   if (ret != GS_ERROR)
     {
+      if (flag_demote_register_asm)
+	{
+	  for (unsigned i = 0; i < vec_safe_length (outputs); ++i)
+	    {
+	      tree link = (*outputs)[i];
+	      gimplify_demote_register_asm (link);
+	    }
+	  for (unsigned i = 0; i < vec_safe_length (inputs); ++i)
+	    {
+	      tree link = (*inputs)[i];
+	      gimplify_demote_register_asm (link);
+	    }
+	}
+
       stmt = gimple_build_asm_vec (TREE_STRING_POINTER (ASM_STRING (expr)),
 				   inputs, outputs, clobbers, labels);
 
@@ -19595,6 +19675,13 @@  gimplify_body (tree fndecl, bool do_parms)
 	  }
     }
 
+  for (auto op : demote_register_asm)
+    {
+      DECL_REGISTER (op) = 0;
+      DECL_HARD_REGISTER (op) = 0;
+    }
+  demote_register_asm.empty ();
+
   if ((flag_openacc || flag_openmp || flag_openmp_simd)
       && gimplify_omp_ctxp)
     {
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-1.c b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-1.c
new file mode 100644
index 00000000000..541a66a8d05
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-1.c
@@ -0,0 +1,19 @@ 
+/* { dg-do run { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fdemote-register-asm" } */
+
+#include "asm-hard-reg-demotion.h"
+
+int
+main (void)
+{
+  if (bar (0) != 0
+      || bar (1) != 1
+      || bar (2) != 2
+      || bar (32) != 32
+      || baz (0) != 0
+      || baz (1) != 1
+      || baz (2) != 2
+      || baz (32) != 32)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-2.c b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-2.c
new file mode 100644
index 00000000000..3d216d440af
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-2.c
@@ -0,0 +1,19 @@ 
+/* { dg-do run { target aarch64*-*-* powerpc64*-*-* riscv64-*-* s390*-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fno-demote-register-asm" } */
+
+#include "asm-hard-reg-demotion.h"
+
+int
+main (void)
+{
+  if (bar (0) != 42
+      || bar (1) != 42
+      || bar (2) != 42
+      || bar (32) != 42
+      || baz (0) != 0
+      || baz (1) != 1
+      || baz (2) != 2
+      || baz (32) != 32)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-error-1.c b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-error-1.c
new file mode 100644
index 00000000000..097a6ff901d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion-error-1.c
@@ -0,0 +1,29 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fdemote-register-asm" } */
+
+/* Verify input operands.  */
+
+int
+test (int x)
+{
+  register int y __asm__ ("0") = x;
+
+  /* Preserve status quo for register asm and don't error out.  This must hold
+     true even if we replace register asm with hard register constraints.  */
+  __asm__ ("" : "+r" (y) : "r" (y));
+  __asm__ ("" : "=r" (y) : "0" (y), "r" (y));
+  __asm__ ("" : "=r" (y) : "r" (y), "r" (y));
+
+  /* Be more strict for hard register constraints and error out.  */
+  __asm__ ("" : "+{0}" (x) : "{0}" (x)); /* { dg-error "multiple inputs to hard register" } */
+  __asm__ ("" : "={0}" (x) : "0" (x), "{0}" (x)); /* { dg-error "multiple inputs to hard register" } */
+  __asm__ ("" : "=r" (x) : "{0}" (x), "{0}" (x)); /* { dg-error "multiple inputs to hard register" } */
+
+  /* Still error out in case of a mixture of both constructs.  */
+  __asm__ ("" : "+{0}" (x) : "r" (y)); /* { dg-error "multiple inputs to hard register" } */
+  __asm__ ("" : "={0}" (x) : "0" (x), "r" (y)); /* { dg-error "multiple inputs to hard register" } */
+  __asm__ ("" : "+r" (y) : "{0}" (x)); /* { dg-error "multiple inputs to hard register" } */
+  __asm__ ("" : "=r" (y) : "0" (y), "{0}" (x)); /* { dg-error "multiple inputs to hard register" } */
+
+  return x + y;
+}
diff --git a/gcc/testsuite/gcc.dg/asm-hard-reg-demotion.h b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion.h
new file mode 100644
index 00000000000..6d72f622ce9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asm-hard-reg-demotion.h
@@ -0,0 +1,52 @@ 
+/* Pass parameter x in the first general argument register to the assembler
+   instruction.
+
+   In function bar we fail to do so because after the function call to foo,
+   variable argreg1 does not contain the value of x but rather 42 which got
+   passed to foo.  Thus, the function always returns 42.  In contrast in
+   function baz, variable x is saved over the function call and materializes in
+   the asm statement and therefore is returned.  */
+
+#if defined (__aarch64__)
+# define REG register int argreg1 __asm__ ("x0") = x;
+# define MOVE1 __asm__ ("mov\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mov\t%0,%1" : "=r" (out) : "{x0}" (x));
+#elif defined (__powerpc__) || defined (__POWERPC__)
+# define REG register int argreg1 __asm__ ("r3") = x;
+# define MOVE1 __asm__ ("mr\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mr\t%0,%1" : "=r" (out) : "{r3}" (x));
+#elif defined (__riscv)
+# define REG register int argreg1 __asm__ ("a0") = x;
+# define MOVE1 __asm__ ("mv\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mv\t%0,%1" : "=r" (out) : "{a0}" (x));
+#elif defined (__s390__)
+# define REG register int argreg1 __asm__ ("r2") = x;
+# define MOVE1 __asm__ ("lr\t%0,%1" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("lr\t%0,%1" : "=r" (out) : "{r2}" (x));
+#elif defined (__x86_64__)
+# define REG register int argreg1 __asm__ ("edi") = x;
+# define MOVE1 __asm__ ("mov\t%1,%0" : "=r" (out) : "r" (argreg1));
+# define MOVE2 __asm__ ("mov\t%1,%0" : "=r" (out) : "{edi}" (x));
+#endif
+
+__attribute__ ((noipa))
+int foo (int unused) { }
+
+int
+bar (int x)
+{
+  int out;
+  REG
+  foo (42);
+  MOVE1
+  return out;
+}
+
+int
+baz (int x)
+{
+  int out;
+  foo (42);
+  MOVE2
+  return out;
+}