[16/16] aarch64: Update sibcall handling for SME

Message ID mptr0y7mari.fsf@arm.com
State New
Headers
Series aarch64: Add support for SME |

Commit Message

Richard Sandiford Nov. 13, 2022, 10:03 a.m. UTC
  We only support tail calls between functions with the same PSTATE.ZA
setting ("private-ZA" to "private-ZA" and "shared-ZA" to "shared-ZA").

Only a normal non-streaming function can tail-call another non-streaming
function, and only a streaming function can tail-call another streaming
function.  Any function can tail-call a streaming-compatible function.

gcc/
	* config/aarch64/aarch64.cc (aarch64_function_ok_for_sibcall):
	Enforce PSTATE.SM and PSTATE.ZA restrictions.
	(aarch64_expand_epilogue): Save and restore the arguments
	to a sibcall around any change to PSTATE.SM.

gcc/testsuite/
	* gcc.target/aarch64/sme/locally_streaming_4.c: New test.
	* gcc.target/aarch64/sme/sibcall_1.c: Likewise.
	* gcc.target/aarch64/sme/sibcall_2.c: Likewise.
	* gcc.target/aarch64/sme/sibcall_3.c: Likewise.
	* gcc.target/aarch64/sme/sibcall_4.c: Likewise.
	* gcc.target/aarch64/sme/sibcall_5.c: Likewise.
	* gcc.target/aarch64/sme/sibcall_6.c: Likewise.
	* gcc.target/aarch64/sme/sibcall_7.c: Likewise.
	* gcc.target/aarch64/sme/sibcall_8.c: Likewise.
---
 gcc/config/aarch64/aarch64.cc                 |   9 +-
 .../aarch64/sme/locally_streaming_4.c         | 129 ++++++++++++++++++
 .../gcc.target/aarch64/sme/sibcall_1.c        |  45 ++++++
 .../gcc.target/aarch64/sme/sibcall_2.c        |  45 ++++++
 .../gcc.target/aarch64/sme/sibcall_3.c        |  45 ++++++
 .../gcc.target/aarch64/sme/sibcall_4.c        |  45 ++++++
 .../gcc.target/aarch64/sme/sibcall_5.c        |  45 ++++++
 .../gcc.target/aarch64/sme/sibcall_6.c        |  26 ++++
 .../gcc.target/aarch64/sme/sibcall_7.c        |  26 ++++
 .../gcc.target/aarch64/sme/sibcall_8.c        |  19 +++
 10 files changed, 433 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c
  

Patch

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9a4a469a078..0d4c20f5c6a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8110,6 +8110,11 @@  aarch64_function_ok_for_sibcall (tree, tree exp)
   if (crtl->abi->id () != expr_callee_abi (exp).id ())
     return false;
 
+  tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
+  if (aarch64_fntype_sm_state (fntype) & ~aarch64_cfun_incoming_sm_state ())
+    return false;
+  if (aarch64_fntype_za_state (fntype) != aarch64_cfun_incoming_za_state ())
+    return false;
   return true;
 }
 
@@ -11236,7 +11241,9 @@  aarch64_expand_epilogue (rtx_call_insn *sibcall)
 	guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM,
 						      aarch64_isa_flags);
       aarch64_sme_mode_switch_regs args_switch;
-      if (crtl->return_rtx && REG_P (crtl->return_rtx))
+      if (sibcall)
+	args_switch.add_call_args (sibcall);
+      else if (crtl->return_rtx && REG_P (crtl->return_rtx))
 	args_switch.add_reg (GET_MODE (crtl->return_rtx),
 			     REGNO (crtl->return_rtx));
       args_switch.emit_prologue ();
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c
new file mode 100644
index 00000000000..b0e4759ed11
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c
@@ -0,0 +1,129 @@ 
+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" }
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+
+/*
+** test_d0:
+**	...
+**	fmov	x10, d0
+**	smstop	sm
+**	fmov	d0, x10
+**	...
+*/
+void consume_d0 (double d0);
+
+void __attribute__((arm_locally_streaming))
+test_d0 ()
+{
+  consume_d0 (1.0);
+}
+
+/*
+** test_d7:
+**	...
+**	fmov	x10, d0
+**	fmov	x11, d1
+**	fmov	x12, d2
+**	fmov	x13, d3
+**	fmov	x14, d4
+**	fmov	x15, d5
+**	fmov	x16, d6
+**	fmov	x17, d7
+**	smstop	sm
+**	fmov	d0, x10
+**	fmov	d1, x11
+**	fmov	d2, x12
+**	fmov	d3, x13
+**	fmov	d4, x14
+**	fmov	d5, x15
+**	fmov	d6, x16
+**	fmov	d7, x17
+**	...
+*/
+void consume_d7 (double d0, double d1, double d2, double d3,
+		 double d4, double d5, double d6, double d7);
+void __attribute__((arm_locally_streaming))
+test_d7 ()
+{
+  consume_d7 (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+}
+
+/*
+** test_q7:
+**	...
+**	stp	q0, q1, \[sp, #?-128\]!
+**	stp	q2, q3, \[sp, #?32\]
+**	stp	q4, q5, \[sp, #?64\]
+**	stp	q6, q7, \[sp, #?96\]
+**	smstop	sm
+**	ldp	q2, q3, \[sp, #?32\]
+**	ldp	q4, q5, \[sp, #?64\]
+**	ldp	q6, q7, \[sp, #?96\]
+**	ldp	q0, q1, \[sp\], #?128
+**	...
+*/
+void consume_q7 (int8x16x4_t q0, int8x16x4_t q4);
+
+void __attribute__((arm_locally_streaming))
+test_q7 (int8x16x4_t *ptr)
+{
+  consume_q7 (ptr[0], ptr[1]);
+}
+
+/*
+** test_z7:
+**	...
+**	addvl	sp, sp, #-8
+**	str	z0, \[sp\]
+**	str	z1, \[sp, #1, mul vl\]
+**	str	z2, \[sp, #2, mul vl\]
+**	str	z3, \[sp, #3, mul vl\]
+**	str	z4, \[sp, #4, mul vl\]
+**	str	z5, \[sp, #5, mul vl\]
+**	str	z6, \[sp, #6, mul vl\]
+**	str	z7, \[sp, #7, mul vl\]
+**	smstop	sm
+**	ldr	z0, \[sp\]
+**	ldr	z1, \[sp, #1, mul vl\]
+**	ldr	z2, \[sp, #2, mul vl\]
+**	ldr	z3, \[sp, #3, mul vl\]
+**	ldr	z4, \[sp, #4, mul vl\]
+**	ldr	z5, \[sp, #5, mul vl\]
+**	ldr	z6, \[sp, #6, mul vl\]
+**	ldr	z7, \[sp, #7, mul vl\]
+**	addvl	sp, sp, #8
+**	...
+*/
+void consume_z7 (svint8x4_t z0, svint8x4_t z4);
+
+void __attribute__((arm_locally_streaming))
+test_z7 (svint8x4_t *ptr1, svint8x4_t *ptr2)
+{
+  consume_z7 (*ptr1, *ptr2);
+}
+
+/*
+** test_p3:
+**	...
+**	addvl	sp, sp, #-1
+**	str	p0, \[sp\]
+**	str	p1, \[sp, #1, mul vl\]
+**	str	p2, \[sp, #2, mul vl\]
+**	str	p3, \[sp, #3, mul vl\]
+**	smstop	sm
+**	ldr	p0, \[sp\]
+**	ldr	p1, \[sp, #1, mul vl\]
+**	ldr	p2, \[sp, #2, mul vl\]
+**	ldr	p3, \[sp, #3, mul vl\]
+**	addvl	sp, sp, #1
+**	...
+*/
+void consume_p3 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3);
+
+void __attribute__((arm_locally_streaming))
+test_p3 (svbool_t *ptr1, svbool_t *ptr2, svbool_t *ptr3, svbool_t *ptr4)
+{
+  consume_p3 (*ptr1, *ptr2, *ptr3, *ptr4);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c
new file mode 100644
index 00000000000..0b0f4191a60
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c
@@ -0,0 +1,45 @@ 
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_streaming_compatible)) sc_callee ();
+void __attribute__((arm_streaming)) s_callee ();
+void n_callee ();
+
+void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming))
+sc_ls_callee () {}
+void __attribute__((noipa, arm_locally_streaming))
+n_ls_callee () {}
+
+void __attribute__((arm_streaming_compatible))
+sc_to_sc ()
+{
+  sc_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */
+
+void __attribute__((arm_streaming_compatible))
+sc_to_s ()
+{
+  s_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\ts_callee} } } */
+
+void __attribute__((arm_streaming_compatible))
+sc_to_n ()
+{
+  n_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tn_callee} } } */
+
+void __attribute__((arm_streaming_compatible))
+sc_to_sc_ls ()
+{
+  sc_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */
+
+void __attribute__((arm_streaming_compatible))
+sc_to_n_ls ()
+{
+  n_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c
new file mode 100644
index 00000000000..95af22dd29d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c
@@ -0,0 +1,45 @@ 
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_streaming_compatible)) sc_callee ();
+void __attribute__((arm_streaming)) s_callee ();
+void n_callee ();
+
+void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming))
+sc_ls_callee () {}
+void __attribute__((noipa, arm_locally_streaming))
+n_ls_callee () {}
+
+void __attribute__((arm_streaming))
+s_to_sc ()
+{
+  sc_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */
+
+void __attribute__((arm_streaming))
+s_to_s ()
+{
+  s_callee ();
+}
+/* { dg-final { scan-assembler {\tb\ts_callee} } } */
+
+void __attribute__((arm_streaming))
+s_to_n ()
+{
+  n_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tn_callee} } } */
+
+void __attribute__((arm_streaming))
+s_to_sc_ls ()
+{
+  sc_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */
+
+void __attribute__((arm_streaming))
+s_to_n_ls ()
+{
+  n_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c
new file mode 100644
index 00000000000..5221f925567
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c
@@ -0,0 +1,45 @@ 
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_streaming_compatible)) sc_callee ();
+void __attribute__((arm_streaming)) s_callee ();
+void n_callee ();
+
+void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming))
+sc_ls_callee () {}
+void __attribute__((noipa, arm_locally_streaming))
+n_ls_callee () {}
+
+void
+n_to_sc ()
+{
+  sc_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */
+
+void
+n_to_s ()
+{
+  s_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\ts_callee} } } */
+
+void
+n_to_n ()
+{
+  n_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tn_callee} } } */
+
+void
+n_to_sc_ls ()
+{
+  sc_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */
+
+void
+n_to_n_ls ()
+{
+  n_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tn_ls_callee} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c
new file mode 100644
index 00000000000..21b6a66a1b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c
@@ -0,0 +1,45 @@ 
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_streaming_compatible)) sc_callee ();
+void __attribute__((arm_streaming)) s_callee ();
+void n_callee ();
+
+void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming))
+sc_ls_callee () {}
+void __attribute__((noipa, arm_locally_streaming))
+n_ls_callee () {}
+
+void __attribute__((arm_streaming_compatible, arm_locally_streaming))
+sc_to_sc ()
+{
+  sc_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */
+
+void __attribute__((arm_streaming_compatible, arm_locally_streaming))
+sc_to_s ()
+{
+  s_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\ts_callee} } } */
+
+void __attribute__((arm_streaming_compatible, arm_locally_streaming))
+sc_to_n ()
+{
+  n_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tn_callee} } } */
+
+void __attribute__((arm_streaming_compatible, arm_locally_streaming))
+sc_to_sc_ls ()
+{
+  sc_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */
+
+void __attribute__((arm_streaming_compatible, arm_locally_streaming))
+sc_to_n_ls ()
+{
+  n_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c
new file mode 100644
index 00000000000..736797a476c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c
@@ -0,0 +1,45 @@ 
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_streaming_compatible)) sc_callee ();
+void __attribute__((arm_streaming)) s_callee ();
+void n_callee ();
+
+void __attribute__((noipa, arm_streaming_compatible, arm_locally_streaming))
+sc_ls_callee () {}
+void __attribute__((noipa, arm_locally_streaming))
+n_ls_callee () {}
+
+void __attribute__((arm_locally_streaming))
+n_to_sc ()
+{
+  sc_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */
+
+void __attribute__((arm_locally_streaming))
+n_to_s ()
+{
+  s_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\ts_callee} } } */
+
+void __attribute__((arm_locally_streaming))
+n_to_n ()
+{
+  n_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tn_callee} } } */
+
+void __attribute__((arm_locally_streaming))
+n_to_sc_ls ()
+{
+  sc_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */
+
+void __attribute__((arm_locally_streaming))
+n_to_n_ls ()
+{
+  n_ls_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tn_ls_callee} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c
new file mode 100644
index 00000000000..b2f321b7c8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c
@@ -0,0 +1,26 @@ 
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_shared_za)) shared_callee ();
+void __attribute__((noipa, arm_new_za)) new_callee () {}
+void normal_callee ();
+
+void __attribute__((arm_shared_za))
+shared_to_shared ()
+{
+  shared_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tshared_callee} } } */
+
+void __attribute__((arm_shared_za))
+shared_to_new ()
+{
+  new_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tnew_callee} } } */
+
+void __attribute__((arm_shared_za))
+shared_to_normal ()
+{
+  normal_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tnormal_callee} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c
new file mode 100644
index 00000000000..a096cf591b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c
@@ -0,0 +1,26 @@ 
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_shared_za)) shared_callee ();
+void __attribute__((noipa, arm_new_za)) new_callee () {}
+void normal_callee ();
+
+void __attribute__((arm_new_za))
+new_to_shared ()
+{
+  shared_callee ();
+}
+/* { dg-final { scan-assembler {\tbl\tshared_callee} } } */
+
+void __attribute__((arm_new_za))
+new_to_new ()
+{
+  new_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tnew_callee} } } */
+
+void __attribute__((arm_new_za))
+new_to_normal ()
+{
+  normal_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tnormal_callee} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c
new file mode 100644
index 00000000000..2553c10045a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c
@@ -0,0 +1,19 @@ 
+/* { dg-options "-O2" } */
+
+void __attribute__((arm_shared_za)) shared_callee ();
+void __attribute__((noipa, arm_new_za)) new_callee () {}
+void normal_callee ();
+
+void
+normal_to_new ()
+{
+  new_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tnew_callee} } } */
+
+void
+normal_to_normal ()
+{
+  normal_callee ();
+}
+/* { dg-final { scan-assembler {\tb\tnormal_callee} } } */