[committed] amdgcn: Vector procedure call ABI

Message ID faefd8b0-14c2-d35a-805d-23ad877b4abb@codesourcery.com
State Committed
Commit 4e1914625dec4aa09a5671c6294e877dbf4518f5
Headers
Series [committed] amdgcn: Vector procedure call ABI |

Commit Message

Andrew Stubbs Aug. 9, 2022, 12:39 p.m. UTC
  I've committed this patch for amdgcn.

This changes the procedure calling ABI such that vector arguments are 
passed in vector registers, rather than on the stack as before.

The ABI for scalar functions is unchanged for arguments, but the return 
value has now moved to a vector register; using the same return location 
for all types simplifies the compiler implementation. If a significant 
downside is found then we can move to having multiple return locations, 
and worry about how to fix the "untyped" calls then.

There's no "standard ABI" for this target, and there are no third party 
binaries with which to retain compatibility, so we're free to make 
whatever changes we wish.

Andrew
amdgcn: Vector procedure call ABI

Adjust the (unofficial) procedure calling ABI such that vector arguments are
passed in vector registers, not on the stack.  Scalar arguments continue to
be passed in scalar registers, making a total of 12 argument registers.

The return value is also moved to a vector register (even for scalars; it
would be possible to retain the scalar location, using untyped_call, but
there's no obvious advantage in doing so).

After this change the ABI is as follows:

s0-s13  : Reserved for kernel launch parameters.
s14-s15 : Frame pointer.
s16-s17 : Stack pointer.
s18-s19 : Link register.
s20-s21 : Exec Save.
s22-s23 : CC Save.
s24-s25 : Scalar arguments.          NO LONGER RETURN VALUE.
s26-s29 : Additional scalar arguments (makes 6 total).
s30-s31 : Static Chain.
v0      : Prologue/epilogue scratch.
v1      : Constant 0, 1, 2, 3, 4, ... 63.
v2-v7   : Prologue/epilogue scratch.
v8-v9   : Return value & vector arguments.              NEW.
v10-v13 : Additional vector arguments (makes 6 total).  NEW.

gcc/ChangeLog:

	* config/gcn/gcn.cc (gcn_function_value): Allow vector return values.
	(num_arg_regs): Allow vector arguments.
	(gcn_function_arg): Likewise.
	(gcn_function_arg_advance): Likewise.
	(gcn_arg_partial_bytes): Likewise.
	(gcn_return_in_memory): Likewise.
	(gcn_expand_epilogue): Get return value from v8.
	* config/gcn/gcn.h (RETURN_VALUE_REG): Set to v8.
	(FIRST_PARM_REG): Use FIRST_SGPR_REG for clarity.
	(FIRST_VPARM_REG): New.
	(FUNCTION_ARG_REGNO_P): Allow vector parameters.
	(struct gcn_args): Add vnum field.
	(LIBCALL_VALUE): Allow vector return values.
	* config/gcn/gcn.md (gcn_call_value): Add vector constraints.
	(gcn_call_value_indirect): Likewise.
  

Patch

diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 6fc20d3f659..96295e23aad 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -2284,7 +2284,7 @@  gcn_function_value (const_tree valtype, const_tree, bool)
       && GET_MODE_SIZE (mode) < 4)
     mode = SImode;
 
-  return gen_rtx_REG (mode, SGPR_REGNO (RETURN_VALUE_REG));
+  return gen_rtx_REG (mode, RETURN_VALUE_REG);
 }
 
 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.
@@ -2308,7 +2308,9 @@  num_arg_regs (const function_arg_info &arg)
     return 0;
 
   int size = arg.promoted_size_in_bytes ();
-  return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+  int regsize = UNITS_PER_WORD * (VECTOR_MODE_P (arg.mode)
+				  ? GET_MODE_NUNITS (arg.mode) : 1);
+  return (size + regsize - 1) / regsize;
 }
 
 /* Implement TARGET_STRICT_ARGUMENT_NAMING.
@@ -2358,16 +2360,16 @@  gcn_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
       if (targetm.calls.must_pass_in_stack (arg))
 	return 0;
 
-      /* Vector parameters are not supported yet.  */
-      if (VECTOR_MODE_P (arg.mode))
-	return 0;
-
-      int reg_num = FIRST_PARM_REG + cum->num;
+      int first_reg = (VECTOR_MODE_P (arg.mode)
+		       ? FIRST_VPARM_REG : FIRST_PARM_REG);
+      int cum_num = (VECTOR_MODE_P (arg.mode)
+		     ? cum->vnum : cum->num);
+      int reg_num = first_reg + cum_num;
       int num_regs = num_arg_regs (arg);
       if (num_regs > 0)
 	while (reg_num % num_regs != 0)
 	  reg_num++;
-      if (reg_num + num_regs <= FIRST_PARM_REG + NUM_PARM_REGS)
+      if (reg_num + num_regs <= first_reg + NUM_PARM_REGS)
 	return gen_rtx_REG (arg.mode, reg_num);
     }
   else
@@ -2419,11 +2421,15 @@  gcn_function_arg_advance (cumulative_args_t cum_v,
       if (!arg.named)
 	return;
 
+      int first_reg = (VECTOR_MODE_P (arg.mode)
+		       ? FIRST_VPARM_REG : FIRST_PARM_REG);
+      int *cum_num = (VECTOR_MODE_P (arg.mode)
+		      ? &cum->vnum : &cum->num);
       int num_regs = num_arg_regs (arg);
       if (num_regs > 0)
-	while ((FIRST_PARM_REG + cum->num) % num_regs != 0)
-	  cum->num++;
-      cum->num += num_regs;
+	while ((first_reg + *cum_num) % num_regs != 0)
+	  (*cum_num)++;
+      *cum_num += num_regs;
     }
   else
     {
@@ -2454,14 +2460,18 @@  gcn_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
   if (targetm.calls.must_pass_in_stack (arg))
     return 0;
 
-  if (cum->num >= NUM_PARM_REGS)
+  int cum_num = (VECTOR_MODE_P (arg.mode) ? cum->vnum : cum->num);
+  int regsize = UNITS_PER_WORD * (VECTOR_MODE_P (arg.mode)
+				  ? GET_MODE_NUNITS (arg.mode) : 1);
+
+  if (cum_num >= NUM_PARM_REGS)
     return 0;
 
   /* If the argument fits entirely in registers, return 0.  */
-  if (cum->num + num_arg_regs (arg) <= NUM_PARM_REGS)
+  if (cum_num + num_arg_regs (arg) <= NUM_PARM_REGS)
     return 0;
 
-  return (NUM_PARM_REGS - cum->num) * UNITS_PER_WORD;
+  return (NUM_PARM_REGS - cum_num) * regsize;
 }
 
 /* A normal function which takes a pointer argument (to a scalar) may be
@@ -2549,14 +2559,11 @@  gcn_return_in_memory (const_tree type, const_tree ARG_UNUSED (fntype))
   if (AGGREGATE_TYPE_P (type))
     return true;
 
-  /* Vector return values are not supported yet.  */
-  if (VECTOR_TYPE_P (type))
-    return true;
-
   if (mode == BLKmode)
     return true;
 
-  if (size > 2 * UNITS_PER_WORD)
+  if ((!VECTOR_TYPE_P (type) && size > 2 * UNITS_PER_WORD)
+      || size > 2 * UNITS_PER_WORD * 64)
     return true;
 
   return false;
@@ -3199,9 +3206,10 @@  gcn_expand_epilogue (void)
       emit_move_insn (kernarg_reg, retptr_mem);
 
       rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
+      rtx scalar_retval = gen_rtx_REG (SImode, FIRST_PARM_REG);
       set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
-      emit_move_insn (retval_mem,
-		      gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG)));
+      emit_move_insn (scalar_retval, gen_rtx_REG (SImode, RETURN_VALUE_REG));
+      emit_move_insn (retval_mem, scalar_retval);
     }
 
   emit_jump_insn (gen_gcn_return ());
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index a1297605047..318256c4a7a 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -138,7 +138,7 @@ 
 #define LINK_REGNUM		  18
 #define EXEC_SAVE_REG		  20
 #define CC_SAVE_REG		  22
-#define RETURN_VALUE_REG	  24	/* Must be divisible by 4.  */
+#define RETURN_VALUE_REG	  168	/* Must be divisible by 4.  */
 #define STATIC_CHAIN_REGNUM	  30
 #define WORK_ITEM_ID_Z_REG	  162
 #define SOFT_ARG_REG		  416
@@ -146,7 +146,8 @@ 
 #define DWARF_LINK_REGISTER	  420
 #define FIRST_PSEUDO_REGISTER	  421
 
-#define FIRST_PARM_REG 24
+#define FIRST_PARM_REG (FIRST_SGPR_REG + 24)
+#define FIRST_VPARM_REG (FIRST_VGPR_REG + 8)
 #define NUM_PARM_REGS  6
 
 /* There is no arg pointer.  Just choose random fixed register that does
@@ -164,7 +165,8 @@ 
 #define CC_REG_P(X)		(REG_P (X) && CC_REGNO_P (REGNO (X)))
 #define CC_REGNO_P(X)		((X) == SCC_REG || (X) == VCC_REG)
 #define FUNCTION_ARG_REGNO_P(N) \
-  ((N) >= FIRST_PARM_REG && (N) < (FIRST_PARM_REG + NUM_PARM_REGS))
+  (((N) >= FIRST_PARM_REG && (N) < (FIRST_PARM_REG + NUM_PARM_REGS)) \
+   || ((N) >= FIRST_VPARM_REG && (N) < (FIRST_VPARM_REG + NUM_PARM_REGS)))
 
 
 #define FIXED_REGISTERS {			    \
@@ -550,6 +552,7 @@  typedef struct gcn_args
   tree fntype;
   struct gcn_kernel_args args;
   int num;
+  int vnum;
   int offset;
   int alignment;
 } CUMULATIVE_ARGS;
@@ -653,7 +656,7 @@  enum gcn_builtin_codes
     }
 
 /* This needs to match gcn_function_value.  */
-#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, SGPR_REGNO (RETURN_VALUE_REG))
+#define LIBCALL_VALUE(MODE) gen_rtx_REG (MODE, RETURN_VALUE_REG)
 
 /* The s_ff0 and s_flbit instructions return -1 if no input bits are set.  */
 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 2)
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 70a769babc4..7805e867901 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -908,11 +908,11 @@  (define_expand "call_value"
   {})
 
 (define_insn "gcn_call_value"
-  [(set (match_operand 0 "register_operand" "=Sg,Sg")
-	(call (mem (match_operand 1 "immediate_operand" "Y,B"))
+  [(set (match_operand 0 "register_operand"		"=Sgv,Sgv")
+	(call (mem (match_operand 1 "immediate_operand" "   Y,  B"))
 	      (match_operand 2 "const_int_operand")))
    (clobber (reg:DI LR_REGNUM))
-   (clobber (match_scratch:DI 3 "=&Sg,X"))]
+   (clobber (match_scratch:DI 3				"=&Sg,  X"))]
   ""
   "@
   s_getpc_b64\t%3\;s_add_u32\t%L3, %L3, %1@rel32@lo+4\;s_addc_u32\t%H3, %H3, %1@rel32@hi+4\;s_swappc_b64\ts[18:19], %3
@@ -921,11 +921,11 @@  (define_insn "gcn_call_value"
    (set_attr "length" "24")])
 
 (define_insn "gcn_call_value_indirect"
-  [(set (match_operand 0 "register_operand" "=Sg")
-	(call (mem (match_operand:DI 1 "register_operand" "Sg"))
+  [(set (match_operand 0 "register_operand"		  "=Sgv")
+	(call (mem (match_operand:DI 1 "register_operand" "  Sg"))
 	      (match_operand 2 "" "")))
    (clobber (reg:DI LR_REGNUM))
-   (clobber (match_scratch:DI 3 "=X"))]
+   (clobber (match_scratch:DI 3				  "=  X"))]
   ""
   "s_swappc_b64\ts[18:19], %1"
   [(set_attr "type" "sop1")