s390: Change SET rtx_cost handling.

Message ID 3eed88a1-3a38-068e-fdff-87fbebabe049@linux.ibm.com
State New
Headers
Series s390: Change SET rtx_cost handling. |

Commit Message

Robin Dapp Feb. 25, 2022, 11:38 a.m. UTC
  Hi,

the IF_THEN_ELSE detection currently prevents us from properly costing
register-register moves which causes the lower-subreg pass to assume
that a VR-VR move is as expensive as two GPR-GPR moves.

This patch adds handling for SETs containing REGs as well as MEMs and is
inspired by the aarch64 implementation.

Bootstrapped and regtested on z900 up to z15. Is it OK?

Regards
 Robin

--

gcc/ChangeLog:

	* config/s390/s390.cc (s390_address_cost): Declare.
       	(s390_hard_regno_nregs): Declare.
       	(s390_rtx_costs): Add handling for REG and MEM in SET.

gcc/testsuite/ChangeLog:

       	* gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c: New
test.


From 8c4c6f029dbf0c9db12c2877189a5ec0ce0a9c89 Mon Sep 17 00:00:00 2001
From: Robin Dapp <rdapp@linux.ibm.com>
Date: Thu, 3 Feb 2022 12:50:04 +0100
Subject: [PATCH] s390: Change SET rtx_cost handling.

The IF_THEN_ELSE detection currently prevents us from properly costing
register-register moves which causes the lower-subreg pass to assume that
a VR-VR move is as expensive as two GPR-GPR moves.

This patch adds handling for SETs containing REGs as well as MEMs and is
inspired by the aarch64 implementation.

gcc/ChangeLog:

	* config/s390/s390.cc (s390_address_cost): Declare.
       	(s390_hard_regno_nregs): Declare.
       	(s390_rtx_costs): Add handling for REG and MEM in SET.

gcc/testsuite/ChangeLog:

       	* gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c: New
test.
---
 gcc/config/s390/s390.cc                       | 140 +++++++++++++-----
 .../vector/vec-sum-across-no-lower-subreg-1.c |  17 +++
 2 files changed, 118 insertions(+), 39 deletions(-)
 create mode 100644
gcc/testsuite/gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c

+++
b/gcc/testsuitgcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.ce/gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -mzvector -march=z15 -fdump-rtl-subreg1" } */
+
+/* { dg-final { scan-rtl-dump-times "Skipping mode V2DI for copy
lowering" 2 "subreg1" } } */
+
+#include <vecintrin.h>
+
+#define STYPE long long
+#define VTYPE __attribute__((vector_size(16))) STYPE
+
+STYPE foo1 (VTYPE a)
+{
+  /* { dg-final { scan-assembler-not "vst\t.*" } } */
+  /* { dg-final { scan-assembler-not "lg\t.*" } } */
+  /* { dg-final { scan-assembler-not "lgr\t.*" } } */
+  return a[0] + a[1];
+}
  

Comments

Andreas Krebbel Feb. 25, 2022, 5:50 p.m. UTC | #1
On 2/25/22 12:38, Robin Dapp wrote:
> Hi,
> 
> the IF_THEN_ELSE detection currently prevents us from properly costing
> register-register moves which causes the lower-subreg pass to assume
> that a VR-VR move is as expensive as two GPR-GPR moves.
> 
> This patch adds handling for SETs containing REGs as well as MEMs and is
> inspired by the aarch64 implementation.
> 
> Bootstrapped and regtested on z900 up to z15. Is it OK?
> 
> Regards
>  Robin
> 
> --
> 
> gcc/ChangeLog:
> 
> 	* config/s390/s390.cc (s390_address_cost): Declare.
>        	(s390_hard_regno_nregs): Declare.
>        	(s390_rtx_costs): Add handling for REG and MEM in SET.
> 
> gcc/testsuite/ChangeLog:
> 
>        	* gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c: New
> test.

Ok. Thanks

Andreas
  

Patch

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d2af6d8813d..e647c90ab29 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -429,6 +429,14 @@  struct s390_address
    bytes on a z10 (or higher) CPU.  */
 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)

+static int
+s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
+		   addr_space_t as ATTRIBUTE_UNUSED,
+		   bool speed ATTRIBUTE_UNUSED);
+
+static unsigned int
+s390_hard_regno_nregs (unsigned int regno, machine_mode mode);
+
 /* Masks per jump target register indicating which thunk need to be
    generated.  */
 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
@@ -3619,50 +3627,104 @@  s390_rtx_costs (rtx x, machine_mode mode, int
outer_code,
     case MEM:
       *total = 0;
       return true;
-
     case SET:
-      {
-	/* Without this a conditional move instruction would be
-	   accounted as 3 * COSTS_N_INSNS (set, if_then_else,
-	   comparison operator).  That's a bit pessimistic.  */
+	{
+	  rtx dest = SET_DEST (x);
+	  rtx src = SET_SRC (x);

-	if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
-	  return false;
+	  switch (GET_CODE (src))
+	    {
+	    case IF_THEN_ELSE:
+		{
+		  /* Without this a conditional move instruction would be
+		     accounted as 3 * COSTS_N_INSNS (set, if_then_else,
+		     comparison operator).  That's a bit pessimistic.  */
+
+		  if (!TARGET_Z196)
+		    return false;
+
+		  rtx cond = XEXP (src, 0);
+		  if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
+		    return false;
+
+		  /* It is going to be a load/store on condition.  Make it
+		     slightly more expensive than a normal load.  */
+		  *total = COSTS_N_INSNS (1) + 2;
+
+		  rtx dst = SET_DEST (src);
+		  rtx then = XEXP (src, 1);
+		  rtx els = XEXP (src, 2);
+
+		  /* It is a real IF-THEN-ELSE.  An additional move will be
+		     needed to implement that.  */
+		  if (!TARGET_Z15
+		      && reload_completed
+		      && !rtx_equal_p (dst, then)
+		      && !rtx_equal_p (dst, els))
+		    *total += COSTS_N_INSNS (1) / 2;
+
+		  /* A minor penalty for constants we cannot directly handle.  */
+		  if ((CONST_INT_P (then) || CONST_INT_P (els))
+		      && (!TARGET_Z13 || MEM_P (dst)
+			  || (CONST_INT_P (then) && !satisfies_constraint_K (then))
+			  || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
+		    *total += COSTS_N_INSNS (1) / 2;
+
+		  /* A store on condition can only handle register src operands.  */
+		  if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
+		    *total += COSTS_N_INSNS (1) / 2;
+
+		  return true;
+		}
+	    default:
+	      break;
+	    }

-	rtx cond = XEXP (SET_SRC (x), 0);
+	  switch (GET_CODE (dest))
+	    {
+	    case SUBREG:
+	      if (!REG_P (SUBREG_REG (dest)))
+		*total += rtx_cost (SUBREG_REG (src), VOIDmode, SET, 0, speed);
+	      /* fallthrough */
+	    case REG:
+	      /* If this is a VR -> VR copy, count the number of
+		 registers.  */
+	      if (VECTOR_MODE_P (GET_MODE (dest)) && REG_P (src))
+		{
+		  int nregs = s390_hard_regno_nregs (VR0_REGNUM, GET_MODE
+						     (dest));
+		  *total = COSTS_N_INSNS (nregs);
+		}
+	      /* Same for GPRs.  */
+	      else if (REG_P (src))
+		{
+		  int nregs = s390_hard_regno_nregs (GPR0_REGNUM, GET_MODE
+						     (dest));
+		  *total = COSTS_N_INSNS (nregs);
+		}
+	      else
+		/* Otherwise just cost the src.  */
+		*total += rtx_cost (src, mode, SET, 1, speed);
+	      return true;
+	    case MEM:
+		{
+		  rtx address = XEXP (dest, 0);
+		  rtx tmp;
+		  long tmp2;
+		  if (s390_loadrelative_operand_p (address, &tmp, &tmp2))
+		    *total = COSTS_N_INSNS (1);
+		  else
+		    *total = s390_address_cost (address, mode, 0, speed);
+		  return true;
+		}
+	    default:
+	      /* Not handled for now, assume default costs.  */
+	      *total = COSTS_N_INSNS (1);
+	      return false;
+	    }

-	if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
 	  return false;
-
-	/* It is going to be a load/store on condition.  Make it
-	   slightly more expensive than a normal load.  */
-	*total = COSTS_N_INSNS (1) + 2;
-
-	rtx dst = SET_DEST (x);
-	rtx then = XEXP (SET_SRC (x), 1);
-	rtx els = XEXP (SET_SRC (x), 2);
-
-	/* It is a real IF-THEN-ELSE.  An additional move will be
-	   needed to implement that.  */
-	if (!TARGET_Z15
-	    && reload_completed
-	    && !rtx_equal_p (dst, then)
-	    && !rtx_equal_p (dst, els))
-	  *total += COSTS_N_INSNS (1) / 2;
-
-	/* A minor penalty for constants we cannot directly handle.  */
-	if ((CONST_INT_P (then) || CONST_INT_P (els))
-	    && (!TARGET_Z13 || MEM_P (dst)
-		|| (CONST_INT_P (then) && !satisfies_constraint_K (then))
-		|| (CONST_INT_P (els) && !satisfies_constraint_K (els))))
-	  *total += COSTS_N_INSNS (1) / 2;
-
-	/* A store on condition can only handle register src operands.  */
-	if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
-	  *total += COSTS_N_INSNS (1) / 2;
-
-	return true;
-      }
+	}
     case IOR:

       /* nnrk, nngrk */
diff --git
a/gcc/testsuite/gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c
b/gcc/testsuite/gcc.target/s390/vector/vec-sum-across-no-lower-subreg-1.c
new file mode 100644
index 00000000000..17870f397ed
--- /dev/null