use subreg for movsf_from_si and remove UNSPEC_SF_FROM_SI

Message ID 20230224083008.1082527-1-guojiufu@linux.ibm.com
State New
Headers
Series use subreg for movsf_from_si and remove UNSPEC_SF_FROM_SI |

Commit Message

Jiufu Guo Feb. 24, 2023, 8:30 a.m. UTC
  Hi,

In patch https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612168.html,
we improved the bitcast from lowpart/highpart of DI to SF by using mtvsrws
or mtvsrd.

While investigating this functionality, we found we can improve the related code by using
bitcast subreg from SI to SF, and avoid generating UNSPEC_SF_FROM_SI.

We can also improve cases like "subreg:SI(reg:SF)=reg:SI", which is a cast
from SI to SF (e.g. pr48335-1.c).

This patch also reduces clobber usage, adding a clobber only for p8, where an
additional register is required.

This patch passes bootstrap and regtest on ppc64 (p7, p8 and p9) and ppc64le (p10, p9).

Is this patch OK for trunk (or maybe stage1)? Thanks for comments and suggestions!


BR,
Jeff (Jiufu)

gcc/ChangeLog:

	* config/rs6000/predicates.md: Rename TARGET_NO_SF_SUBREG to
	BITCAST_SI_SF_IN_REGS, and rename TARGET_ALLOW_SF_SUBREG to
	BITCAST_SI_SF_IN_MEM.
	* config/rs6000/rs6000.cc (valid_sf_si_move): Likewise.
	(is_lfs_stfs_insn): Split to is_stfs_insn and is_lfs_insn.
	(is_stfs_insn): Split from is_lfs_stfs_insn.
	(is_lfs_insn): Split from is_lfs_stfs_insn.
	(prefixed_load_p): Call is_lfs_insn.
	(prefixed_store_p): Call is_stfs_insn.
	* config/rs6000/rs6000.h (TARGET_NO_SF_SUBREG): Rename to ...
	(BITCAST_SI_SF_IN_REGS): ... this.
	(TARGET_ALLOW_SF_SUBREG): Rename to ...
	(BITCAST_SI_SF_IN_MEM): ... this.
	* config/rs6000/rs6000.md (movsf_from_si_p8): New define_insn_and_split.

---
 gcc/config/rs6000/predicates.md | 16 +++---
 gcc/config/rs6000/rs6000.cc     | 36 ++++++++----
 gcc/config/rs6000/rs6000.h      |  4 +-
 gcc/config/rs6000/rs6000.md     | 98 +++++++++++++++++++++------------
 4 files changed, 97 insertions(+), 57 deletions(-)
  

Patch

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index e57c9d99c6b..4a7d5893126 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -47,7 +47,7 @@  (define_predicate "sf_subreg_operand"
   rtx inner_reg = SUBREG_REG (op);
   machine_mode inner_mode = GET_MODE (inner_reg);
 
-  if (TARGET_ALLOW_SF_SUBREG || !REG_P (inner_reg))
+  if (BITCAST_SI_SF_IN_MEM || !REG_P (inner_reg))
     return 0;
 
   if ((mode == SFmode && GET_MODE_CLASS (inner_mode) == MODE_INT)
@@ -67,7 +67,7 @@  (define_predicate "altivec_register_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
@@ -88,7 +88,7 @@  (define_predicate "vsx_register_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
@@ -126,7 +126,7 @@  (define_predicate "vfloat_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
@@ -148,7 +148,7 @@  (define_predicate "vint_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
@@ -170,7 +170,7 @@  (define_predicate "vlogical_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
@@ -346,7 +346,7 @@  (define_predicate "gpc_reg_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
@@ -375,7 +375,7 @@  (define_predicate "int_reg_operand"
 {
   if (SUBREG_P (op))
     {
-      if (TARGET_NO_SF_SUBREG && sf_subreg_operand (op, mode))
+      if (BITCAST_SI_SF_IN_REGS && sf_subreg_operand (op, mode))
 	return 0;
 
       op = SUBREG_REG (op);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 16ca3a31757..b8a9f01cbfa 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -10565,7 +10565,7 @@  rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
 bool
 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
 {
-  if (TARGET_ALLOW_SF_SUBREG)
+  if (BITCAST_SI_SF_IN_MEM)
     return true;
 
   if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
@@ -26425,13 +26425,10 @@  pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
    - stfs:
     - SET is from UNSPEC_SI_FROM_SF to MEM:SI
     - CLOBBER is a V4SF
-   - lfs:
-    - SET is from UNSPEC_SF_FROM_SI to REG:SF
-    - CLOBBER is a DI
  */
 
 static bool
-is_lfs_stfs_insn (rtx_insn *insn)
+is_stfs_insn (rtx_insn *insn)
 {
   rtx pattern = PATTERN (insn);
   if (GET_CODE (pattern) != PARALLEL)
@@ -26466,10 +26463,27 @@  is_lfs_stfs_insn (rtx_insn *insn)
       && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
     return true;
 
-  /* lfs case.  */
-  if (XINT (src, 1) == UNSPEC_SF_FROM_SI
-      && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
-      && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
+  return false;
+}
+
+
+static bool
+is_lfs_insn (rtx_insn *insn)
+{
+  rtx set = PATTERN (insn);
+  if (GET_CODE (set) != SET)
+    return false;
+
+  rtx dest = SET_DEST (set);
+  rtx src = SET_SRC (set);
+
+  if (!SUBREG_P (src))
+    return false;
+
+   /* lfs case.  */
+  if (GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
+      && GET_MODE (SUBREG_REG (src)) == SImode
+      && GET_CODE (SUBREG_REG (src)) == MEM)
     return true;
 
   return false;
@@ -26585,7 +26599,7 @@  prefixed_load_p (rtx_insn *insn)
   else
     non_prefixed = reg_to_non_prefixed (reg, mem_mode);
 
-  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
+  if (non_prefixed == NON_PREFIXED_X && is_lfs_insn (insn))
     return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
   else
     return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
@@ -26623,7 +26637,7 @@  prefixed_store_p (rtx_insn *insn)
   /* Need to make sure we aren't looking at a stfs which doesn't look
      like the other things reg_to_non_prefixed/address_is_prefixed
      looks for.  */
-  if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
+  if (non_prefixed == NON_PREFIXED_X && is_stfs_insn (insn))
     return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
   else
     return address_is_prefixed (addr, mem_mode, non_prefixed);
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3503614efbd..03b20fb8d66 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -480,8 +480,8 @@  extern int rs6000_vector_align[];
 				 && TARGET_POWERPC64)
 
 /* Whether we should avoid (SUBREG:SI (REG:SF) and (SUBREG:SF (REG:SI).  */
-#define TARGET_NO_SF_SUBREG	TARGET_DIRECT_MOVE_64BIT
-#define TARGET_ALLOW_SF_SUBREG	(!TARGET_DIRECT_MOVE_64BIT)
+#define BITCAST_SI_SF_IN_REGS	TARGET_DIRECT_MOVE_64BIT
+#define BITCAST_SI_SF_IN_MEM	(!TARGET_DIRECT_MOVE_64BIT)
 
 /* This wants to be set for p8 and newer.  On p7, overlapping unaligned
    loads are slow. */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 74b1c9cee6a..90ee0d566ab 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -145,7 +145,6 @@  (define_c_enum "unspec"
    UNSPEC_SQRT_ROUND_TO_ODD
    UNSPEC_TRUNC_ROUND_TO_ODD
    UNSPEC_SIGNBIT
-   UNSPEC_SF_FROM_SI
    UNSPEC_SI_FROM_SF
    UNSPEC_PLTSEQ
    UNSPEC_PLT16_HA
@@ -7655,7 +7654,7 @@  (define_insn_and_split "movsi_from_sf"
 		"=X,         X,           X,           X,        X,
 		 X,          X,           X,           wa,       X,
 		 X"))]
-  "TARGET_NO_SF_SUBREG
+  "BITCAST_SI_SF_IN_REGS
    && (register_operand (operands[0], SImode)
        || register_operand (operands[1], SFmode))"
   "@
@@ -7761,7 +7760,7 @@  (define_insn "*movsi_from_df"
 	(unspec:SI [(float_truncate:SF
 		     (match_operand:DF 1 "gpc_reg_operand" "wa"))]
 		    UNSPEC_SI_FROM_SF))]
-  "TARGET_NO_SF_SUBREG"
+  "BITCAST_SI_SF_IN_REGS"
   "xscvdpsp %x0,%x1"
   [(set_attr "type" "fp")])
 
@@ -8053,7 +8052,7 @@  (define_insn "movsf_hardfloat"
   "(register_operand (operands[0], SFmode)
    || register_operand (operands[1], SFmode))
    && TARGET_HARD_FLOAT
-   && (TARGET_ALLOW_SF_SUBREG
+   && (BITCAST_SI_SF_IN_MEM
        || valid_sf_si_move (operands[0], operands[1], SFmode))"
   "@
    lwz%U1%X1 %0,%1
@@ -8171,14 +8170,10 @@  (define_insn_and_split "movsf_from_si"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	    "=!r,       f,         v,         wa,        m,         Z,
 	     Z,         wa,        ?r,        !r")
-	(unspec:SF [(match_operand:SI 1 "input_operand" 
+	(subreg:SF (match_operand:SI 1 "input_operand"
 	    "m,         m,         wY,        Z,         r,         f,
-	     wa,        r,         wa,        r")]
-		   UNSPEC_SF_FROM_SI))
-   (clobber (match_scratch:DI 2
-	    "=X,        X,         X,         X,         X,         X,
-             X,         r,         X,         X"))]
-  "TARGET_NO_SF_SUBREG
+	     wa,        r,         wa,        r") 0))]
+  "BITCAST_SI_SF_IN_REGS
    && (register_operand (operands[0], SFmode)
        || register_operand (operands[1], SImode))"
   "@
@@ -8192,31 +8187,25 @@  (define_insn_and_split "movsf_from_si"
    #
    mfvsrwz %0,%x1
    mr %0,%1"
-
-  "&& reload_completed
-   && vsx_reg_sfsubreg_ok (operands[0], SFmode)
-   && int_reg_operand_not_pseudo (operands[1], SImode)"
+  "&& ((!reload_completed && !TARGET_P9_VECTOR
+        && gpc_reg_operand (operands[0], SFmode)
+        && gpc_reg_operand (operands[1], SImode))
+       || (reload_completed && TARGET_P9_VECTOR
+	   && vsx_reg_sfsubreg_ok (operands[0], SFmode)
+	   && int_reg_operand_not_pseudo (operands[1], SImode)))"
   [(const_int 0)]
 {
-  rtx op0 = operands[0];
-  rtx op1 = operands[1];
-
-  if (TARGET_P9_VECTOR)
+  if (reload_completed)
     {
+      rtx op0 = operands[0];
+      rtx op1 = operands[1];
+
       rtx op0_v = gen_rtx_REG (V4SImode, REGNO (op0));
       emit_insn (gen_vsx_splat_v4si (op0_v, op1));
       emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
     }
   else
-    {
-      rtx op2 = operands[2];
-      rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
-
-      /* Move SF value to upper 32-bits for xscvspdpn.  */
-      emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
-      emit_insn (gen_p8_mtvsrd_sf (op0, op2));
-      emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
-    }
+    emit_insn (gen_movsf_from_si_p8 (operands[0], operands[1]));
 
   DONE;
 }
@@ -8230,6 +8219,46 @@  (define_insn_and_split "movsf_from_si"
 	    "*,          *,         p9v,       p8v,       *,         *,
 	     p8v,        p8v,       p8v,       *")])
 
+(define_insn_and_split "movsf_from_si_p8"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
+	(subreg:SF (match_operand:SI 1 "gpc_reg_operand" "r") 0))
+   (clobber (match_scratch:DI 2 "=r"))]
+  "BITCAST_SI_SF_IN_REGS"
+  "#"
+  "&& reload_completed
+   && vsx_reg_sfsubreg_ok (operands[0], SFmode)
+   && int_reg_operand_not_pseudo (operands[1], SImode)"
+  [(const_int 0)]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
+
+  /* Move SF value to upper 32-bits for xscvspdpn.  */
+  emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+  emit_insn (gen_p8_mtvsrd_sf (op0, op2));
+  emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+
+  DONE;
+}
+  [(set_attr "length" "12")
+  (set_attr "type" "vecfloat")
+  (set_attr "isa" "p8v")])
+
+(define_split
+  [(set (subreg:SI (match_operand:SF 0 "gpc_reg_operand") 0)
+	(match_operand:SI 1 "gpc_reg_operand"))]
+  "BITCAST_SI_SF_IN_REGS"
+  [(const_int 0)]
+{
+  if (TARGET_P9_VECTOR)
+    emit_insn (gen_movsf_from_si (operands[0], operands[1]));
+  else
+    emit_insn (gen_movsf_from_si_p8 (operands[0], operands[1]));
+  DONE;
+})
+
 (define_code_iterator any_rshift [ashiftrt lshiftrt])
 
 ;; For extracting high part element from DImode register like:
@@ -8237,15 +8266,12 @@  (define_code_iterator any_rshift [ashiftrt lshiftrt])
 ;; split it before reload with "and mask" to avoid generating shift right
 ;; 32 bit then shift left 32 bit.
 (define_insn_and_split "movsf_from_si2_<code>"
-  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
-	    (unspec:SF
-	     [(match_operator:SI 3 "lowpart_subreg_operator"
-	       [(any_rshift:DI
-		(match_operand:DI 1 "input_operand" "r")
-		(const_int 32))])]
-	     UNSPEC_SF_FROM_SI))
+  [(set (subreg:SI (match_operand:SF 0 "gpc_reg_operand" "=wa") 0)
+	(match_operator:SI 3 "lowpart_subreg_operator"
+	  [(any_rshift:DI (match_operand:DI 1 "input_operand" "r")
+			  (const_int 32))]))
   (clobber (match_scratch:DI 2 "=r"))]
-  "TARGET_NO_SF_SUBREG"
+  "BITCAST_SI_SF_IN_REGS"
   "#"
   "&& 1"
   [(const_int 0)]