[44/61] Autovectorization failures on BE targets

Message ID 20250131171232.1018281-46-aleksandar.rakic@htecgroup.com
State New
Headers
Series Improve Mips target |

Commit Message

Aleksandar Rakic Jan. 31, 2025, 5:13 p.m. UTC
  From: "dragan.mladjenovic" <dragan.mladjenovic@rt-rk.com>

GCC assumes that taking a vector mode B SUBREG of a vector mode A register
allows it to interpret the register's memory layout as if in A vector mode.

We currently allow this mode change to be a no-op on MSA registers. This
works on little-endian because the MSA register layout matches that of a
vector value in memory. This breaks on big-endian because the ordering of
bytes within a lane depends on the target endianness.

On big-endian targets we now conservatively disallow direct MSA register
mode changes between modes of different lane width via
TARGET_CAN_CHANGE_MODE_CLASS, making them go through memory.

gcc/
    * config/mips/mips-msa.md (UNSPEC_MSA_CHANGE_MODE): New unspec.
    (msa_change_mode): New expand pattern.
    (msa_change_mode_<mode>): New insn pattern.
    * config/mips/mips.cc (mips_split_128bit_move): Replace MSA mode
    changing uses of simplify_gen_subreg with gen_rtx_REG.
    (mips_split_msa_copy_d): Ditto.
    (mips_split_msa_insert_d): Ditto.
    (mips_split_msa_fill_d): Ditto.
    (mips_can_change_mode_class): Disallow change of MSA modes with
    different lane width on big-endian targets.
    (mips_expand_vec_unpack): Use gen_msa_change_mode instead of
    gen_lowpart for MSA modes.

Cherry-picked c00d34621429f31926e0c72e027b0c1028d046f0
from https://github.com/MIPS/gcc

Signed-off-by: Dragan Mladjenovic <dragan.mladjenovic@rt-rk.com>
Signed-off-by: Faraz Shahbazker <fshahbazker@wavecomp.com>
Signed-off-by: Aleksandar Rakic <aleksandar.rakic@htecgroup.com>
---
 gcc/config/mips/mips-msa.md | 37 +++++++++++++++++++++++++++++++++++++
 gcc/config/mips/mips.cc     | 29 ++++++++++++++++++-----------
 2 files changed, 55 insertions(+), 11 deletions(-)
  

Patch

diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md
index f6edd5897a4..5ac4fa4bf24 100644
--- a/gcc/config/mips/mips-msa.md
+++ b/gcc/config/mips/mips-msa.md
@@ -90,6 +90,7 @@ 
   UNSPEC_MSA_SUBSUU_S
   UNSPEC_MSA_SUBSUS_U
   UNSPEC_MSA_VSHF
+  UNSPEC_MSA_CHANGE_MODE
 ])
 
 ;; All vector modes with 128 bits.
@@ -2930,3 +2931,39 @@ 
 					      const0_rtx));
   DONE;
 })
+
+;; On big-endian targets we cannot use subregs to refer to MSA register
+;; in different mode.  See mips_can_change_mode_class.
+(define_expand "msa_change_mode"
+  [(match_operand 0 "register_operand")
+   (match_operand 1 "register_operand")]
+  "ISA_HAS_MSA"
+{
+  gcc_assert (MSA_SUPPORTED_MODE_P (GET_MODE (operands[0]))
+	      && MSA_SUPPORTED_MODE_P (GET_MODE (operands[1])));
+
+  if (!TARGET_BIG_ENDIAN)
+      emit_move_insn (operands[0],
+		      gen_lowpart (GET_MODE (operands[0]), operands[1]));
+    else
+      emit_move_insn (operands[0],
+		      gen_rtx_UNSPEC (GET_MODE (operands[0]),
+				      gen_rtvec (1, operands[1]),
+				      UNSPEC_MSA_CHANGE_MODE));
+  DONE;
+})
+
+(define_insn_and_split "msa_change_mode_<mode>"
+  [(set (match_operand:MSA 0 "register_operand" "=f")
+    (unspec:MSA [(match_operand 1 "register_operand" "f")]
+	  UNSPEC_MSA_CHANGE_MODE))]
+  "ISA_HAS_MSA && TARGET_BIG_ENDIAN
+   && MSA_SUPPORTED_MODE_P (GET_MODE (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+    operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
+}
+  [(set_attr "move_type" "fmove")
+   (set_attr "mode" "<MODE>")])
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 10f302e0790..e0b357a651a 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -5998,12 +5998,12 @@  mips_split_128bit_move (rtx dest, rtx src)
       if (!TARGET_64BIT)
 	{
 	  if (GET_MODE (dest) != V4SImode)
-	    new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
+	    new_dest = gen_rtx_REG (V4SImode, REGNO (dest));
 	}
       else
 	{
 	  if (GET_MODE (dest) != V2DImode)
-	    new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0);
+	    new_dest = gen_rtx_REG (V2DImode, REGNO (dest));
 	}
 
       for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
@@ -6026,12 +6026,12 @@  mips_split_128bit_move (rtx dest, rtx src)
       if (!TARGET_64BIT)
 	{
 	  if (GET_MODE (src) != V4SImode)
-	    new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
+	    new_src = gen_rtx_REG (V4SImode, REGNO (src));
 	}
       else
 	{
 	  if (GET_MODE (src) != V2DImode)
-	    new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0);
+	    new_src = gen_rtx_REG (V2DImode, REGNO (src));
 	}
 
       for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
@@ -6087,7 +6087,8 @@  mips_split_msa_copy_d (rtx dest, rtx src, rtx index,
      from the higher index.  */
   rtx low = mips_subword (dest, false);
   rtx high = mips_subword (dest, true);
-  rtx new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
+
+  rtx new_src = gen_rtx_REG (V4SImode, REGNO (src));
 
   emit_insn (gen_fn (low, new_src, GEN_INT (INTVAL (index) * 2)));
   emit_insn (gen_fn (high, new_src, GEN_INT (INTVAL (index) * 2 + 1)));
@@ -6108,8 +6109,8 @@  mips_split_msa_insert_d (rtx dest, rtx src1, rtx index, rtx src2)
      from the higher index.  */
   rtx low = mips_subword (src2, false);
   rtx high = mips_subword (src2, true);
-  rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
-  rtx new_src1 = simplify_gen_subreg (V4SImode, src1, GET_MODE (src1), 0);
+  rtx new_dest = gen_rtx_REG (V4SImode, REGNO (dest));
+  rtx new_src1 = gen_rtx_REG (V4SImode, REGNO (src1));
   i = exact_log2 (INTVAL (index));
   gcc_assert (i != -1);
 
@@ -6141,7 +6142,7 @@  mips_split_msa_fill_d (rtx dest, rtx src)
       low = mips_subword (src, false);
       high = mips_subword (src, true);
     }
-  rtx new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
+  rtx new_dest = gen_rtx_REG (V4SImode, REGNO (dest));
   emit_insn (gen_msa_fill_w (new_dest, low));
   emit_insn (gen_msa_insert_w (new_dest, high, new_dest, GEN_INT (1 << 1)));
   emit_insn (gen_msa_insert_w (new_dest, high, new_dest, GEN_INT (1 << 3)));
@@ -14774,9 +14775,15 @@  mips_can_change_mode_class (machine_mode from,
       && INTEGRAL_MODE_P (from) && INTEGRAL_MODE_P (to))
     return true;
 
-  /* Allow conversions between different MSA vector modes.  */
+  /* Allow conversions between different MSA vector modes.
+     On big-endian targets the MSA register layout doesn't
+     match its memory layout, so we disallow mode change that
+     would result in lane width change.  */
   if (MSA_SUPPORTED_MODE_P (from) && MSA_SUPPORTED_MODE_P (to))
-    return true;
+   {
+    return !TARGET_BIG_ENDIAN
+	   || (GET_MODE_UNIT_SIZE (from) == GET_MODE_UNIT_SIZE (to));
+   }
 
   /* Otherwise, there are several problems with changing the modes of
      values in floating-point registers:
@@ -24584,7 +24591,7 @@  mips_expand_vec_unpack (rtx operands[2], bool unsigned_p, bool high_p)
       dest = gen_reg_rtx (imode);
 
       emit_insn (unpack (dest, operands[1], tmp));
-      emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), dest));
+      emit_insn (gen_msa_change_mode (operands[0], dest));
       return;
     }