[54/61] fmadd.w should be restricted to mipsr6

Message ID 20250131171232.1018281-56-aleksandar.rakic@htecgroup.com
State New
Headers
Series Improve Mips target |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply

Commit Message

Aleksandar Rakic Jan. 31, 2025, 5:13 p.m. UTC
  From: "dragan.mladjenovic" <dragan.mladjenovic@rt-rk.com>

This patch prevents middle-end from using MSA fma on pre-r6 targets
in order to avoid subtle inconsistencies with auto-vectorized code that
might mix MSA fma with unfused scalar multiply-add.

There might be Loongson targets that support MSA while having scalar
multiply-add that is fused (contrary to ISA spec). This patch doesn't
handle those cases.

gcc/
	* config/mips/mips-msa.md (fma<mode>4, fnma<mode>4): Transform
	into empty expander. Conditionalize on ISA_HAS_FUSED_MADDF.
	Move the body into ...
	(msa_fmadd_<msafmt>, msa_fmsub_<msafmt>): New insn patterns.
	* config/mips/mips.cc (CODE_FOR_msa_fmadd_*): Remove.
	(CODE_FOR_msa_fmsub_*): Ditto.

gcc/testsuite/
	* gcc.target/mips/msa-fuse-madd-double.c: New test.
	* gcc.target/mips/msa-fuse-madd-single.c: New test.
	* gcc.target/mips/msa.c: Do not match fmadd/fmsub on
	!mipsisar6 targets.
	* lib/target-supports.exp: Define mipsisar6 target.

Cherry-picked 7a48948f245a5e46f55d59c6ac0982a815665ccf
from https://github.com/MIPS/gcc

Signed-off-by: Dragan Mladjenovic <dragan.mladjenovic@rt-rk.com>
Signed-off-by: Faraz Shahbazker <fshahbazker@wavecomp.com>
Signed-off-by: Aleksandar Rakic <aleksandar.rakic@htecgroup.com>
---
 gcc/config/mips/mips-msa.md                   | 26 +++++++---
 gcc/config/mips/mips.cc                       |  4 --
 .../gcc.target/mips/msa-fuse-madd-double.c    | 52 +++++++++++++++++++
 .../gcc.target/mips/msa-fuse-madd-single.c    | 51 ++++++++++++++++++
 gcc/testsuite/gcc.target/mips/msa.c           | 14 +++--
 gcc/testsuite/lib/target-supports.exp         | 10 ++++
 6 files changed, 143 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/msa-fuse-madd-double.c
 create mode 100644 gcc/testsuite/gcc.target/mips/msa-fuse-madd-single.c
  

Patch

diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md
index e2fdf8e191e..34f140e159c 100644
--- a/gcc/config/mips/mips-msa.md
+++ b/gcc/config/mips/mips-msa.md
@@ -973,21 +973,35 @@ 
   [(set_attr "type" "simd_fdiv")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "fma<mode>4"
+(define_expand "fma<mode>4"
   [(set (match_operand:FMSA 0 "msa_reg_operand" "=f")
 	(fma:FMSA (match_operand:FMSA 1 "msa_reg_operand" "f")
 		  (match_operand:FMSA 2 "msa_reg_operand" "f")
 		  (match_operand:FMSA 3 "msa_reg_operand" "0")))]
-  "ISA_HAS_MSA"
-  "fmadd.<msafmt>\t%w0,%w1,%w2"
-  [(set_attr "type" "simd_fmadd")
-   (set_attr "mode" "<MODE>")])
+  "ISA_HAS_MSA && ISA_HAS_FUSED_MADDF")
 
-(define_insn "fnma<mode>4"
+(define_expand "fnma<mode>4"
   [(set (match_operand:FMSA 0 "msa_reg_operand" "=f")
 	(fma:FMSA (neg:FMSA (match_operand:FMSA 1 "msa_reg_operand" "f"))
 		  (match_operand:FMSA 2 "msa_reg_operand" "f")
 		  (match_operand:FMSA 3 "msa_reg_operand" "0")))]
+  "ISA_HAS_MSA && ISA_HAS_FUSED_MADDF")
+
+(define_insn "msa_fmadd_<msafmt>"
+  [(set (match_operand:FMSA 0 "msa_reg_operand" "=f")
+  (fma:FMSA (match_operand:FMSA 1 "msa_reg_operand" "f")
+      (match_operand:FMSA 2 "msa_reg_operand" "f")
+      (match_operand:FMSA 3 "msa_reg_operand" "0")))]
+ "ISA_HAS_MSA"
+  "fmadd.<msafmt>\t%w0,%w1,%w2"
+  [(set_attr "type" "simd_fmadd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "msa_fmsub_<msafmt>"
+  [(set (match_operand:FMSA 0 "msa_reg_operand" "=f")
+  (fma:FMSA (neg:FMSA (match_operand:FMSA 1 "msa_reg_operand" "f"))
+      (match_operand:FMSA 2 "msa_reg_operand" "f")
+      (match_operand:FMSA 3 "msa_reg_operand" "0")))]
   "ISA_HAS_MSA"
   "fmsub.<msafmt>\t%w0,%w1,%w2"
   [(set_attr "type" "simd_fmadd")
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 4894e07f72c..4521cac15c7 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -17752,10 +17752,6 @@  AVAIL_NON_MIPS16 (msa, TARGET_MSA)
 #define CODE_FOR_msa_ffint_u_d CODE_FOR_floatunsv2div2df2
 #define CODE_FOR_msa_fsub_w CODE_FOR_subv4sf3
 #define CODE_FOR_msa_fsub_d CODE_FOR_subv2df3
-#define CODE_FOR_msa_fmadd_w CODE_FOR_fmav4sf4
-#define CODE_FOR_msa_fmadd_d CODE_FOR_fmav2df4
-#define CODE_FOR_msa_fmsub_w CODE_FOR_fnmav4sf4
-#define CODE_FOR_msa_fmsub_d CODE_FOR_fnmav2df4
 #define CODE_FOR_msa_fmul_w CODE_FOR_mulv4sf3
 #define CODE_FOR_msa_fmul_d CODE_FOR_mulv2df3
 #define CODE_FOR_msa_fdiv_w CODE_FOR_divv4sf3
diff --git a/gcc/testsuite/gcc.target/mips/msa-fuse-madd-double.c b/gcc/testsuite/gcc.target/mips/msa-fuse-madd-double.c
new file mode 100644
index 00000000000..e98bf017a6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/msa-fuse-madd-double.c
@@ -0,0 +1,52 @@ 
+/* { dg-do run } */
+/* { dg-options "-mhard-float -mmsa" } */
+/* { dg-additional-options "-ffp-contract=fast" } */
+
+#define VSIZE 8
+
+typedef union
+{ double d; long long unsigned i; } double_ull_u;
+
+struct test_vec {
+  double_ull_u a;
+  double_ull_u b;
+  double_ull_u c;
+} test_bench[VSIZE] = {
+  {{.i=0x2c27173b4c9b0904ull}, {.i=0x6aa7b75c1df029d3ull}, {.i=0x5675ff363dd15094ull}},
+  {{.i=0x3a6f0e78379a5b56ull}, {.i=0x53b735d529784870ull}, {.i=0x4cdced4c10a30d9cull}},
+  {{.i=0x12d2eee56cc2b66aull}, {.i=0x60cd438558be66cdull}, {.i=0x335e9e8d425c189bull}},
+  {{.i=0x680d29830daea0c2ull}, {.i=0x4c5977b52c0d49efull}, {.i=0x7305e21c2165c647ull}},
+  {{.i=0x4e4add4115ecbebull}, {.i=0x401d6aed0c821feeull}, {.i=0x300832736663b62ull}},
+  {{.i=0x1f6f475265504cc9ull}, {.i=0x4e5785aa042408acull}, {.i=0x2ab32c6b25521f4aull}},
+  {{.i=0xd09c440443b602dull}, {.i=0x5f618fbb1fe650a2ull}, {.i=0x295aa9221841d645ull}},
+  {{.i=0x732612c95a91b01full}, {.i=0x268678105b8f78b5ull}, {.i=0x5973c32a350e1c23ull}},
+};
+
+int main (void)
+{
+  int i;
+  double __attribute__((aligned(16))) av [VSIZE];
+  double __attribute__((aligned(16))) bv[VSIZE];
+  double __attribute__((aligned(16))) cv[VSIZE];
+  double __attribute__((aligned(16))) res1[VSIZE];
+  double __attribute__((aligned(16))) res2[VSIZE - 1];
+  
+  for (i = 0; i < VSIZE; i++)
+    {
+      av[i] = test_bench[i].a.d;
+      bv[i] = test_bench[i].b.d;
+      cv[i] = test_bench[i].c.d;
+    }
+
+  for (i = 0; i < VSIZE; i++)
+    res1[i] = av[i] * bv[i] + cv[i];
+
+  for (i = 0; i < VSIZE - 1; i++)
+    res2[i] = av[i] * bv[i] + cv[i];
+
+  for (i = 0; i < VSIZE - 1; i++)
+    if (res2[i] != res1[i])
+      return 1;
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/mips/msa-fuse-madd-single.c b/gcc/testsuite/gcc.target/mips/msa-fuse-madd-single.c
new file mode 100644
index 00000000000..03828a8ffb7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/msa-fuse-madd-single.c
@@ -0,0 +1,51 @@ 
+/* { dg-do run } */
+/* { dg-options "-mhard-float -mmsa" } */
+/* { dg-additional-options "-ffp-contract=fast" } */
+
+#define VSIZE 8
+
+typedef union { float f; unsigned long i; } float_ul_u;
+
+struct test_vec {
+  float_ul_u a;
+  float_ul_u b;
+  float_ul_u c;
+} test_bench[VSIZE] = {
+  {{.i=0x42963e5aul}, {.i=0xa0382c5ul}, {.i=0x8f2b15eul}},
+  {{.i=0x1c695decul}, {.i=0x3fcfaed9ul}, {.i=0xf856867ul}},
+  {{.i=0x116ae494ul}, {.i=0x3494b2fbul}, {.i=0xb13a31ul}},
+  {{.i=0x683caad3ul}, {.i=0x313c7c99ul}, {.i=0x519eb94cul}},
+  {{.i=0x4a9554feul}, {.i=0x392edbe4ul}, {.i=0x3d1a2dd9ul}},
+  {{.i=0x4c4fff5bul}, {.i=0x51b76675ul}, {.i=0x59a4ba71ul}},
+  {{.i=0x17cfc87dul}, {.i=0x5d66dc65ul}, {.i=0x30bb2b99ul}},
+  {{.i=0x61c66e3ul}, {.i=0x69321f16ul}, {.i=0x2d96b714ul}},
+};
+
+int main (void)
+{
+  int i;
+  float __attribute__((aligned(16))) av [VSIZE];
+  float __attribute__((aligned(16))) bv[VSIZE];
+  float __attribute__((aligned(16))) cv[VSIZE];
+  float __attribute__((aligned(16))) res1[VSIZE];
+  float __attribute__((aligned(16))) res2[VSIZE - 1];
+  
+  for (i = 0; i < VSIZE; i++)
+    {
+      av[i] = test_bench[i].a.f;
+      bv[i] = test_bench[i].b.f;
+      cv[i] = test_bench[i].c.f;
+    }
+
+  for (i = 0; i < VSIZE; i++)
+    res1[i] = av[i] * bv[i] + cv[i];
+
+  for (i = 0; i < VSIZE - 1; i++)
+    res2[i] = av[i] * bv[i] + cv[i];
+
+  for (i = 0; i < VSIZE - 1; i++)
+    if (res2[i] != res1[i])
+      return 1;
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/mips/msa.c b/gcc/testsuite/gcc.target/mips/msa.c
index 8647b6d9530..b6aaa5e9921 100644
--- a/gcc/testsuite/gcc.target/mips/msa.c
+++ b/gcc/testsuite/gcc.target/mips/msa.c
@@ -362,8 +362,11 @@ 
 /* { dg-final { scan-assembler-times "test37_v8u16:.*maddv.h.*test37_v8u16" 1 } } */
 /* { dg-final { scan-assembler-times "test37_v4u32:.*maddv.w.*test37_v4u32" 1 } } */
 /* { dg-final { scan-assembler-times "test37_v2u64:.*maddv.d.*test37_v2u64" 1 } } */
-/* { dg-final { scan-assembler-times "test37_v4f32:.*fmadd.w.*test37_v4f32" 1 } } */
-/* { dg-final { scan-assembler-times "test37_v2f64:.*fmadd.d.*test37_v2f64" 1 } } */
+/* Note: We chose not to emit fmadd.* on pre-r6 targets that lack scalar fma.  */
+/* { dg-final { scan-assembler-times "test37_v4f32:.*fmadd.w.*test37_v4f32" 1 { target mipsisar6 } } } */
+/* { dg-final { scan-assembler-times "test37_v2f64:.*fmadd.d.*test37_v2f64" 1 { target mipsisar6 } } } */
+/* { dg-final { scan-assembler-times "test37_v4f32:.*fmul.w.*fadd.w.*test37_v4f32" 1 { target {! mipsisar6 } } } } */
+/* { dg-final { scan-assembler-times "test37_v2f64:.*fmul.d.*fadd.d.*test37_v2f64" 1 { target {! mipsisar6 } } } } */
 /* { dg-final { scan-assembler-times "test38_v16i8:.*msubv.b.*test38_v16i8" 1 } } */
 /* { dg-final { scan-assembler-times "test38_v8i16:.*msubv.h.*test38_v8i16" 1 } } */
 /* { dg-final { scan-assembler-times "test38_v4i32:.*msubv.w.*test38_v4i32" 1 } } */
@@ -372,8 +375,11 @@ 
 /* { dg-final { scan-assembler-times "test38_v8u16:.*msubv.h.*test38_v8u16" 1 } } */
 /* { dg-final { scan-assembler-times "test38_v4u32:.*msubv.w.*test38_v4u32" 1 } } */
 /* { dg-final { scan-assembler-times "test38_v2u64:.*msubv.d.*test38_v2u64" 1 } } */
-/* { dg-final { scan-assembler-times "test38_v4f32:.*fmsub.w.*test38_v4f32" 1 } } */
-/* { dg-final { scan-assembler-times "test38_v2f64:.*fmsub.d.*test38_v2f64" 1 } } */
+/* Note: We chose not to emit fmsub.* on pre-r6 targets that lack scalar fma.  */
+/* { dg-final { scan-assembler-times "test38_v4f32:.*fmsub.w.*test38_v4f32" 1 { target mipsisar6 } } } */
+/* { dg-final { scan-assembler-times "test38_v2f64:.*fmsub.d.*test38_v2f64" 1 { target mipsisar6 } } } */
+/* { dg-final { scan-assembler-times "test38_v4f32:.*fmul.w.*fsub.w.*test38_v4f32" 1 { target {! mipsisar6 } } } } */
+/* { dg-final { scan-assembler-times "test38_v2f64:.*fmul.d.*fsub.d.*test38_v2f64" 1 { target {! mipsisar6 } } } } */
 /* { dg-final { scan-assembler-times "test39_v16i8:.*ld.b.*test39_v16i8" 1 } } */
 /* { dg-final { scan-assembler-times "test39_v8i16:.*ld.h.*test39_v8i16" 1 } } */
 /* { dg-final { scan-assembler-times "test39_v4i32:.*ld.w.*test39_v4i32" 1 } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 4f005c5a7d2..72c2fa195b4 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1514,6 +1514,16 @@  proc check_effective_target_mips64 { } {
     }]
 }
 
+# Return true if the target is a MIPS rev 6 target.
+
+proc check_effective_target_mipsisar6 { } {
+    return [check_no_compiler_messages mipsisar6 assembly {
+    #if __mips_isa_rev < 6
+    #error !__mips_isa_rev
+    #endif
+    }]
+}
+
 # Return true if the target is using a compressed MIPS ISA.
 
 proc check_effective_target_mips_compressed { } {