[v2,8/8] LoongArch: Implement [su]dot_prod* for LSX and LASX modes

Message ID 20250213094204.12290-9-xry111@xry111.site
State Committed
Commit cef5f23adb6f9f052d03286ad8ccf352eefccf86
Headers
Series LoongArch: SIMD odd/even/horizontal widening arithmetic cleanup and optimization |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_simplebootstrap_build--master-aarch64-bootstrap success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 fail Patch failed to apply
linaro-tcwg-bot/tcwg_simplebootstrap_build--master-arm-bootstrap fail Patch failed to apply

Commit Message

Xi Ruoyao Feb. 13, 2025, 9:42 a.m. UTC
  Although this is just a special case of "a widening product whose
result is used for reduction," having these standard names allows the
dot product pattern to be recognized earlier, which may be beneficial to
optimization.  This also fixes some test failures in these test cases:

- gcc.dg/vect/vect-reduc-chain-2.c
- gcc.dg/vect/vect-reduc-chain-3.c
- gcc.dg/vect/vect-reduc-chain-dot-slp-3.c
- gcc.dg/vect/vect-reduc-chain-dot-slp-4.c

gcc/ChangeLog:

	* config/loongarch/simd.md (wvec_half): New define_mode_attr.
	(<su>dot_prod<wvec_half><mode>): New define_expand.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/wide-mul-reduc-2.c (dg-final): Scan
	DOT_PROD_EXPR in optimized tree.
---
 gcc/config/loongarch/simd.md                  | 29 +++++++++++++++++++
 .../gcc.target/loongarch/wide-mul-reduc-2.c   |  3 +-
 2 files changed, 31 insertions(+), 1 deletion(-)
  

Patch

diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 661f5dc8dda..45d2bcaec2e 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -90,6 +90,12 @@  (define_mode_attr WVEC_HALF [(V2DI "V1TI") (V4DI "V2TI")
 			     (V8HI "V4SI") (V16HI "V8SI")
 			     (V16QI "V8HI") (V32QI "V16HI")])
 
+;; Lower-case version of WVEC_HALF, usable in pattern names.
+(define_mode_attr wvec_half [(V2DI "v1ti") (V4DI "v2ti")
+			     (V4SI "v2di") (V8SI "v4di")
+			     (V8HI "v4si") (V16HI "v8si")
+			     (V16QI "v8hi") (V32QI "v16hi")])
+
 ;; Integer vector modes with the same length and unit size as a mode.
 (define_mode_attr VIMODE [(V2DI "V2DI") (V4SI "V4SI")
 			  (V8HI "V8HI") (V16QI "V16QI")
@@ -786,6 +792,29 @@  (define_expand "<simd_isa>_<x>vmaddw<ev_od>_<simdfmt_w>_<simdfmt><u>"
   DONE;
 })
 
+(define_expand "<su>dot_prod<wvec_half><mode>"  ;; Two steps: ev, then od.
+  [(match_operand:<WVEC_HALF> 0 "register_operand" "=f,f")
+   (match_operand:IVEC	      1 "register_operand" " f,f")
+   (match_operand:IVEC	      2 "register_operand" " f,f")
+   (match_operand:<WVEC_HALF> 3 "reg_or_0_operand" " 0,YG")
+   (any_extend (const_int 0))]  ;; Presumably only to instantiate <su>/<u>.
+  ""
+{
+  auto [op0, op1, op2, op3] = operands;
+
+  if (op3 == CONST0_RTX (<WVEC_HALF>mode))  /* Zero addend: no add needed.  */
+    emit_insn (
+      gen_<simd_isa>_<x>vmulwev_<simdfmt_w>_<simdfmt><u> (op0, op1, op2));
+  else  /* Otherwise op3 is passed as the addend of the first step.  */
+    emit_insn (
+      gen_<simd_isa>_<x>vmaddwev_<simdfmt_w>_<simdfmt><u> (op0, op3, op1,
+							   op2));
+
+  emit_insn (  /* Second step accumulates onto op0 itself.  */
+    gen_<simd_isa>_<x>vmaddwod_<simdfmt_w>_<simdfmt><u> (op0, op0, op1, op2));
+  DONE;
+})
+
 (define_insn "simd_maddw_evod_<mode>_hetero"
   [(set (match_operand:<WVEC_HALF> 0 "register_operand" "=f")
 	(plus:<WVEC_HALF>
diff --git a/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-2.c b/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-2.c
index 07a7601888a..61e92e58fc3 100644
--- a/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-2.c
+++ b/gcc/testsuite/gcc.target/loongarch/wide-mul-reduc-2.c
@@ -1,6 +1,7 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -mlasx" } */
+/* { dg-options "-O2 -mlasx -fdump-tree-optimized" } */
 /* { dg-final { scan-assembler "xvmaddw(ev|od)\\.d\\.w" } } */
+/* { dg-final { scan-tree-dump "DOT_PROD_EXPR" "optimized" } } */
 
 typedef __INT32_TYPE__ i32;
 typedef __INT64_TYPE__ i64;