[v2,8/8] LoongArch: Implement [su]dot_prod* for LSX and LASX modes
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_simplebootstrap_build--master-aarch64-bootstrap |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
fail
|
Patch failed to apply
|
linaro-tcwg-bot/tcwg_simplebootstrap_build--master-arm-bootstrap |
fail
|
Patch failed to apply
|
Commit Message
Although this is just a special case of "a widening product whose
result is used for reduction," having these standard names allows the
dot product pattern to be recognized earlier, which may be beneficial
for optimization. Also fix some test failures with the test cases:
- gcc.dg/vect/vect-reduc-chain-2.c
- gcc.dg/vect/vect-reduc-chain-3.c
- gcc.dg/vect/vect-reduc-chain-dot-slp-3.c
- gcc.dg/vect/vect-reduc-chain-dot-slp-4.c
gcc/ChangeLog:
* config/loongarch/simd.md (wvec_half): New define_mode_attr.
(<su>dot_prod<wvec_half><mode>): New define_expand.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/wide-mul-reduc-2.c (dg-final): Scan
DOT_PROD_EXPR in optimized tree.
---
gcc/config/loongarch/simd.md | 29 +++++++++++++++++++
.../gcc.target/loongarch/wide-mul-reduc-2.c | 3 +-
2 files changed, 31 insertions(+), 1 deletion(-)
@@ -90,6 +90,12 @@ (define_mode_attr WVEC_HALF [(V2DI "V1TI") (V4DI "V2TI")
(V8HI "V4SI") (V16HI "V8SI")
(V16QI "V8HI") (V32QI "V16HI")])
+;; Lower-case version of WVEC_HALF: for each integer vector mode, the name
+;; of the mode with half as many elements, each twice as wide.  The
+;; lower-case spelling is what pattern names need (e.g. the
+;; <su>dot_prod<wvec_half><mode> expander).
+(define_mode_attr wvec_half [(V2DI "v1ti") (V4DI "v2ti")
+ (V4SI "v2di") (V8SI "v4di")
+ (V8HI "v4si") (V16HI "v8si")
+ (V16QI "v8hi") (V32QI "v16hi")])
+
;; Integer vector modes with the same length and unit size as a mode.
(define_mode_attr VIMODE [(V2DI "V2DI") (V4SI "V4SI")
(V8HI "V8HI") (V16QI "V16QI")
@@ -786,6 +792,29 @@ (define_expand "<simd_isa>_<x>vmaddw<ev_od>_<simdfmt_w>_<simdfmt><u>"
DONE;
})
+;; Expander for the standard [su]dot_prod names on the IVEC modes.
+;; Operands 1 and 2 are the narrow inputs, operand 3 is the wide
+;; accumulator (a register or constant zero) and operand 0 receives the
+;; wide result.  The expansion is a widening multiply(-add) on the "ev"
+;; elements followed by a widening multiply-add on the "od" elements that
+;; accumulates into the same result register.
+;; The trailing (any_extend (const_int 0)) is not an operand; presumably it
+;; is only there so that the any_extend iterator instantiates <su> and <u>
+;; -- confirm against the iterator definitions.
+(define_expand "<su>dot_prod<wvec_half><mode>"
+  [(match_operand:<WVEC_HALF> 0 "register_operand" "=f,f")
+   (match_operand:IVEC 1 "register_operand" " f,f")
+   (match_operand:IVEC 2 "register_operand" " f,f")
+   (match_operand:<WVEC_HALF> 3 "reg_or_0_operand" " 0,YG")
+   (any_extend (const_int 0))]
+  ""
+{
+  /* Use a plain pointer alias instead of a C++17 structured binding: the
+     host compiler GCC requires is older than C++17, and a structured
+     binding would also depend on the exact size of the generated
+     operands array.  */
+  rtx *op = operands;
+
+  /* Even elements first.  With a zero accumulator a plain widening
+     multiply suffices; otherwise multiply-add onto operand 3.  */
+  if (op[3] == CONST0_RTX (<WVEC_HALF>mode))
+    emit_insn (
+      gen_<simd_isa>_<x>vmulwev_<simdfmt_w>_<simdfmt><u> (op[0], op[1],
+                                                           op[2]));
+  else
+    emit_insn (
+      gen_<simd_isa>_<x>vmaddwev_<simdfmt_w>_<simdfmt><u> (op[0], op[3],
+                                                            op[1], op[2]));
+
+  /* Then accumulate the odd-element products into the same result.  */
+  emit_insn (
+    gen_<simd_isa>_<x>vmaddwod_<simdfmt_w>_<simdfmt><u> (op[0], op[0],
+                                                          op[1], op[2]));
+  DONE;
+})
+
(define_insn "simd_maddw_evod_<mode>_hetero"
[(set (match_operand:<WVEC_HALF> 0 "register_operand" "=f")
(plus:<WVEC_HALF>
@@ -1,6 +1,7 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mlasx" } */
+/* { dg-options "-O2 -mlasx -fdump-tree-optimized" } */
/* { dg-final { scan-assembler "xvmaddw(ev|od)\\.d\\.w" } } */
+/* { dg-final { scan-tree-dump "DOT_PROD_EXPR" "optimized" } } */
typedef __INT32_TYPE__ i32;
typedef __INT64_TYPE__ i64;