[4/5] RISC-V: Implement the MASK_LEN_STRIDED_LOAD{STORE}
Checks
Context |
Check |
Description |
rivoscibot/toolchain-ci-rivos-apply-patch |
success
|
Patch applied
|
rivoscibot/toolchain-ci-rivos-lint |
success
|
Lint passed
|
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gcv-lp64d-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-build--linux-rv64gcv-lp64d-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc_zba_zbb_zbc_zbs-lp64d-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gc-lp64d-non-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc-lp64d-non-multilib |
success
|
Build passed
|
rivoscibot/toolchain-ci-rivos-test |
fail
|
Testing failed
|
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
Commit Message
From: Pan Li <pan2.li@intel.com>
This patch would like to implement the MASK_LEN_STRIDED_LOAD{STORE} in
the RISC-V backend by leveraging the vector strided load/store insn.
For example:
void foo (int * __restrict a, int * __restrict b, int stride, int n)
{
for (int i = 0; i < n; i++)
a[i*stride] = b[i*stride] + 100;
}
Before this patch:
38 │ vsetvli a5,a3,e32,m1,ta,ma
39 │ vluxei64.v v1,(a1),v4
40 │ mul a4,a2,a5
41 │ sub a3,a3,a5
42 │ vadd.vv v1,v1,v2
43 │ vsuxei64.v v1,(a0),v4
44 │ add a1,a1,a4
45 │ add a0,a0,a4
After this patch:
33 │ vsetvli a5,a3,e32,m1,ta,ma
34 │ vlse32.v v1,0(a1),a2
35 │ mul a4,a2,a5
36 │ sub a3,a3,a5
37 │ vadd.vv v1,v1,v2
38 │ vsse32.v v1,0(a0),a2
39 │ add a1,a1,a4
40 │ add a0,a0,a4
The below test suites are passed for this patch:
* The riscv fully regression test.
gcc/ChangeLog:
* config/riscv/autovec.md (mask_len_strided_load_<mode>): Add
new pattern for MASK_LEN_STRIDED_LOAD.
(mask_len_strided_store_<mode>): Ditto but for store.
* config/riscv/riscv-protos.h (expand_strided_load): Add new
func decl to expand strided load.
(expand_strided_store): Ditto but for store.
* config/riscv/riscv-v.cc (expand_strided_load): Add new
func impl to expand strided load.
(expand_strided_store): Ditto but for store.
Signed-off-by: Pan Li <pan2.li@intel.com>
Co-Authored-By: Juzhe-Zhong <juzhe.zhong@rivai.ai>
---
gcc/config/riscv/autovec.md | 29 ++++++++++++++++++
gcc/config/riscv/riscv-protos.h | 2 ++
gcc/config/riscv/riscv-v.cc | 52 +++++++++++++++++++++++++++++++++
3 files changed, 83 insertions(+)
Comments
> +(define_expand "mask_len_strided_store_<mode>"
> + [(match_operand 0 "pmode_reg_or_0_operand")
> + (match_operand 1 "pmode_reg_or_0_operand")
> + (match_operand:V 2 "register_operand")
> + (match_operand:<VM> 3 "vector_mask_operand")
> + (match_operand 4 "autovec_length_operand")
> + (match_operand 5 "const_0_operand")]
> + "TARGET_VECTOR"
> + {
> + riscv_vector::expand_strided_store(<MODE>mode, operands);
Nit, space before '('.
LGTM with that fixed and once the middle-end changes are in.
> Nit, space before '('.
> LGTM with that fixed and once the middle-end changes are in.
Got it, thanks Robin.
Pan
-----Original Message-----
From: Robin Dapp <rdapp.gcc@gmail.com>
Sent: Friday, October 25, 2024 11:56 PM
To: Li, Pan2 <pan2.li@intel.com>; gcc-patches@gcc.gnu.org
Cc: richard.guenther@gmail.com; Tamar.Christina@arm.com; juzhe.zhong@rivai.ai; kito.cheng@gmail.com; jeffreyalaw@gmail.com; Robin Dapp <rdapp.gcc@gmail.com>
Subject: Re: [PATCH 4/5] RISC-V: Implement the MASK_LEN_STRIDED_LOAD{STORE}
> +(define_expand "mask_len_strided_store_<mode>"
> + [(match_operand 0 "pmode_reg_or_0_operand")
> + (match_operand 1 "pmode_reg_or_0_operand")
> + (match_operand:V 2 "register_operand")
> + (match_operand:<VM> 3 "vector_mask_operand")
> + (match_operand 4 "autovec_length_operand")
> + (match_operand 5 "const_0_operand")]
> + "TARGET_VECTOR"
> + {
> + riscv_vector::expand_strided_store(<MODE>mode, operands);
Nit, space before '('.
LGTM with that fixed and once the middle-end changes are in.
--
Regards
Robin
@@ -2855,3 +2855,32 @@ (define_expand "v<bitmanip_optab><mode>3"
DONE;
}
)
+
+;; =========================================================================
+;; == Strided Load/Store
+;; =========================================================================
+(define_expand "mask_len_strided_load_<mode>"
+ [(match_operand:V 0 "register_operand")
+ (match_operand 1 "pmode_reg_or_0_operand")
+ (match_operand 2 "pmode_reg_or_0_operand")
+ (match_operand:<VM> 3 "vector_mask_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_strided_load (<MODE>mode, operands);
+ DONE;
+ })
+
+(define_expand "mask_len_strided_store_<mode>"
+ [(match_operand 0 "pmode_reg_or_0_operand")
+ (match_operand 1 "pmode_reg_or_0_operand")
+ (match_operand:V 2 "register_operand")
+ (match_operand:<VM> 3 "vector_mask_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_strided_store (<MODE>mode, operands);
+ DONE;
+ })
@@ -696,6 +696,8 @@ bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
void emit_vec_extract (rtx, rtx, rtx);
bool expand_vec_setmem (rtx, rtx, rtx);
bool expand_vec_cmpmem (rtx, rtx, rtx, rtx);
+void expand_strided_load (machine_mode, rtx *);
+void expand_strided_store (machine_mode, rtx *);
/* Rounding mode bitfield for fixed point VXRM. */
enum fixed_point_rounding_mode
@@ -3833,6 +3833,58 @@ expand_load_store (rtx *ops, bool is_load)
}
}
+/* Expand MASK_LEN_STRIDED_LOAD. */
+void
+expand_strided_load (machine_mode mode, rtx *ops)
+{
+ rtx v_reg = ops[0];
+ rtx base = ops[1];
+ rtx stride = ops[2];
+ rtx mask = ops[3];
+ rtx len = ops[4];
+ poly_int64 len_val;
+
+ insn_code icode = code_for_pred_strided_load (mode);
+ rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride};
+
+ if (poly_int_rtx_p (len, &len_val)
+ && known_eq (len_val, GET_MODE_NUNITS (mode)))
+ emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops);
+ else
+ {
+ len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len);
+ emit_nonvlmax_insn (icode, BINARY_OP_TAMA, emit_ops, len);
+ }
+}
+
+/* Expand MASK_LEN_STRIDED_STORE. */
+void
+expand_strided_store (machine_mode mode, rtx *ops)
+{
+ rtx v_reg = ops[2];
+ rtx base = ops[0];
+ rtx stride = ops[1];
+ rtx mask = ops[3];
+ rtx len = ops[4];
+ poly_int64 len_val;
+ rtx vl_type;
+
+ if (poly_int_rtx_p (len, &len_val)
+ && known_eq (len_val, GET_MODE_NUNITS (mode)))
+ {
+ len = gen_reg_rtx (Pmode);
+ emit_vlmax_vsetvl (mode, len);
+ vl_type = get_avl_type_rtx (VLMAX);
+ }
+ else
+ {
+ len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len);
+ vl_type = get_avl_type_rtx (NONVLMAX);
+ }
+
+ emit_insn (gen_pred_strided_store (mode, gen_rtx_MEM (mode, base),
+ mask, stride, v_reg, len, vl_type));
+}
/* Return true if the operation is the floating-point operation need FRM. */
static bool