[v3,7/8] i386: Add else operand to masked loads.
Checks
| Context | Check | Description |
|---|---|---|
| linaro-tcwg-bot/tcwg_gcc_build--master-arm | success | Build passed |
| linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 | success | Build passed |
| linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 | success | Test passed |
| linaro-tcwg-bot/tcwg_gcc_check--master-arm | success | Test passed |
Commit Message
From: Robin Dapp <rdapp@ventanamicro.com>
This patch adds a zero else operand to masked loads, in particular the
masked gather load builtins that are used for gather vectorization.
gcc/ChangeLog:
* config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
Add else-operand handling.
(ix86_expand_builtin): Ditto.
* config/i386/predicates.md (maskload_else_operand): New
predicate.
* config/i386/sse.md: Use predicate.
---
gcc/config/i386/i386-expand.cc | 26 ++++++--
gcc/config/i386/predicates.md | 4 ++
gcc/config/i386/sse.md | 112 +++++++++++++++++++++------------
3 files changed, 97 insertions(+), 45 deletions(-)
Comments
On Sat, Nov 2, 2024 at 8:58 PM Robin Dapp <rdapp.gcc@gmail.com> wrote:
>
> From: Robin Dapp <rdapp@ventanamicro.com>
>
> This patch adds a zero else operand to masked loads, in particular the
> masked gather load builtins that are used for gather vectorization.
>
> gcc/ChangeLog:
>
> * config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
> Add else-operand handling.
> (ix86_expand_builtin): Ditto.
> * config/i386/predicates.md (vcvtne2ps2bf_parallel): New
> predicate.
> (maskload_else_operand): Ditto.
> * config/i386/sse.md: Use predicate.
> ---
> gcc/config/i386/i386-expand.cc | 26 ++++++--
> gcc/config/i386/predicates.md | 4 ++
> gcc/config/i386/sse.md | 112 +++++++++++++++++++++------------
> 3 files changed, 97 insertions(+), 45 deletions(-)
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index 0de0e842731..6c61f9f87c2 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -12995,10 +12995,11 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
> {
> tree arg;
> rtx pat, op;
> - unsigned int i, nargs, arg_adjust, memory;
> + unsigned int i, nargs, arg_adjust, memory = -1;
> unsigned int constant = 100;
> bool aligned_mem = false;
> - rtx xops[4];
> + rtx xops[4] = {};
> + bool add_els = false;
> enum insn_code icode = d->icode;
> const struct insn_data_d *insn_p = &insn_data[icode];
> machine_mode tmode = insn_p->operand[0].mode;
> @@ -13125,6 +13126,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
> case V4DI_FTYPE_PCV4DI_V4DI:
> case V4SI_FTYPE_PCV4SI_V4SI:
> case V2DI_FTYPE_PCV2DI_V2DI:
> + /* Two actual args but an additional else operand. */
> + add_els = true;
> + /* Fallthru. */
> case VOID_FTYPE_INT_INT64:
> nargs = 2;
> klass = load;
> @@ -13397,6 +13401,12 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
> xops[i]= op;
> }
>
> + if (add_els)
> + {
> + xops[i] = CONST0_RTX (GET_MODE (xops[0]));
> + nargs++;
> + }
> +
> switch (nargs)
> {
> case 0:
> @@ -13653,7 +13663,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
> enum insn_code icode, icode2;
> tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
> tree arg0, arg1, arg2, arg3, arg4;
> - rtx op0, op1, op2, op3, op4, pat, pat2, insn;
> + rtx op0, op1, op2, op3, op4, opels, pat, pat2, insn;
> machine_mode mode0, mode1, mode2, mode3, mode4;
> unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
> HOST_WIDE_INT bisa, bisa2;
> @@ -15560,12 +15570,15 @@ rdseed_step:
> op3 = copy_to_reg (op3);
> op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
> }
> +
> if (!insn_data[icode].operand[5].predicate (op4, mode4))
> {
> - error ("the last argument must be scale 1, 2, 4, 8");
> - return const0_rtx;
> + error ("the last argument must be scale 1, 2, 4, 8");
> + return const0_rtx;
> }
>
> + opels = CONST0_RTX (GET_MODE (subtarget));
> +
> /* Optimize. If mask is known to have all high bits set,
> replace op0 with pc_rtx to signal that the instruction
> overwrites the whole destination and doesn't use its
> @@ -15634,7 +15647,8 @@ rdseed_step:
> }
> }
>
> - pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
> + pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4, opels);
> +
> if (! pat)
> return const0_rtx;
> emit_insn (pat);
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index 053312bbe27..7c7d8f61f11 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -2346,3 +2346,7 @@ (define_predicate "apx_evex_add_memory_operand"
>
> return true;
> })
> +
> +(define_predicate "maskload_else_operand"
> + (and (match_code "const_int,const_vector")
> + (match_test "op == CONST0_RTX (GET_MODE (op))")))
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 36f8567b66f..41c1badbc00 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -28632,7 +28632,7 @@ (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
> (set_attr "btver2_decode" "vector")
> (set_attr "mode" "<sseinsnmode>")])
>
> -(define_expand "maskload<mode><sseintvecmodelower>"
> +(define_expand "maskload<mode><sseintvecmodelower>_1"
> [(set (match_operand:V48_128_256 0 "register_operand")
> (unspec:V48_128_256
> [(match_operand:<sseintvecmode> 2 "register_operand")
> @@ -28640,13 +28640,28 @@ (define_expand "maskload<mode><sseintvecmodelower>"
> UNSPEC_MASKMOV))]
> "TARGET_AVX")
>
> +(define_expand "maskload<mode><sseintvecmodelower>"
> + [(set (match_operand:V48_128_256 0 "register_operand")
> + (unspec:V48_128_256
> + [(match_operand:<sseintvecmode> 2 "register_operand")
> + (match_operand:V48_128_256 1 "memory_operand")
> + (match_operand:V48_128_256 3 "const0_operand")]
> + UNSPEC_MASKMOV))]
> + "TARGET_AVX"
> +{
> + emit_insn (gen_maskload<mode><sseintvecmodelower>_1 (operands[0],
> + operands[1],
> + operands[2]));
> + DONE;
> +})
> +
> (define_expand "maskload<mode><avx512fmaskmodelower>"
> [(set (match_operand:V48_AVX512VL 0 "register_operand")
> (vec_merge:V48_AVX512VL
> (unspec:V48_AVX512VL
> [(match_operand:V48_AVX512VL 1 "memory_operand")]
> UNSPEC_MASKLOAD)
> - (match_dup 0)
> + (match_operand:V48_AVX512VL 3 "const0_operand")
> (match_operand:<avx512fmaskmode> 2 "register_operand")))]
> "TARGET_AVX512F")
>
> @@ -28656,8 +28671,9 @@ (define_expand "maskload<mode><avx512fmaskmodelower>"
> (unspec:VI12HFBF_AVX512VL
> [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")]
> UNSPEC_MASKLOAD)
> - (match_dup 0)
> - (match_operand:<avx512fmaskmode> 2 "register_operand")))]
> + (match_operand:VI12HFBF_AVX512VL 3 "const0_operand")
> + (match_operand:<avx512fmaskmode> 2 "register_operand")))
> + ]
> "TARGET_AVX512BW")
>
> (define_expand "maskstore<mode><sseintvecmodelower>"
> @@ -29223,20 +29239,22 @@ (define_expand "avx2_gathersi<mode>"
> (unspec:VEC_GATHER_MODE
> [(match_operand:VEC_GATHER_MODE 1 "register_operand")
> (mem:<ssescalarmode>
> - (match_par_dup 6
> + (match_par_dup 7
> [(match_operand 2 "vsib_address_operand")
> (match_operand:<VEC_GATHER_IDXSI>
> 3 "register_operand")
> - (match_operand:SI 5 "const1248_operand ")]))
> + (match_operand:SI 5 "const1248_operand ")
> + (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")]))
> (mem:BLK (scratch))
> (match_operand:VEC_GATHER_MODE 4 "register_operand")]
> UNSPEC_GATHER))
> - (clobber (match_scratch:VEC_GATHER_MODE 7))])]
> + (clobber (match_scratch:VEC_GATHER_MODE 8))])]
> "TARGET_AVX2"
> {
> - operands[6]
> - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
> - operands[5]), UNSPEC_VSIBADDR);
> + operands[7]
> + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
> + operands[5], operands[6]),
> + UNSPEC_VSIBADDR);
> })
>
> (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
> @@ -29247,7 +29265,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
> [(unspec:P
> [(match_operand:P 3 "vsib_address_operand" "jb")
> (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
> - (match_operand:SI 6 "const1248_operand")]
> + (match_operand:SI 6 "const1248_operand")
> + (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")]
> UNSPEC_VSIBADDR)])
> (mem:BLK (scratch))
> (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
> @@ -29268,7 +29287,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
> [(unspec:P
> [(match_operand:P 2 "vsib_address_operand" "jb")
> (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
> - (match_operand:SI 5 "const1248_operand")]
> + (match_operand:SI 5 "const1248_operand")
> + (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")]
> UNSPEC_VSIBADDR)])
> (mem:BLK (scratch))
> (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
> @@ -29286,20 +29306,22 @@ (define_expand "avx2_gatherdi<mode>"
> (unspec:VEC_GATHER_MODE
> [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
> (mem:<ssescalarmode>
> - (match_par_dup 6
> + (match_par_dup 7
> [(match_operand 2 "vsib_address_operand")
> (match_operand:<VEC_GATHER_IDXDI>
> 3 "register_operand")
> - (match_operand:SI 5 "const1248_operand ")]))
> + (match_operand:SI 5 "const1248_operand ")
> + (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")]))
> (mem:BLK (scratch))
> (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
> UNSPEC_GATHER))
> - (clobber (match_scratch:VEC_GATHER_MODE 7))])]
> + (clobber (match_scratch:VEC_GATHER_MODE 8))])]
> "TARGET_AVX2"
> {
> - operands[6]
> - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
> - operands[5]), UNSPEC_VSIBADDR);
> + operands[7]
> + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
> + operands[5], operands[6]),
> + UNSPEC_VSIBADDR);
> })
>
x86 doesn't define mask_gather_loadmn, so I think you can drop this
and all related changes, and keep only the patch I gave you in [1].
Sorry I didn't make that clear last time.
[1] https://gcc.gnu.org/pipermail/gcc-patches/2024-October/666814.html
> (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
> @@ -29310,7 +29332,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
> [(unspec:P
> [(match_operand:P 3 "vsib_address_operand" "jb")
> (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
> - (match_operand:SI 6 "const1248_operand")]
> + (match_operand:SI 6 "const1248_operand")
> + (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")]
> UNSPEC_VSIBADDR)])
> (mem:BLK (scratch))
> (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
> @@ -29331,7 +29354,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
> [(unspec:P
> [(match_operand:P 2 "vsib_address_operand" "jb")
> (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
> - (match_operand:SI 5 "const1248_operand")]
> + (match_operand:SI 5 "const1248_operand")
> + (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")]
> UNSPEC_VSIBADDR)])
> (mem:BLK (scratch))
> (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
> @@ -29357,7 +29381,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
> [(unspec:P
> [(match_operand:P 3 "vsib_address_operand" "jb")
> (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
> - (match_operand:SI 6 "const1248_operand")]
> + (match_operand:SI 6 "const1248_operand")
> + (match_operand:VI4F_256 8 "maskload_else_operand")]
> UNSPEC_VSIBADDR)])
> (mem:BLK (scratch))
> (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
> @@ -29381,7 +29406,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
> [(unspec:P
> [(match_operand:P 2 "vsib_address_operand" "jb")
> (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
> - (match_operand:SI 5 "const1248_operand")]
> + (match_operand:SI 5 "const1248_operand")
> + (match_operand:VI4F_256 7 "maskload_else_operand")]
> UNSPEC_VSIBADDR)])
> (mem:BLK (scratch))
> (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
> @@ -29402,17 +29428,19 @@ (define_expand "<avx512>_gathersi<mode>"
> [(match_operand:VI48F 1 "register_operand")
> (match_operand:<avx512fmaskmode> 4 "register_operand")
> (mem:<ssescalarmode>
> - (match_par_dup 6
> + (match_par_dup 7
> [(match_operand 2 "vsib_address_operand")
> (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
> - (match_operand:SI 5 "const1248_operand")]))]
> + (match_operand:SI 5 "const1248_operand")
> + (match_operand:VI48F 6 "maskload_else_operand")]))]
> UNSPEC_GATHER))
> - (clobber (match_scratch:<avx512fmaskmode> 7))])]
> + (clobber (match_scratch:<avx512fmaskmode> 8))])]
> "TARGET_AVX512F"
> {
> - operands[6]
> - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
> - operands[5]), UNSPEC_VSIBADDR);
> + operands[7]
> + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
> + operands[5], operands[6]),
> + UNSPEC_VSIBADDR);
> })
>
> (define_insn "*avx512f_gathersi<VI48F:mode>"
> @@ -29424,7 +29452,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>"
> [(unspec:P
> [(match_operand:P 4 "vsib_address_operand" "Tv")
> (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
> - (match_operand:SI 5 "const1248_operand")]
> + (match_operand:SI 5 "const1248_operand")
> + (match_operand:VI48F 8 "maskload_else_operand")]
> UNSPEC_VSIBADDR)])]
> UNSPEC_GATHER))
> (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
> @@ -29445,7 +29474,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>_2"
> [(unspec:P
> [(match_operand:P 3 "vsib_address_operand" "Tv")
> (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
> - (match_operand:SI 4 "const1248_operand")]
> + (match_operand:SI 4 "const1248_operand")
> + (match_operand:VI48F 7 "maskload_else_operand")]
> UNSPEC_VSIBADDR)])]
> UNSPEC_GATHER))
> (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
> @@ -29464,17 +29494,19 @@ (define_expand "<avx512>_gatherdi<mode>"
> [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
> (match_operand:QI 4 "register_operand")
> (mem:<ssescalarmode>
> - (match_par_dup 6
> + (match_par_dup 7
> [(match_operand 2 "vsib_address_operand")
> (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
> - (match_operand:SI 5 "const1248_operand")]))]
> + (match_operand:SI 5 "const1248_operand")
> + (match_operand:VI48F 6 "maskload_else_operand")]))]
> UNSPEC_GATHER))
> - (clobber (match_scratch:QI 7))])]
> + (clobber (match_scratch:QI 8))])]
> "TARGET_AVX512F"
> {
> - operands[6]
> - = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
> - operands[5]), UNSPEC_VSIBADDR);
> + operands[7]
> + = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
> + operands[5], operands[6]),
> + UNSPEC_VSIBADDR);
> })
>
> (define_insn "*avx512f_gatherdi<VI48F:mode>"
> @@ -29486,7 +29518,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>"
> [(unspec:P
> [(match_operand:P 4 "vsib_address_operand" "Tv")
> (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
> - (match_operand:SI 5 "const1248_operand")]
> + (match_operand:SI 5 "const1248_operand")
> + (match_operand:VI48F 8 "maskload_else_operand")]
> UNSPEC_VSIBADDR)])]
> UNSPEC_GATHER))
> (clobber (match_scratch:QI 2 "=&Yk"))]
> @@ -29507,7 +29540,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>_2"
> [(unspec:P
> [(match_operand:P 3 "vsib_address_operand" "Tv")
> (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
> - (match_operand:SI 4 "const1248_operand")]
> + (match_operand:SI 4 "const1248_operand")
> + (match_operand:VI48F 7 "maskload_else_operand")]
> UNSPEC_VSIBADDR)])]
> UNSPEC_GATHER))
> (clobber (match_scratch:QI 1 "=&Yk"))]
> @@ -29544,7 +29578,7 @@ (define_expand "<avx512>_scattersi<mode>"
> operands[5]
> = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
> operands[4], operands[1]),
> - UNSPEC_VSIBADDR);
> + UNSPEC_VSIBADDR);
> })
>
> (define_insn "*avx512f_scattersi<VI48F:mode>"
> --
> 2.47.0
>
> x86 doesn't define mask_gather_loadmn, so I think you can drop this
> and all related changes, and keep only the patch I gave you in [1].
> Sorry I didn't make that clear last time.
Yes, that works, thanks. Will post a v4 soon.
@@ -12995,10 +12995,11 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
{
tree arg;
rtx pat, op;
- unsigned int i, nargs, arg_adjust, memory;
+ unsigned int i, nargs, arg_adjust, memory = -1;
unsigned int constant = 100;
bool aligned_mem = false;
- rtx xops[4];
+ rtx xops[4] = {};
+ bool add_els = false;
enum insn_code icode = d->icode;
const struct insn_data_d *insn_p = &insn_data[icode];
machine_mode tmode = insn_p->operand[0].mode;
@@ -13125,6 +13126,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case V4DI_FTYPE_PCV4DI_V4DI:
case V4SI_FTYPE_PCV4SI_V4SI:
case V2DI_FTYPE_PCV2DI_V2DI:
+ /* Two actual args but an additional else operand. */
+ add_els = true;
+ /* Fallthru. */
case VOID_FTYPE_INT_INT64:
nargs = 2;
klass = load;
@@ -13397,6 +13401,12 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
xops[i]= op;
}
+ if (add_els)
+ {
+ xops[i] = CONST0_RTX (GET_MODE (xops[0]));
+ nargs++;
+ }
+
switch (nargs)
{
case 0:
@@ -13653,7 +13663,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
enum insn_code icode, icode2;
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
tree arg0, arg1, arg2, arg3, arg4;
- rtx op0, op1, op2, op3, op4, pat, pat2, insn;
+ rtx op0, op1, op2, op3, op4, opels, pat, pat2, insn;
machine_mode mode0, mode1, mode2, mode3, mode4;
unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
HOST_WIDE_INT bisa, bisa2;
@@ -15560,12 +15570,15 @@ rdseed_step:
op3 = copy_to_reg (op3);
op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
}
+
if (!insn_data[icode].operand[5].predicate (op4, mode4))
{
- error ("the last argument must be scale 1, 2, 4, 8");
- return const0_rtx;
+ error ("the last argument must be scale 1, 2, 4, 8");
+ return const0_rtx;
}
+ opels = CONST0_RTX (GET_MODE (subtarget));
+
/* Optimize. If mask is known to have all high bits set,
replace op0 with pc_rtx to signal that the instruction
overwrites the whole destination and doesn't use its
@@ -15634,7 +15647,8 @@ rdseed_step:
}
}
- pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
+ pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4, opels);
+
if (! pat)
return const0_rtx;
emit_insn (pat);
@@ -2346,3 +2346,7 @@ (define_predicate "apx_evex_add_memory_operand"
return true;
})
+
+(define_predicate "maskload_else_operand"
+ (and (match_code "const_int,const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))")))
@@ -28632,7 +28632,7 @@ (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
(set_attr "btver2_decode" "vector")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "maskload<mode><sseintvecmodelower>"
+(define_expand "maskload<mode><sseintvecmodelower>_1"
[(set (match_operand:V48_128_256 0 "register_operand")
(unspec:V48_128_256
[(match_operand:<sseintvecmode> 2 "register_operand")
@@ -28640,13 +28640,28 @@ (define_expand "maskload<mode><sseintvecmodelower>"
UNSPEC_MASKMOV))]
"TARGET_AVX")
+(define_expand "maskload<mode><sseintvecmodelower>"
+ [(set (match_operand:V48_128_256 0 "register_operand")
+ (unspec:V48_128_256
+ [(match_operand:<sseintvecmode> 2 "register_operand")
+ (match_operand:V48_128_256 1 "memory_operand")
+ (match_operand:V48_128_256 3 "const0_operand")]
+ UNSPEC_MASKMOV))]
+ "TARGET_AVX"
+{
+ emit_insn (gen_maskload<mode><sseintvecmodelower>_1 (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+})
+
(define_expand "maskload<mode><avx512fmaskmodelower>"
[(set (match_operand:V48_AVX512VL 0 "register_operand")
(vec_merge:V48_AVX512VL
(unspec:V48_AVX512VL
[(match_operand:V48_AVX512VL 1 "memory_operand")]
UNSPEC_MASKLOAD)
- (match_dup 0)
+ (match_operand:V48_AVX512VL 3 "const0_operand")
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
"TARGET_AVX512F")
@@ -28656,8 +28671,9 @@ (define_expand "maskload<mode><avx512fmaskmodelower>"
(unspec:VI12HFBF_AVX512VL
[(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")]
UNSPEC_MASKLOAD)
- (match_dup 0)
- (match_operand:<avx512fmaskmode> 2 "register_operand")))]
+ (match_operand:VI12HFBF_AVX512VL 3 "const0_operand")
+ (match_operand:<avx512fmaskmode> 2 "register_operand")))
+ ]
"TARGET_AVX512BW")
(define_expand "maskstore<mode><sseintvecmodelower>"
@@ -29223,20 +29239,22 @@ (define_expand "avx2_gathersi<mode>"
(unspec:VEC_GATHER_MODE
[(match_operand:VEC_GATHER_MODE 1 "register_operand")
(mem:<ssescalarmode>
- (match_par_dup 6
+ (match_par_dup 7
[(match_operand 2 "vsib_address_operand")
(match_operand:<VEC_GATHER_IDXSI>
3 "register_operand")
- (match_operand:SI 5 "const1248_operand ")]))
+ (match_operand:SI 5 "const1248_operand ")
+ (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")]))
(mem:BLK (scratch))
(match_operand:VEC_GATHER_MODE 4 "register_operand")]
UNSPEC_GATHER))
- (clobber (match_scratch:VEC_GATHER_MODE 7))])]
+ (clobber (match_scratch:VEC_GATHER_MODE 8))])]
"TARGET_AVX2"
{
- operands[6]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
- operands[5]), UNSPEC_VSIBADDR);
+ operands[7]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
+ operands[5], operands[6]),
+ UNSPEC_VSIBADDR);
})
(define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
@@ -29247,7 +29265,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "jb")
(match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
- (match_operand:SI 6 "const1248_operand")]
+ (match_operand:SI 6 "const1248_operand")
+ (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
@@ -29268,7 +29287,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
[(unspec:P
[(match_operand:P 2 "vsib_address_operand" "jb")
(match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
- (match_operand:SI 5 "const1248_operand")]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
@@ -29286,20 +29306,22 @@ (define_expand "avx2_gatherdi<mode>"
(unspec:VEC_GATHER_MODE
[(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
(mem:<ssescalarmode>
- (match_par_dup 6
+ (match_par_dup 7
[(match_operand 2 "vsib_address_operand")
(match_operand:<VEC_GATHER_IDXDI>
3 "register_operand")
- (match_operand:SI 5 "const1248_operand ")]))
+ (match_operand:SI 5 "const1248_operand ")
+ (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")]))
(mem:BLK (scratch))
(match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
UNSPEC_GATHER))
- (clobber (match_scratch:VEC_GATHER_MODE 7))])]
+ (clobber (match_scratch:VEC_GATHER_MODE 8))])]
"TARGET_AVX2"
{
- operands[6]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
- operands[5]), UNSPEC_VSIBADDR);
+ operands[7]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
+ operands[5], operands[6]),
+ UNSPEC_VSIBADDR);
})
(define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
@@ -29310,7 +29332,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "jb")
(match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
- (match_operand:SI 6 "const1248_operand")]
+ (match_operand:SI 6 "const1248_operand")
+ (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
@@ -29331,7 +29354,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
[(unspec:P
[(match_operand:P 2 "vsib_address_operand" "jb")
(match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
- (match_operand:SI 5 "const1248_operand")]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
@@ -29357,7 +29381,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "jb")
(match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
- (match_operand:SI 6 "const1248_operand")]
+ (match_operand:SI 6 "const1248_operand")
+ (match_operand:VI4F_256 8 "maskload_else_operand")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
@@ -29381,7 +29406,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
[(unspec:P
[(match_operand:P 2 "vsib_address_operand" "jb")
(match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
- (match_operand:SI 5 "const1248_operand")]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VI4F_256 7 "maskload_else_operand")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
@@ -29402,17 +29428,19 @@ (define_expand "<avx512>_gathersi<mode>"
[(match_operand:VI48F 1 "register_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")
(mem:<ssescalarmode>
- (match_par_dup 6
+ (match_par_dup 7
[(match_operand 2 "vsib_address_operand")
(match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
- (match_operand:SI 5 "const1248_operand")]))]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VI48F 6 "maskload_else_operand")]))]
UNSPEC_GATHER))
- (clobber (match_scratch:<avx512fmaskmode> 7))])]
+ (clobber (match_scratch:<avx512fmaskmode> 8))])]
"TARGET_AVX512F"
{
- operands[6]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
- operands[5]), UNSPEC_VSIBADDR);
+ operands[7]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
+ operands[5], operands[6]),
+ UNSPEC_VSIBADDR);
})
(define_insn "*avx512f_gathersi<VI48F:mode>"
@@ -29424,7 +29452,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>"
[(unspec:P
[(match_operand:P 4 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
- (match_operand:SI 5 "const1248_operand")]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VI48F 8 "maskload_else_operand")]
UNSPEC_VSIBADDR)])]
UNSPEC_GATHER))
(clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
@@ -29445,7 +29474,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>_2"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
- (match_operand:SI 4 "const1248_operand")]
+ (match_operand:SI 4 "const1248_operand")
+ (match_operand:VI48F 7 "maskload_else_operand")]
UNSPEC_VSIBADDR)])]
UNSPEC_GATHER))
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
@@ -29464,17 +29494,19 @@ (define_expand "<avx512>_gatherdi<mode>"
[(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
(match_operand:QI 4 "register_operand")
(mem:<ssescalarmode>
- (match_par_dup 6
+ (match_par_dup 7
[(match_operand 2 "vsib_address_operand")
(match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
- (match_operand:SI 5 "const1248_operand")]))]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VI48F 6 "maskload_else_operand")]))]
UNSPEC_GATHER))
- (clobber (match_scratch:QI 7))])]
+ (clobber (match_scratch:QI 8))])]
"TARGET_AVX512F"
{
- operands[6]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
- operands[5]), UNSPEC_VSIBADDR);
+ operands[7]
+ = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
+ operands[5], operands[6]),
+ UNSPEC_VSIBADDR);
})
(define_insn "*avx512f_gatherdi<VI48F:mode>"
@@ -29486,7 +29518,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>"
[(unspec:P
[(match_operand:P 4 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
- (match_operand:SI 5 "const1248_operand")]
+ (match_operand:SI 5 "const1248_operand")
+ (match_operand:VI48F 8 "maskload_else_operand")]
UNSPEC_VSIBADDR)])]
UNSPEC_GATHER))
(clobber (match_scratch:QI 2 "=&Yk"))]
@@ -29507,7 +29540,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>_2"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "Tv")
(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
- (match_operand:SI 4 "const1248_operand")]
+ (match_operand:SI 4 "const1248_operand")
+ (match_operand:VI48F 7 "maskload_else_operand")]
UNSPEC_VSIBADDR)])]
UNSPEC_GATHER))
(clobber (match_scratch:QI 1 "=&Yk"))]
@@ -29544,7 +29578,7 @@ (define_expand "<avx512>_scattersi<mode>"
operands[5]
= gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
operands[4], operands[1]),
- UNSPEC_VSIBADDR);
+ UNSPEC_VSIBADDR);
})
(define_insn "*avx512f_scattersi<VI48F:mode>"