[v3,7/8] i386: Add else operand to masked loads.

Message ID 20241102125828.29183-8-rdapp.gcc@gmail.com
State New
Headers
Series Rebased: Add maskload else operand. |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Test passed

Commit Message

Robin Dapp Nov. 2, 2024, 12:58 p.m. UTC
  From: Robin Dapp <rdapp@ventanamicro.com>

This patch adds a zero else operand to masked loads, in particular the
masked gather load builtins that are used for gather vectorization.

gcc/ChangeLog:

	* config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
	Add else-operand handling.
	(ix86_expand_builtin): Ditto.
	* config/i386/predicates.md (maskload_else_operand): New
	predicate.
	* config/i386/sse.md: Use predicate.
---
 gcc/config/i386/i386-expand.cc |  26 ++++++--
 gcc/config/i386/predicates.md  |   4 ++
 gcc/config/i386/sse.md         | 112 +++++++++++++++++++++------------
 3 files changed, 97 insertions(+), 45 deletions(-)
  

Comments

Hongtao Liu Nov. 4, 2024, 7:25 a.m. UTC | #1
On Sat, Nov 2, 2024 at 8:58 PM Robin Dapp <rdapp.gcc@gmail.com> wrote:
>
> From: Robin Dapp <rdapp@ventanamicro.com>
>
> This patch adds a zero else operand to masked loads, in particular the
> masked gather load builtins that are used for gather vectorization.
>
> gcc/ChangeLog:
>
>         * config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
>         Add else-operand handling.
>         (ix86_expand_builtin): Ditto.
>         * config/i386/predicates.md (vcvtne2ps2bf_parallel): New
>         predicate.
>         (maskload_else_operand): Ditto.
>         * config/i386/sse.md: Use predicate.
> ---
>  gcc/config/i386/i386-expand.cc |  26 ++++++--
>  gcc/config/i386/predicates.md  |   4 ++
>  gcc/config/i386/sse.md         | 112 +++++++++++++++++++++------------
>  3 files changed, 97 insertions(+), 45 deletions(-)
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index 0de0e842731..6c61f9f87c2 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -12995,10 +12995,11 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
>  {
>    tree arg;
>    rtx pat, op;
> -  unsigned int i, nargs, arg_adjust, memory;
> +  unsigned int i, nargs, arg_adjust, memory = -1;
>    unsigned int constant = 100;
>    bool aligned_mem = false;
> -  rtx xops[4];
> +  rtx xops[4] = {};
> +  bool add_els = false;
>    enum insn_code icode = d->icode;
>    const struct insn_data_d *insn_p = &insn_data[icode];
>    machine_mode tmode = insn_p->operand[0].mode;
> @@ -13125,6 +13126,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
>      case V4DI_FTYPE_PCV4DI_V4DI:
>      case V4SI_FTYPE_PCV4SI_V4SI:
>      case V2DI_FTYPE_PCV2DI_V2DI:
> +      /* Two actual args but an additional else operand.  */
> +      add_els = true;
> +      /* Fallthru.  */
>      case VOID_FTYPE_INT_INT64:
>        nargs = 2;
>        klass = load;
> @@ -13397,6 +13401,12 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
>        xops[i]= op;
>      }
>
> +  if (add_els)
> +    {
> +      xops[i] = CONST0_RTX (GET_MODE (xops[0]));
> +      nargs++;
> +    }
> +
>    switch (nargs)
>      {
>      case 0:
> @@ -13653,7 +13663,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
>    enum insn_code icode, icode2;
>    tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
>    tree arg0, arg1, arg2, arg3, arg4;
> -  rtx op0, op1, op2, op3, op4, pat, pat2, insn;
> +  rtx op0, op1, op2, op3, op4, opels, pat, pat2, insn;
>    machine_mode mode0, mode1, mode2, mode3, mode4;
>    unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
>    HOST_WIDE_INT bisa, bisa2;
> @@ -15560,12 +15570,15 @@ rdseed_step:
>           op3 = copy_to_reg (op3);
>           op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
>         }
> +
>        if (!insn_data[icode].operand[5].predicate (op4, mode4))
>         {
> -          error ("the last argument must be scale 1, 2, 4, 8");
> -          return const0_rtx;
> +         error ("the last argument must be scale 1, 2, 4, 8");
> +         return const0_rtx;
>         }
>
> +      opels = CONST0_RTX (GET_MODE (subtarget));
> +
>        /* Optimize.  If mask is known to have all high bits set,
>          replace op0 with pc_rtx to signal that the instruction
>          overwrites the whole destination and doesn't use its
> @@ -15634,7 +15647,8 @@ rdseed_step:
>             }
>         }
>
> -      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
> +      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4, opels);
> +
>        if (! pat)
>         return const0_rtx;
>        emit_insn (pat);
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index 053312bbe27..7c7d8f61f11 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -2346,3 +2346,7 @@ (define_predicate "apx_evex_add_memory_operand"
>
>    return true;
>  })
> +
> +(define_predicate "maskload_else_operand"
> +  (and (match_code "const_int,const_vector")
> +       (match_test "op == CONST0_RTX (GET_MODE (op))")))
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 36f8567b66f..41c1badbc00 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -28632,7 +28632,7 @@ (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
>     (set_attr "btver2_decode" "vector")
>     (set_attr "mode" "<sseinsnmode>")])
>
> -(define_expand "maskload<mode><sseintvecmodelower>"
> +(define_expand "maskload<mode><sseintvecmodelower>_1"
>    [(set (match_operand:V48_128_256 0 "register_operand")
>         (unspec:V48_128_256
>           [(match_operand:<sseintvecmode> 2 "register_operand")
> @@ -28640,13 +28640,28 @@ (define_expand "maskload<mode><sseintvecmodelower>"
>           UNSPEC_MASKMOV))]
>    "TARGET_AVX")
>
> +(define_expand "maskload<mode><sseintvecmodelower>"
> +  [(set (match_operand:V48_128_256 0 "register_operand")
> +       (unspec:V48_128_256
> +         [(match_operand:<sseintvecmode> 2 "register_operand")
> +          (match_operand:V48_128_256 1 "memory_operand")
> +          (match_operand:V48_128_256 3 "const0_operand")]
> +         UNSPEC_MASKMOV))]
> +  "TARGET_AVX"
> +{
> +  emit_insn (gen_maskload<mode><sseintvecmodelower>_1 (operands[0],
> +                                                      operands[1],
> +                                                      operands[2]));
> +  DONE;
> +})
> +
>  (define_expand "maskload<mode><avx512fmaskmodelower>"
>    [(set (match_operand:V48_AVX512VL 0 "register_operand")
>         (vec_merge:V48_AVX512VL
>           (unspec:V48_AVX512VL
>             [(match_operand:V48_AVX512VL 1 "memory_operand")]
>             UNSPEC_MASKLOAD)
> -         (match_dup 0)
> +         (match_operand:V48_AVX512VL 3 "const0_operand")
>           (match_operand:<avx512fmaskmode> 2 "register_operand")))]
>    "TARGET_AVX512F")
>
> @@ -28656,8 +28671,9 @@ (define_expand "maskload<mode><avx512fmaskmodelower>"
>           (unspec:VI12HFBF_AVX512VL
>             [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")]
>             UNSPEC_MASKLOAD)
> -         (match_dup 0)
> -         (match_operand:<avx512fmaskmode> 2 "register_operand")))]
> +         (match_operand:VI12HFBF_AVX512VL 3 "const0_operand")
> +         (match_operand:<avx512fmaskmode> 2 "register_operand")))
> +         ]
>    "TARGET_AVX512BW")
>
>  (define_expand "maskstore<mode><sseintvecmodelower>"
> @@ -29223,20 +29239,22 @@ (define_expand "avx2_gathersi<mode>"
>                    (unspec:VEC_GATHER_MODE
>                      [(match_operand:VEC_GATHER_MODE 1 "register_operand")
>                       (mem:<ssescalarmode>
> -                       (match_par_dup 6
> +                       (match_par_dup 7
>                           [(match_operand 2 "vsib_address_operand")
>                            (match_operand:<VEC_GATHER_IDXSI>
>                               3 "register_operand")
> -                          (match_operand:SI 5 "const1248_operand ")]))
> +                          (match_operand:SI 5 "const1248_operand ")
> +                          (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")]))
>                       (mem:BLK (scratch))
>                       (match_operand:VEC_GATHER_MODE 4 "register_operand")]
>                      UNSPEC_GATHER))
> -             (clobber (match_scratch:VEC_GATHER_MODE 7))])]
> +             (clobber (match_scratch:VEC_GATHER_MODE 8))])]
>    "TARGET_AVX2"
>  {
> -  operands[6]
> -    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
> -                                       operands[5]), UNSPEC_VSIBADDR);
> +  operands[7]
> +    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
> +                                       operands[5], operands[6]),
> +                     UNSPEC_VSIBADDR);
>  })
>
>  (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
> @@ -29247,7 +29265,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
>              [(unspec:P
>                 [(match_operand:P 3 "vsib_address_operand" "jb")
>                  (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
> -                (match_operand:SI 6 "const1248_operand")]
> +                (match_operand:SI 6 "const1248_operand")
> +                (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")]
>                 UNSPEC_VSIBADDR)])
>            (mem:BLK (scratch))
>            (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
> @@ -29268,7 +29287,8 @@ (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
>              [(unspec:P
>                 [(match_operand:P 2 "vsib_address_operand" "jb")
>                  (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
> -                (match_operand:SI 5 "const1248_operand")]
> +                (match_operand:SI 5 "const1248_operand")
> +                (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")]
>                 UNSPEC_VSIBADDR)])
>            (mem:BLK (scratch))
>            (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
> @@ -29286,20 +29306,22 @@ (define_expand "avx2_gatherdi<mode>"
>                    (unspec:VEC_GATHER_MODE
>                      [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
>                       (mem:<ssescalarmode>
> -                       (match_par_dup 6
> +                       (match_par_dup 7
>                           [(match_operand 2 "vsib_address_operand")
>                            (match_operand:<VEC_GATHER_IDXDI>
>                               3 "register_operand")
> -                          (match_operand:SI 5 "const1248_operand ")]))
> +                          (match_operand:SI 5 "const1248_operand ")
> +                          (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")]))
>                       (mem:BLK (scratch))
>                       (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
>                      UNSPEC_GATHER))
> -             (clobber (match_scratch:VEC_GATHER_MODE 7))])]
> +             (clobber (match_scratch:VEC_GATHER_MODE 8))])]
>    "TARGET_AVX2"
>  {
> -  operands[6]
> -    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
> -                                       operands[5]), UNSPEC_VSIBADDR);
> +  operands[7]
> +    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
> +                                       operands[5], operands[6]),
> +                     UNSPEC_VSIBADDR);
>  })
>
x86 doesn't define mask_gather_loadmn, so I think you can drop this
and all related changes, and keep only the patch I gave you in [1].
Sorry I didn't make that clear last time.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2024-October/666814.html

>  (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
> @@ -29310,7 +29332,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
>              [(unspec:P
>                 [(match_operand:P 3 "vsib_address_operand" "jb")
>                  (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
> -                (match_operand:SI 6 "const1248_operand")]
> +                (match_operand:SI 6 "const1248_operand")
> +                (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")]
>                 UNSPEC_VSIBADDR)])
>            (mem:BLK (scratch))
>            (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
> @@ -29331,7 +29354,8 @@ (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
>              [(unspec:P
>                 [(match_operand:P 2 "vsib_address_operand" "jb")
>                  (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
> -                (match_operand:SI 5 "const1248_operand")]
> +                (match_operand:SI 5 "const1248_operand")
> +                (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")]
>                 UNSPEC_VSIBADDR)])
>            (mem:BLK (scratch))
>            (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
> @@ -29357,7 +29381,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
>                [(unspec:P
>                   [(match_operand:P 3 "vsib_address_operand" "jb")
>                    (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
> -                  (match_operand:SI 6 "const1248_operand")]
> +                  (match_operand:SI 6 "const1248_operand")
> +                  (match_operand:VI4F_256 8 "maskload_else_operand")]
>                   UNSPEC_VSIBADDR)])
>              (mem:BLK (scratch))
>              (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
> @@ -29381,7 +29406,8 @@ (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
>                [(unspec:P
>                   [(match_operand:P 2 "vsib_address_operand" "jb")
>                    (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
> -                  (match_operand:SI 5 "const1248_operand")]
> +                  (match_operand:SI 5 "const1248_operand")
> +                  (match_operand:VI4F_256 7 "maskload_else_operand")]
>                   UNSPEC_VSIBADDR)])
>              (mem:BLK (scratch))
>              (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
> @@ -29402,17 +29428,19 @@ (define_expand "<avx512>_gathersi<mode>"
>                      [(match_operand:VI48F 1 "register_operand")
>                       (match_operand:<avx512fmaskmode> 4 "register_operand")
>                       (mem:<ssescalarmode>
> -                       (match_par_dup 6
> +                       (match_par_dup 7
>                           [(match_operand 2 "vsib_address_operand")
>                            (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
> -                          (match_operand:SI 5 "const1248_operand")]))]
> +                          (match_operand:SI 5 "const1248_operand")
> +                          (match_operand:VI48F 6 "maskload_else_operand")]))]
>                      UNSPEC_GATHER))
> -             (clobber (match_scratch:<avx512fmaskmode> 7))])]
> +             (clobber (match_scratch:<avx512fmaskmode> 8))])]
>    "TARGET_AVX512F"
>  {
> -  operands[6]
> -    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
> -                                       operands[5]), UNSPEC_VSIBADDR);
> +  operands[7]
> +    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
> +                                       operands[5], operands[6]),
> +                     UNSPEC_VSIBADDR);
>  })
>
>  (define_insn "*avx512f_gathersi<VI48F:mode>"
> @@ -29424,7 +29452,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>"
>              [(unspec:P
>                 [(match_operand:P 4 "vsib_address_operand" "Tv")
>                  (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
> -                (match_operand:SI 5 "const1248_operand")]
> +                (match_operand:SI 5 "const1248_operand")
> +                (match_operand:VI48F 8 "maskload_else_operand")]
>                 UNSPEC_VSIBADDR)])]
>           UNSPEC_GATHER))
>     (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
> @@ -29445,7 +29474,8 @@ (define_insn "*avx512f_gathersi<VI48F:mode>_2"
>              [(unspec:P
>                 [(match_operand:P 3 "vsib_address_operand" "Tv")
>                  (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
> -                (match_operand:SI 4 "const1248_operand")]
> +                (match_operand:SI 4 "const1248_operand")
> +                (match_operand:VI48F 7 "maskload_else_operand")]
>                 UNSPEC_VSIBADDR)])]
>           UNSPEC_GATHER))
>     (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
> @@ -29464,17 +29494,19 @@ (define_expand "<avx512>_gatherdi<mode>"
>                      [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
>                       (match_operand:QI 4 "register_operand")
>                       (mem:<ssescalarmode>
> -                       (match_par_dup 6
> +                       (match_par_dup 7
>                           [(match_operand 2 "vsib_address_operand")
>                            (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
> -                          (match_operand:SI 5 "const1248_operand")]))]
> +                          (match_operand:SI 5 "const1248_operand")
> +                          (match_operand:VI48F 6 "maskload_else_operand")]))]
>                      UNSPEC_GATHER))
> -             (clobber (match_scratch:QI 7))])]
> +             (clobber (match_scratch:QI 8))])]
>    "TARGET_AVX512F"
>  {
> -  operands[6]
> -    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
> -                                       operands[5]), UNSPEC_VSIBADDR);
> +  operands[7]
> +    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
> +                                       operands[5], operands[6]),
> +                     UNSPEC_VSIBADDR);
>  })
>
>  (define_insn "*avx512f_gatherdi<VI48F:mode>"
> @@ -29486,7 +29518,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>"
>              [(unspec:P
>                 [(match_operand:P 4 "vsib_address_operand" "Tv")
>                  (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
> -                (match_operand:SI 5 "const1248_operand")]
> +                (match_operand:SI 5 "const1248_operand")
> +                (match_operand:VI48F 8 "maskload_else_operand")]
>                 UNSPEC_VSIBADDR)])]
>           UNSPEC_GATHER))
>     (clobber (match_scratch:QI 2 "=&Yk"))]
> @@ -29507,7 +29540,8 @@ (define_insn "*avx512f_gatherdi<VI48F:mode>_2"
>              [(unspec:P
>                 [(match_operand:P 3 "vsib_address_operand" "Tv")
>                  (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
> -                (match_operand:SI 4 "const1248_operand")]
> +                (match_operand:SI 4 "const1248_operand")
> +                (match_operand:VI48F 7 "maskload_else_operand")]
>                 UNSPEC_VSIBADDR)])]
>           UNSPEC_GATHER))
>     (clobber (match_scratch:QI 1 "=&Yk"))]
> @@ -29544,7 +29578,7 @@ (define_expand "<avx512>_scattersi<mode>"
>    operands[5]
>      = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
>                                         operands[4], operands[1]),
> -                                       UNSPEC_VSIBADDR);
> +                     UNSPEC_VSIBADDR);
>  })
>
>  (define_insn "*avx512f_scattersi<VI48F:mode>"
> --
> 2.47.0
>
  
Robin Dapp Nov. 6, 2024, 10:04 a.m. UTC | #2
> x86 doesn't define mask_gather_loadmn, so I think you can drop this
> and all related, only keep the patch I give you in [1]
> Sorry I didn't make that clear last time.

Yes, that works, thanks.  Will post a v4 soon.
  

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 0de0e842731..6c61f9f87c2 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -12995,10 +12995,11 @@  ix86_expand_special_args_builtin (const struct builtin_description *d,
 {
   tree arg;
   rtx pat, op;
-  unsigned int i, nargs, arg_adjust, memory;
+  unsigned int i, nargs, arg_adjust, memory = -1;
   unsigned int constant = 100;
   bool aligned_mem = false;
-  rtx xops[4];
+  rtx xops[4] = {};
+  bool add_els = false;
   enum insn_code icode = d->icode;
   const struct insn_data_d *insn_p = &insn_data[icode];
   machine_mode tmode = insn_p->operand[0].mode;
@@ -13125,6 +13126,9 @@  ix86_expand_special_args_builtin (const struct builtin_description *d,
     case V4DI_FTYPE_PCV4DI_V4DI:
     case V4SI_FTYPE_PCV4SI_V4SI:
     case V2DI_FTYPE_PCV2DI_V2DI:
+      /* Two actual args but an additional else operand.  */
+      add_els = true;
+      /* Fallthru.  */
     case VOID_FTYPE_INT_INT64:
       nargs = 2;
       klass = load;
@@ -13397,6 +13401,12 @@  ix86_expand_special_args_builtin (const struct builtin_description *d,
       xops[i]= op;
     }
 
+  if (add_els)
+    {
+      xops[i] = CONST0_RTX (GET_MODE (xops[0]));
+      nargs++;
+    }
+
   switch (nargs)
     {
     case 0:
@@ -13653,7 +13663,7 @@  ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
   enum insn_code icode, icode2;
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
   tree arg0, arg1, arg2, arg3, arg4;
-  rtx op0, op1, op2, op3, op4, pat, pat2, insn;
+  rtx op0, op1, op2, op3, op4, opels, pat, pat2, insn;
   machine_mode mode0, mode1, mode2, mode3, mode4;
   unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
   HOST_WIDE_INT bisa, bisa2;
@@ -15560,12 +15570,15 @@  rdseed_step:
 	  op3 = copy_to_reg (op3);
 	  op3 = lowpart_subreg (mode3, op3, GET_MODE (op3));
 	}
+
       if (!insn_data[icode].operand[5].predicate (op4, mode4))
 	{
-          error ("the last argument must be scale 1, 2, 4, 8");
-          return const0_rtx;
+	  error ("the last argument must be scale 1, 2, 4, 8");
+	  return const0_rtx;
 	}
 
+      opels = CONST0_RTX (GET_MODE (subtarget));
+
       /* Optimize.  If mask is known to have all high bits set,
 	 replace op0 with pc_rtx to signal that the instruction
 	 overwrites the whole destination and doesn't use its
@@ -15634,7 +15647,8 @@  rdseed_step:
 	    }
 	}
 
-      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
+      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4, opels);
+
       if (! pat)
 	return const0_rtx;
       emit_insn (pat);
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 053312bbe27..7c7d8f61f11 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -2346,3 +2346,7 @@  (define_predicate "apx_evex_add_memory_operand"
 
   return true;
 })
+
+(define_predicate "maskload_else_operand"
+  (and (match_code "const_int,const_vector")
+       (match_test "op == CONST0_RTX (GET_MODE (op))")))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 36f8567b66f..41c1badbc00 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -28632,7 +28632,7 @@  (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
    (set_attr "btver2_decode" "vector") 
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_expand "maskload<mode><sseintvecmodelower>"
+(define_expand "maskload<mode><sseintvecmodelower>_1"
   [(set (match_operand:V48_128_256 0 "register_operand")
 	(unspec:V48_128_256
 	  [(match_operand:<sseintvecmode> 2 "register_operand")
@@ -28640,13 +28640,28 @@  (define_expand "maskload<mode><sseintvecmodelower>"
 	  UNSPEC_MASKMOV))]
   "TARGET_AVX")
 
+(define_expand "maskload<mode><sseintvecmodelower>"
+  [(set (match_operand:V48_128_256 0 "register_operand")
+       (unspec:V48_128_256
+         [(match_operand:<sseintvecmode> 2 "register_operand")
+          (match_operand:V48_128_256 1 "memory_operand")
+          (match_operand:V48_128_256 3 "const0_operand")]
+         UNSPEC_MASKMOV))]
+  "TARGET_AVX"
+{
+  emit_insn (gen_maskload<mode><sseintvecmodelower>_1 (operands[0],
+						       operands[1],
+						       operands[2]));
+  DONE;
+})
+
 (define_expand "maskload<mode><avx512fmaskmodelower>"
   [(set (match_operand:V48_AVX512VL 0 "register_operand")
 	(vec_merge:V48_AVX512VL
 	  (unspec:V48_AVX512VL
 	    [(match_operand:V48_AVX512VL 1 "memory_operand")]
 	    UNSPEC_MASKLOAD)
-	  (match_dup 0)
+	  (match_operand:V48_AVX512VL 3 "const0_operand")
 	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
   "TARGET_AVX512F")
 
@@ -28656,8 +28671,9 @@  (define_expand "maskload<mode><avx512fmaskmodelower>"
 	  (unspec:VI12HFBF_AVX512VL
 	    [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")]
 	    UNSPEC_MASKLOAD)
-	  (match_dup 0)
-	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
+	  (match_operand:VI12HFBF_AVX512VL 3 "const0_operand")
+	  (match_operand:<avx512fmaskmode> 2 "register_operand")))
+	  ]
   "TARGET_AVX512BW")
 
 (define_expand "maskstore<mode><sseintvecmodelower>"
@@ -29223,20 +29239,22 @@  (define_expand "avx2_gathersi<mode>"
 		   (unspec:VEC_GATHER_MODE
 		     [(match_operand:VEC_GATHER_MODE 1 "register_operand")
 		      (mem:<ssescalarmode>
-			(match_par_dup 6
+			(match_par_dup 7
 			  [(match_operand 2 "vsib_address_operand")
 			   (match_operand:<VEC_GATHER_IDXSI>
 			      3 "register_operand")
-			   (match_operand:SI 5 "const1248_operand ")]))
+			   (match_operand:SI 5 "const1248_operand ")
+			   (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")]))
 		      (mem:BLK (scratch))
 		      (match_operand:VEC_GATHER_MODE 4 "register_operand")]
 		     UNSPEC_GATHER))
-	      (clobber (match_scratch:VEC_GATHER_MODE 7))])]
+	      (clobber (match_scratch:VEC_GATHER_MODE 8))])]
   "TARGET_AVX2"
 {
-  operands[6]
-    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
-					operands[5]), UNSPEC_VSIBADDR);
+  operands[7]
+    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
+					operands[5], operands[6]),
+		      UNSPEC_VSIBADDR);
 })
 
 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
@@ -29247,7 +29265,8 @@  (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
 	     [(unspec:P
 		[(match_operand:P 3 "vsib_address_operand" "jb")
 		 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
-		 (match_operand:SI 6 "const1248_operand")]
+		 (match_operand:SI 6 "const1248_operand")
+		 (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")]
 		UNSPEC_VSIBADDR)])
 	   (mem:BLK (scratch))
 	   (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
@@ -29268,7 +29287,8 @@  (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
 	     [(unspec:P
 		[(match_operand:P 2 "vsib_address_operand" "jb")
 		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
-		 (match_operand:SI 5 "const1248_operand")]
+		 (match_operand:SI 5 "const1248_operand")
+		 (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")]
 		UNSPEC_VSIBADDR)])
 	   (mem:BLK (scratch))
 	   (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
@@ -29286,20 +29306,22 @@  (define_expand "avx2_gatherdi<mode>"
 		   (unspec:VEC_GATHER_MODE
 		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
 		      (mem:<ssescalarmode>
-			(match_par_dup 6
+			(match_par_dup 7
 			  [(match_operand 2 "vsib_address_operand")
 			   (match_operand:<VEC_GATHER_IDXDI>
 			      3 "register_operand")
-			   (match_operand:SI 5 "const1248_operand ")]))
+			   (match_operand:SI 5 "const1248_operand ")
+			   (match_operand:VEC_GATHER_MODE 6 "maskload_else_operand")]))
 		      (mem:BLK (scratch))
 		      (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
 		     UNSPEC_GATHER))
-	      (clobber (match_scratch:VEC_GATHER_MODE 7))])]
+	      (clobber (match_scratch:VEC_GATHER_MODE 8))])]
   "TARGET_AVX2"
 {
-  operands[6]
-    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
-					operands[5]), UNSPEC_VSIBADDR);
+  operands[7]
+    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
+					operands[5], operands[6]),
+		      UNSPEC_VSIBADDR);
 })
 
 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
@@ -29310,7 +29332,8 @@  (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
 	     [(unspec:P
 		[(match_operand:P 3 "vsib_address_operand" "jb")
 		 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
-		 (match_operand:SI 6 "const1248_operand")]
+		 (match_operand:SI 6 "const1248_operand")
+		 (match_operand:VEC_GATHER_MODE 8 "maskload_else_operand")]
 		UNSPEC_VSIBADDR)])
 	   (mem:BLK (scratch))
 	   (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
@@ -29331,7 +29354,8 @@  (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
 	     [(unspec:P
 		[(match_operand:P 2 "vsib_address_operand" "jb")
 		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
-		 (match_operand:SI 5 "const1248_operand")]
+		 (match_operand:SI 5 "const1248_operand")
+		 (match_operand:VEC_GATHER_MODE 7 "maskload_else_operand")]
 		UNSPEC_VSIBADDR)])
 	   (mem:BLK (scratch))
 	   (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
@@ -29357,7 +29381,8 @@  (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
 	       [(unspec:P
 		  [(match_operand:P 3 "vsib_address_operand" "jb")
 		   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
-		   (match_operand:SI 6 "const1248_operand")]
+		   (match_operand:SI 6 "const1248_operand")
+		   (match_operand:VI4F_256 8 "maskload_else_operand")]
 		  UNSPEC_VSIBADDR)])
 	     (mem:BLK (scratch))
 	     (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
@@ -29381,7 +29406,8 @@  (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
 	       [(unspec:P
 		  [(match_operand:P 2 "vsib_address_operand" "jb")
 		   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
-		   (match_operand:SI 5 "const1248_operand")]
+		   (match_operand:SI 5 "const1248_operand")
+		   (match_operand:VI4F_256 7 "maskload_else_operand")]
 		  UNSPEC_VSIBADDR)])
 	     (mem:BLK (scratch))
 	     (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
@@ -29402,17 +29428,19 @@  (define_expand "<avx512>_gathersi<mode>"
 		     [(match_operand:VI48F 1 "register_operand")
 		      (match_operand:<avx512fmaskmode> 4 "register_operand")
 		      (mem:<ssescalarmode>
-			(match_par_dup 6
+			(match_par_dup 7
 			  [(match_operand 2 "vsib_address_operand")
 			   (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
-			   (match_operand:SI 5 "const1248_operand")]))]
+			   (match_operand:SI 5 "const1248_operand")
+			   (match_operand:VI48F 6 "maskload_else_operand")]))]
 		     UNSPEC_GATHER))
-	      (clobber (match_scratch:<avx512fmaskmode> 7))])]
+	      (clobber (match_scratch:<avx512fmaskmode> 8))])]
   "TARGET_AVX512F"
 {
-  operands[6]
-    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
-					operands[5]), UNSPEC_VSIBADDR);
+  operands[7]
+    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
+					operands[5], operands[6]),
+		      UNSPEC_VSIBADDR);
 })
 
 (define_insn "*avx512f_gathersi<VI48F:mode>"
@@ -29424,7 +29452,8 @@  (define_insn "*avx512f_gathersi<VI48F:mode>"
 	     [(unspec:P
 		[(match_operand:P 4 "vsib_address_operand" "Tv")
 		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
-		 (match_operand:SI 5 "const1248_operand")]
+		 (match_operand:SI 5 "const1248_operand")
+		 (match_operand:VI48F 8 "maskload_else_operand")]
 		UNSPEC_VSIBADDR)])]
 	  UNSPEC_GATHER))
    (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
@@ -29445,7 +29474,8 @@  (define_insn "*avx512f_gathersi<VI48F:mode>_2"
 	     [(unspec:P
 		[(match_operand:P 3 "vsib_address_operand" "Tv")
 		 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
-		 (match_operand:SI 4 "const1248_operand")]
+		 (match_operand:SI 4 "const1248_operand")
+		 (match_operand:VI48F 7 "maskload_else_operand")]
 		UNSPEC_VSIBADDR)])]
 	  UNSPEC_GATHER))
    (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
@@ -29464,17 +29494,19 @@  (define_expand "<avx512>_gatherdi<mode>"
 		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
 		      (match_operand:QI 4 "register_operand")
 		      (mem:<ssescalarmode>
-			(match_par_dup 6
+			(match_par_dup 7
 			  [(match_operand 2 "vsib_address_operand")
 			   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
-			   (match_operand:SI 5 "const1248_operand")]))]
+			   (match_operand:SI 5 "const1248_operand")
+			   (match_operand:VI48F 6 "maskload_else_operand")]))]
 		     UNSPEC_GATHER))
-	      (clobber (match_scratch:QI 7))])]
+	      (clobber (match_scratch:QI 8))])]
   "TARGET_AVX512F"
 {
-  operands[6]
-    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
-					operands[5]), UNSPEC_VSIBADDR);
+  operands[7]
+    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[2], operands[3],
+					operands[5], operands[6]),
+		      UNSPEC_VSIBADDR);
 })
 
 (define_insn "*avx512f_gatherdi<VI48F:mode>"
@@ -29486,7 +29518,8 @@  (define_insn "*avx512f_gatherdi<VI48F:mode>"
 	     [(unspec:P
 		[(match_operand:P 4 "vsib_address_operand" "Tv")
 		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
-		 (match_operand:SI 5 "const1248_operand")]
+		 (match_operand:SI 5 "const1248_operand")
+		 (match_operand:VI48F 8 "maskload_else_operand")]
 		UNSPEC_VSIBADDR)])]
 	  UNSPEC_GATHER))
    (clobber (match_scratch:QI 2 "=&Yk"))]
@@ -29507,7 +29540,8 @@  (define_insn "*avx512f_gatherdi<VI48F:mode>_2"
 	     [(unspec:P
 		[(match_operand:P 3 "vsib_address_operand" "Tv")
 		 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
-		 (match_operand:SI 4 "const1248_operand")]
+		 (match_operand:SI 4 "const1248_operand")
+		 (match_operand:VI48F 7 "maskload_else_operand")]
 		UNSPEC_VSIBADDR)])]
 	  UNSPEC_GATHER))
    (clobber (match_scratch:QI 1 "=&Yk"))]
@@ -29544,7 +29578,7 @@  (define_expand "<avx512>_scattersi<mode>"
   operands[5]
     = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
 					operands[4], operands[1]),
-					UNSPEC_VSIBADDR);
+		      UNSPEC_VSIBADDR);
 })
 
 (define_insn "*avx512f_scattersi<VI48F:mode>"