[05/10] i386: Fix dot_prod backend patterns for mmx and sse targets
Commit Message
Following the migration of the dot_prod optab from a direct to a
conversion-type optab, ensure all back-end patterns incorporate the
second machine mode into pattern names.
gcc/ChangeLog:
* config/i386/mmx.md (usdot_prodv8qi): Deleted.
(usdot_prodv2siv8qi): New.
(sdot_prodv8qi): Deleted.
(sdot_prodv2siv8qi): New.
(udot_prodv8qi): Deleted.
(udot_prodv2siv8qi): New.
(usdot_prodv4hi): Deleted.
(usdot_prodv2siv4hi): New.
(udot_prodv4hi): Deleted.
(udot_prodv2siv4hi): New.
(sdot_prodv4hi): Deleted.
(sdot_prodv2siv4hi): New.
* config/i386/sse.md (fourwayacc): New.
(twowayacc): New.
(sdot_prod<mode>): Deleted.
(sdot_prod<twowayacc><mode>): New.
(sdot_prodv4si): Deleted.
(sdot_prodv2div4si): New.
(usdot_prod<mode>): Deleted.
(usdot_prod<fourwayacc><mode>): New.
(sdot_prod<mode>): Deleted.
(sdot_prod<fourwayacc><mode>): New.
(sdot_prodv64qi): Deleted.
(sdot_prodv16siv64qi): New.
(udot_prod<mode>): Deleted.
(udot_prod<fourwayacc><mode>): New.
(udot_prodv64qi): Deleted.
(udot_prodv16qiv64qi): New.
(usdot_prod<mode>): Deleted.
(usdot_prod<twowayacc><mode>): New.
(udot_prod<mode>): Deleted.
(udot_prod<twowayacc><mode>): New.
---
gcc/config/i386/mmx.md | 30 +++++++++++++--------------
gcc/config/i386/sse.md | 47 +++++++++++++++++++++++++-----------------
2 files changed, 43 insertions(+), 34 deletions(-)
Comments
On Wed, Jul 10, 2024 at 10:10 PM Victor Do Nascimento
<victor.donascimento@arm.com> wrote:
>
> Following the migration of the dot_prod optab from a direct to a
> conversion-type optab, ensure all back-end patterns incorporate the
> second machine mode into pattern names.
The patch LGTM. BTW, you can use the existing <ssedvecmodelower> instead of the
new <fourwayacc>, and <sseunpackmodelower> instead of <twowayacc>.
>
> gcc/ChangeLog:
>
> * config/i386/mmx.md (usdot_prodv8qi): Deleted.
> (usdot_prodv2siv8qi): New.
> (sdot_prodv8qi): Deleted.
> (sdot_prodv2siv8qi): New.
> (udot_prodv8qi): Deleted.
> (udot_prodv2siv8qi): New.
> (usdot_prodv4hi): Deleted.
> (usdot_prodv2siv4hi): New.
> (udot_prodv4hi): Deleted.
> (udot_prodv2siv4hi): New.
> (sdot_prodv4hi): Deleted.
> (sdot_prodv2siv4hi): New.
> * config/i386/sse.md (fourwayacc): New.
> (twowayacc): New.
> (sdot_prod<mode>): Deleted.
> (sdot_prod<twowayacc><mode>): New.
> (sdot_prodv4si): Deleted.
> (sdot_prodv2div4si): New.
> (usdot_prod<mode>): Deleted.
> (usdot_prod<fourwayacc><mode>): New.
> (sdot_prod<mode>): Deleted.
> (sdot_prod<fourwayacc><mode>): New.
> (sdot_prodv64qi): Deleted.
> (sdot_prodv16siv64qi): New.
> (udot_prod<mode>): Deleted.
> (udot_prod<fourwayacc><mode>): New.
> (udot_prodv64qi): Deleted.
> (udot_prodv16qiv64qi): New.
> (usdot_prod<mode>): Deleted.
> (usdot_prod<twowayacc><mode>): New.
> (udot_prod<mode>): Deleted.
> (udot_prod<twowayacc><mode>): New.
> ---
> gcc/config/i386/mmx.md | 30 +++++++++++++--------------
> gcc/config/i386/sse.md | 47 +++++++++++++++++++++++++-----------------
> 2 files changed, 43 insertions(+), 34 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 94d3a6e5692..d78739b033d 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -6344,7 +6344,7 @@ (define_expand "usadv8qi"
> DONE;
> })
>
> -(define_expand "usdot_prodv8qi"
> +(define_expand "usdot_prodv2siv8qi"
> [(match_operand:V2SI 0 "register_operand")
> (match_operand:V8QI 1 "register_operand")
> (match_operand:V8QI 2 "register_operand")
> @@ -6363,7 +6363,7 @@ (define_expand "usdot_prodv8qi"
> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
> rtx op0 = gen_reg_rtx (V4SImode);
>
> - emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3));
> + emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3));
> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
> }
> else
> @@ -6377,7 +6377,7 @@ (define_expand "usdot_prodv8qi"
> emit_move_insn (op3, CONST0_RTX (V4SImode));
> emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
> emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
> - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
> + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
>
> /* vec_perm (op0, 2, 3, 0, 1); */
> emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
> @@ -6388,7 +6388,7 @@ (define_expand "usdot_prodv8qi"
> DONE;
> })
>
> -(define_expand "sdot_prodv8qi"
> +(define_expand "sdot_prodv2siv8qi"
> [(match_operand:V2SI 0 "register_operand")
> (match_operand:V8QI 1 "register_operand")
> (match_operand:V8QI 2 "register_operand")
> @@ -6406,7 +6406,7 @@ (define_expand "sdot_prodv8qi"
> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
> rtx op0 = gen_reg_rtx (V4SImode);
>
> - emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3));
> + emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3));
> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
> }
> else
> @@ -6420,7 +6420,7 @@ (define_expand "sdot_prodv8qi"
> emit_move_insn (op3, CONST0_RTX (V4SImode));
> emit_insn (gen_extendv8qiv8hi2 (op1, operands[1]));
> emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
> - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
> + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
>
> /* vec_perm (op0, 2, 3, 0, 1); */
> emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
> @@ -6432,7 +6432,7 @@ (define_expand "sdot_prodv8qi"
>
> })
>
> -(define_expand "udot_prodv8qi"
> +(define_expand "udot_prodv2siv8qi"
> [(match_operand:V2SI 0 "register_operand")
> (match_operand:V8QI 1 "register_operand")
> (match_operand:V8QI 2 "register_operand")
> @@ -6450,7 +6450,7 @@ (define_expand "udot_prodv8qi"
> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
> rtx op0 = gen_reg_rtx (V4SImode);
>
> - emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3));
> + emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3));
> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
> }
> else
> @@ -6464,7 +6464,7 @@ (define_expand "udot_prodv8qi"
> emit_move_insn (op3, CONST0_RTX (V4SImode));
> emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
> emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2]));
> - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
> + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
>
> /* vec_perm (op0, 2, 3, 0, 1); */
> emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
> @@ -6476,7 +6476,7 @@ (define_expand "udot_prodv8qi"
>
> })
>
> -(define_expand "usdot_prodv4hi"
> +(define_expand "usdot_prodv2siv4hi"
> [(match_operand:V2SI 0 "register_operand")
> (match_operand:V4HI 1 "register_operand")
> (match_operand:V4HI 2 "register_operand")
> @@ -6492,12 +6492,12 @@ (define_expand "usdot_prodv4hi"
> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
> rtx op0 = gen_reg_rtx (V4SImode);
>
> - emit_insn (gen_usdot_prodv8hi (op0, op1, op2, op3));
> + emit_insn (gen_usdot_prodv4siv8hi (op0, op1, op2, op3));
> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
> DONE;
> })
>
> -(define_expand "udot_prodv4hi"
> +(define_expand "udot_prodv2siv4hi"
> [(match_operand:V2SI 0 "register_operand")
> (match_operand:V4HI 1 "register_operand")
> (match_operand:V4HI 2 "register_operand")
> @@ -6513,12 +6513,12 @@ (define_expand "udot_prodv4hi"
> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
> rtx op0 = gen_reg_rtx (V4SImode);
>
> - emit_insn (gen_udot_prodv8hi (op0, op1, op2, op3));
> + emit_insn (gen_udot_prodv4siv8hi (op0, op1, op2, op3));
> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
> DONE;
> })
>
> -(define_expand "sdot_prodv4hi"
> +(define_expand "sdot_prodv2siv4hi"
> [(match_operand:V2SI 0 "register_operand")
> (match_operand:V4HI 1 "register_operand")
> (match_operand:V4HI 2 "register_operand")
> @@ -6534,7 +6534,7 @@ (define_expand "sdot_prodv4hi"
> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
> rtx op0 = gen_reg_rtx (V4SImode);
>
> - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
> + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
> DONE;
> })
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index bda66d5e121..861b87bb50f 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -1195,6 +1195,15 @@ (define_mode_attr ssexmmmode
> (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
> (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
>
> +;; Mapping of input type to 4-way accumulated type
> +(define_mode_attr fourwayacc
> + [(V64QI "v16si") (V32QI "v8si") (V16QI "v4si")])
> +
> +;; Mapping of input type to 2-way accumulated type
> +(define_mode_attr twowayacc
> + [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")
> + (V32QI "v16hi") (V16QI "v8hi")])
> +
> ;; Pointer size override for scalar modes (Intel asm dialect)
> (define_mode_attr iptr
> [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
> @@ -16712,7 +16721,7 @@ (define_mode_attr SDOT_PMADD_SUF
> (define_mode_attr SDOT_VPDP_SUF
> [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")])
>
> -(define_expand "sdot_prod<mode>"
> +(define_expand "sdot_prod<twowayacc><mode>"
> [(match_operand:<sseunpackmode> 0 "register_operand")
> (match_operand:VI2_AVX512VNNIBW 1 "register_operand")
> (match_operand:VI2_AVX512VNNIBW 2 "register_operand")
> @@ -16747,7 +16756,7 @@ (define_expand "sdot_prod<mode>"
>
> ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
> ;; back together when madd is available.
> -(define_expand "sdot_prodv4si"
> +(define_expand "sdot_prodv2div4si"
> [(match_operand:V2DI 0 "register_operand")
> (match_operand:V4SI 1 "register_operand")
> (match_operand:V4SI 2 "register_operand")
> @@ -30290,7 +30299,7 @@ (define_insn "vpshldv_<mode>_maskz_1"
> [(set_attr ("prefix") ("evex"))
> (set_attr "mode" "<sseinsnmode>")])
>
> -(define_expand "usdot_prod<mode>"
> +(define_expand "usdot_prod<fourwayacc><mode>"
> [(match_operand:<ssedvecmode> 0 "register_operand")
> (match_operand:VI1_AVX512 1 "register_operand")
> (match_operand:VI1_AVX512 2 "register_operand")
> @@ -30328,9 +30337,9 @@ (define_expand "usdot_prod<mode>"
> rtx sum = gen_reg_rtx (<ssedvecmode>mode);
>
> emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, op1_lo,
> op2_lo, sum));
> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, op1_hi,
> op2_hi, operands[3]));
> emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
> }
> @@ -31149,7 +31158,7 @@ (define_int_attr vpdotprodtype
> (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
> (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
>
> -(define_expand "sdot_prod<mode>"
> +(define_expand "sdot_prod<fourwayacc><mode>"
> [(match_operand:<ssedvecmode> 0 "register_operand")
> (match_operand:VI1_AVX2 1 "register_operand")
> (match_operand:VI1_AVX2 2 "register_operand")
> @@ -31185,9 +31194,9 @@ (define_expand "sdot_prod<mode>"
> rtx sum = gen_reg_rtx (<ssedvecmode>mode);
>
> emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, op1_lo,
> op2_lo, sum));
> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, op1_hi,
> op2_hi, operands[3]));
> emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
> }
> @@ -31195,7 +31204,7 @@ (define_expand "sdot_prod<mode>"
> DONE;
> })
>
> -(define_expand "sdot_prodv64qi"
> +(define_expand "sdot_prodv16siv64qi"
> [(match_operand:V16SI 0 "register_operand")
> (match_operand:V64QI 1 "register_operand")
> (match_operand:V64QI 2 "register_operand")
> @@ -31218,14 +31227,14 @@ (define_expand "sdot_prodv64qi"
> rtx sum = gen_reg_rtx (V16SImode);
>
> emit_move_insn (sum, CONST0_RTX (V16SImode));
> - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
> - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
> + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum));
> + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, operands[3]));
>
> emit_insn (gen_addv16si3 (operands[0], res1, res2));
> DONE;
> })
>
> -(define_expand "udot_prod<mode>"
> +(define_expand "udot_prod<fourwayacc><mode>"
> [(match_operand:<ssedvecmode> 0 "register_operand")
> (match_operand:VI1_AVX2 1 "register_operand")
> (match_operand:VI1_AVX2 2 "register_operand")
> @@ -31261,9 +31270,9 @@ (define_expand "udot_prod<mode>"
> rtx sum = gen_reg_rtx (<ssedvecmode>mode);
>
> emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, op1_lo,
> op2_lo, sum));
> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, op1_hi,
> op2_hi, operands[3]));
> emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
> }
> @@ -31271,7 +31280,7 @@ (define_expand "udot_prod<mode>"
> DONE;
> })
>
> -(define_expand "udot_prodv64qi"
> +(define_expand "udot_prodv16qiv64qi"
> [(match_operand:V16SI 0 "register_operand")
> (match_operand:V64QI 1 "register_operand")
> (match_operand:V64QI 2 "register_operand")
> @@ -31294,8 +31303,8 @@ (define_expand "udot_prodv64qi"
> rtx sum = gen_reg_rtx (V16SImode);
>
> emit_move_insn (sum, CONST0_RTX (V16SImode));
> - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
> - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
> + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum));
> + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, operands[3]));
>
> emit_insn (gen_addv16si3 (operands[0], res1, res2));
> DONE;
> @@ -31401,7 +31410,7 @@ (define_int_attr vpdpwprodtype
> (UNSPEC_VPDPWSUD "wsud") (UNSPEC_VPDPWSUDS "wsuds")
> (UNSPEC_VPDPWUUD "wuud") (UNSPEC_VPDPWUUDS "wuuds")])
>
> -(define_expand "usdot_prod<mode>"
> +(define_expand "usdot_prod<twowayacc><mode>"
> [(match_operand:<sseunpackmode> 0 "register_operand")
> (match_operand:VI2_AVX2 1 "register_operand")
> (match_operand:VI2_AVX2 2 "register_operand")
> @@ -31419,7 +31428,7 @@ (define_expand "usdot_prod<mode>"
> DONE;
> })
>
> -(define_expand "udot_prod<mode>"
> +(define_expand "udot_prod<twowayacc><mode>"
> [(match_operand:<sseunpackmode> 0 "register_operand")
> (match_operand:VI2_AVX2 1 "register_operand")
> (match_operand:VI2_AVX2 2 "register_operand")
> --
> 2.34.1
>
> -----Original Message-----
> From: Hongtao Liu <crazylht@gmail.com>
> Sent: Thursday, July 11, 2024 9:45 AM
> To: Victor Do Nascimento <victor.donascimento@arm.com>
> Cc: gcc-patches@gcc.gnu.org; richard.sandiford@arm.com;
> Richard.Earnshaw@arm.com
> Subject: Re: [PATCH 05/10] i386: Fix dot_prod backend patterns for mmx and
> sse targets
>
> On Wed, Jul 10, 2024 at 10:10 PM Victor Do Nascimento
> <victor.donascimento@arm.com> wrote:
> >
> > Following the migration of the dot_prod optab from a direct to a
> > conversion-type optab, ensure all back-end patterns incorporate the
> > second machine mode into pattern names.
> The patch LGTM. BTW you can use existing <ssedvecmodelower> instead of
> new <fourwayacc> and <sseunpackmodelower> instead of <twowayacc>
> >
> > gcc/ChangeLog:
> >
> > * config/i386/mmx.md (usdot_prodv8qi): Deleted.
> > (usdot_prodv2siv8qi): New.
Hi Victor,
I suppose all the patterns are renamed, rather than deleted and newly added, right?
If that is the case, the log would be clearer and easier to understand
if changed to something like:
(old pattern): Renamed to ...
(new pattern): this.
Thx,
Haochen
> > (sdot_prodv8qi): Deleted.
> > (sdot_prodv2siv8qi): New.
> > (udot_prodv8qi): Deleted.
> > (udot_prodv2siv8qi): New.
> > (usdot_prodv4hi): Deleted.
> > (usdot_prodv2siv4hi): New.
> > (udot_prodv4hi): Deleted.
> > (udot_prodv2siv4hi): New.
> > (sdot_prodv4hi): Deleted.
> > (sdot_prodv2siv4hi): New.
> > * config/i386/sse.md (fourwayacc): New.
> > (twowayacc): New.
> > (sdot_prod<mode>): Deleted.
> > (sdot_prod<twowayacc><mode>): New.
> > (sdot_prodv4si): Deleted.
> > (sdot_prodv2div4si): New.
> > (usdot_prod<mode>): Deleted.
> > (usdot_prod<fourwayacc><mode>): New.
> > (sdot_prod<mode>): Deleted.
> > (sdot_prod<fourwayacc><mode>): New.
> > (sdot_prodv64qi): Deleted.
> > (sdot_prodv16siv64qi): New.
> > (udot_prod<mode>): Deleted.
> > (udot_prod<fourwayacc><mode>): New.
> > (udot_prodv64qi): Deleted.
> > (udot_prodv16qiv64qi): New.
> > (usdot_prod<mode>): Deleted.
> > (usdot_prod<twowayacc><mode>): New.
> > (udot_prod<mode>): Deleted.
> > (udot_prod<twowayacc><mode>): New.
> > ---
> > gcc/config/i386/mmx.md | 30 +++++++++++++--------------
> > gcc/config/i386/sse.md | 47 +++++++++++++++++++++++++----------------
> -
> > 2 files changed, 43 insertions(+), 34 deletions(-)
> >
> > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index
> > 94d3a6e5692..d78739b033d 100644
> > --- a/gcc/config/i386/mmx.md
> > +++ b/gcc/config/i386/mmx.md
> > @@ -6344,7 +6344,7 @@ (define_expand "usadv8qi"
> > DONE;
> > })
> >
> > -(define_expand "usdot_prodv8qi"
> > +(define_expand "usdot_prodv2siv8qi"
> > [(match_operand:V2SI 0 "register_operand")
> > (match_operand:V8QI 1 "register_operand")
> > (match_operand:V8QI 2 "register_operand") @@ -6363,7 +6363,7 @@
> > (define_expand "usdot_prodv8qi"
> > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
> > rtx op0 = gen_reg_rtx (V4SImode);
> >
> > - emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3));
> > + emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3));
> > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
> V4SImode));
> > }
> > else
> > @@ -6377,7 +6377,7 @@ (define_expand "usdot_prodv8qi"
> > emit_move_insn (op3, CONST0_RTX (V4SImode));
> > emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
> > emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
> > - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
> > + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
> >
> > /* vec_perm (op0, 2, 3, 0, 1); */
> > emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@
> > -6388,7 +6388,7 @@ (define_expand "usdot_prodv8qi"
> > DONE;
> > })
> >
> > -(define_expand "sdot_prodv8qi"
> > +(define_expand "sdot_prodv2siv8qi"
> > [(match_operand:V2SI 0 "register_operand")
> > (match_operand:V8QI 1 "register_operand")
> > (match_operand:V8QI 2 "register_operand") @@ -6406,7 +6406,7 @@
> > (define_expand "sdot_prodv8qi"
> > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
> > rtx op0 = gen_reg_rtx (V4SImode);
> >
> > - emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3));
> > + emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3));
> > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
> V4SImode));
> > }
> > else
> > @@ -6420,7 +6420,7 @@ (define_expand "sdot_prodv8qi"
> > emit_move_insn (op3, CONST0_RTX (V4SImode));
> > emit_insn (gen_extendv8qiv8hi2 (op1, operands[1]));
> > emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
> > - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
> > + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
> >
> > /* vec_perm (op0, 2, 3, 0, 1); */
> > emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@
> > -6432,7 +6432,7 @@ (define_expand "sdot_prodv8qi"
> >
> > })
> >
> > -(define_expand "udot_prodv8qi"
> > +(define_expand "udot_prodv2siv8qi"
> > [(match_operand:V2SI 0 "register_operand")
> > (match_operand:V8QI 1 "register_operand")
> > (match_operand:V8QI 2 "register_operand") @@ -6450,7 +6450,7 @@
> > (define_expand "udot_prodv8qi"
> > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
> > rtx op0 = gen_reg_rtx (V4SImode);
> >
> > - emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3));
> > + emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3));
> > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
> V4SImode));
> > }
> > else
> > @@ -6464,7 +6464,7 @@ (define_expand "udot_prodv8qi"
> > emit_move_insn (op3, CONST0_RTX (V4SImode));
> > emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
> > emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2]));
> > - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
> > + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
> >
> > /* vec_perm (op0, 2, 3, 0, 1); */
> > emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@
> > -6476,7 +6476,7 @@ (define_expand "udot_prodv8qi"
> >
> > })
> >
> > -(define_expand "usdot_prodv4hi"
> > +(define_expand "usdot_prodv2siv4hi"
> > [(match_operand:V2SI 0 "register_operand")
> > (match_operand:V4HI 1 "register_operand")
> > (match_operand:V4HI 2 "register_operand") @@ -6492,12 +6492,12
> @@
> > (define_expand "usdot_prodv4hi"
> > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
> > rtx op0 = gen_reg_rtx (V4SImode);
> >
> > - emit_insn (gen_usdot_prodv8hi (op0, op1, op2, op3));
> > + emit_insn (gen_usdot_prodv4siv8hi (op0, op1, op2, op3));
> > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
> V4SImode));
> > DONE;
> > })
> >
> > -(define_expand "udot_prodv4hi"
> > +(define_expand "udot_prodv2siv4hi"
> > [(match_operand:V2SI 0 "register_operand")
> > (match_operand:V4HI 1 "register_operand")
> > (match_operand:V4HI 2 "register_operand") @@ -6513,12 +6513,12
> @@
> > (define_expand "udot_prodv4hi"
> > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
> > rtx op0 = gen_reg_rtx (V4SImode);
> >
> > - emit_insn (gen_udot_prodv8hi (op0, op1, op2, op3));
> > + emit_insn (gen_udot_prodv4siv8hi (op0, op1, op2, op3));
> > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
> V4SImode));
> > DONE;
> > })
> >
> > -(define_expand "sdot_prodv4hi"
> > +(define_expand "sdot_prodv2siv4hi"
> > [(match_operand:V2SI 0 "register_operand")
> > (match_operand:V4HI 1 "register_operand")
> > (match_operand:V4HI 2 "register_operand") @@ -6534,7 +6534,7 @@
> > (define_expand "sdot_prodv4hi"
> > rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
> > rtx op0 = gen_reg_rtx (V4SImode);
> >
> > - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
> > + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
> > emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
> V4SImode));
> > DONE;
> > })
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index
> > bda66d5e121..861b87bb50f 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -1195,6 +1195,15 @@ (define_mode_attr ssexmmmode
> > (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
> > (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
> >
> > +;; Mapping of input type to 4-way accumulated type (define_mode_attr
> > +fourwayacc
> > + [(V64QI "v16si") (V32QI "v8si") (V16QI "v4si")])
> > +
> > +;; Mapping of input type to 2-way accumulated type (define_mode_attr
> > +twowayacc
> > + [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")
> > + (V32QI "v16hi") (V16QI "v8hi")])
> > +
> > ;; Pointer size override for scalar modes (Intel asm dialect)
> > (define_mode_attr iptr
> > [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q") @@ -16712,7
> > +16721,7 @@ (define_mode_attr SDOT_PMADD_SUF (define_mode_attr
> > SDOT_VPDP_SUF
> > [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")])
> >
> > -(define_expand "sdot_prod<mode>"
> > +(define_expand "sdot_prod<twowayacc><mode>"
> > [(match_operand:<sseunpackmode> 0 "register_operand")
> > (match_operand:VI2_AVX512VNNIBW 1 "register_operand")
> > (match_operand:VI2_AVX512VNNIBW 2 "register_operand") @@ -
> 16747,7
> > +16756,7 @@ (define_expand "sdot_prod<mode>"
> >
> > ;; Normally we use widen_mul_even/odd, but combine can't quite get it
> > all ;; back together when madd is available.
> > -(define_expand "sdot_prodv4si"
> > +(define_expand "sdot_prodv2div4si"
> > [(match_operand:V2DI 0 "register_operand")
> > (match_operand:V4SI 1 "register_operand")
> > (match_operand:V4SI 2 "register_operand") @@ -30290,7 +30299,7 @@
> > (define_insn "vpshldv_<mode>_maskz_1"
> > [(set_attr ("prefix") ("evex"))
> > (set_attr "mode" "<sseinsnmode>")])
> >
> > -(define_expand "usdot_prod<mode>"
> > +(define_expand "usdot_prod<fourwayacc><mode>"
> > [(match_operand:<ssedvecmode> 0 "register_operand")
> > (match_operand:VI1_AVX512 1 "register_operand")
> > (match_operand:VI1_AVX512 2 "register_operand") @@ -30328,9
> > +30337,9 @@ (define_expand "usdot_prod<mode>"
> > rtx sum = gen_reg_rtx (<ssedvecmode>mode);
> >
> > emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
> > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
> > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1,
> > + op1_lo,
> > op2_lo, sum));
> > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
> > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2,
> > + op1_hi,
> > op2_hi, operands[3]));
> > emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
> > }
> > @@ -31149,7 +31158,7 @@ (define_int_attr vpdotprodtype
> > (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
> > (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
> >
> > -(define_expand "sdot_prod<mode>"
> > +(define_expand "sdot_prod<fourwayacc><mode>"
> > [(match_operand:<ssedvecmode> 0 "register_operand")
> > (match_operand:VI1_AVX2 1 "register_operand")
> > (match_operand:VI1_AVX2 2 "register_operand") @@ -31185,9
> +31194,9
> > @@ (define_expand "sdot_prod<mode>"
> > rtx sum = gen_reg_rtx (<ssedvecmode>mode);
> >
> > emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
> > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
> > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1,
> > + op1_lo,
> > op2_lo, sum));
> > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
> > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2,
> > + op1_hi,
> > op2_hi, operands[3]));
> > emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
> > }
> > @@ -31195,7 +31204,7 @@ (define_expand "sdot_prod<mode>"
> > DONE;
> > })
> >
> > -(define_expand "sdot_prodv64qi"
> > +(define_expand "sdot_prodv16siv64qi"
> > [(match_operand:V16SI 0 "register_operand")
> > (match_operand:V64QI 1 "register_operand")
> > (match_operand:V64QI 2 "register_operand") @@ -31218,14 +31227,14
> > @@ (define_expand "sdot_prodv64qi"
> > rtx sum = gen_reg_rtx (V16SImode);
> >
> > emit_move_insn (sum, CONST0_RTX (V16SImode));
> > - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
> > - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
> > + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum));
> > + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi,
> > + operands[3]));
> >
> > emit_insn (gen_addv16si3 (operands[0], res1, res2));
> > DONE;
> > })
> >
> > -(define_expand "udot_prod<mode>"
> > +(define_expand "udot_prod<fourwayacc><mode>"
> > [(match_operand:<ssedvecmode> 0 "register_operand")
> > (match_operand:VI1_AVX2 1 "register_operand")
> > (match_operand:VI1_AVX2 2 "register_operand") @@ -31261,9
> +31270,9
> > @@ (define_expand "udot_prod<mode>"
> > rtx sum = gen_reg_rtx (<ssedvecmode>mode);
> >
> > emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
> > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
> > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1,
> > + op1_lo,
> > op2_lo, sum));
> > - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
> > + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2,
> > + op1_hi,
> > op2_hi, operands[3]));
> > emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
> > }
> > @@ -31271,7 +31280,7 @@ (define_expand "udot_prod<mode>"
> > DONE;
> > })
> >
> > -(define_expand "udot_prodv64qi"
> > +(define_expand "udot_prodv16qiv64qi"
> > [(match_operand:V16SI 0 "register_operand")
> > (match_operand:V64QI 1 "register_operand")
> > (match_operand:V64QI 2 "register_operand") @@ -31294,8 +31303,8
> @@
> > (define_expand "udot_prodv64qi"
> > rtx sum = gen_reg_rtx (V16SImode);
> >
> > emit_move_insn (sum, CONST0_RTX (V16SImode));
> > - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
> > - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
> > + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum));
> > + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi,
> > + operands[3]));
> >
> > emit_insn (gen_addv16si3 (operands[0], res1, res2));
> > DONE;
> > @@ -31401,7 +31410,7 @@ (define_int_attr vpdpwprodtype
> > (UNSPEC_VPDPWSUD "wsud") (UNSPEC_VPDPWSUDS "wsuds")
> > (UNSPEC_VPDPWUUD "wuud") (UNSPEC_VPDPWUUDS "wuuds")])
> >
> > -(define_expand "usdot_prod<mode>"
> > +(define_expand "usdot_prod<twowayacc><mode>"
> > [(match_operand:<sseunpackmode> 0 "register_operand")
> > (match_operand:VI2_AVX2 1 "register_operand")
> > (match_operand:VI2_AVX2 2 "register_operand") @@ -31419,7
> +31428,7
> > @@ (define_expand "usdot_prod<mode>"
> > DONE;
> > })
> >
> > -(define_expand "udot_prod<mode>"
> > +(define_expand "udot_prod<twowayacc><mode>"
> > [(match_operand:<sseunpackmode> 0 "register_operand")
> > (match_operand:VI2_AVX2 1 "register_operand")
> > (match_operand:VI2_AVX2 2 "register_operand")
> > --
> > 2.34.1
> >
>
>
> --
> BR,
> Hongtao
On 7/12/24 03:23, Jiang, Haochen wrote:
>> -----Original Message-----
>> From: Hongtao Liu <crazylht@gmail.com>
>> Sent: Thursday, July 11, 2024 9:45 AM
>> To: Victor Do Nascimento <victor.donascimento@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; richard.sandiford@arm.com;
>> Richard.Earnshaw@arm.com
>> Subject: Re: [PATCH 05/10] i386: Fix dot_prod backend patterns for mmx and
>> sse targets
>>
>> On Wed, Jul 10, 2024 at 10:10 PM Victor Do Nascimento
>> <victor.donascimento@arm.com> wrote:
>>>
>>> Following the migration of the dot_prod optab from a direct to a
>>> conversion-type optab, ensure all back-end patterns incorporate the
>>> second machine mode into pattern names.
>> The patch LGTM. BTW you can use existing <ssedvecmodelower> instead of
>> new <fourwayacc> and <sseunpackmodelower> instead of <twowayacc>
>>>
>>> gcc/ChangeLog:
>>>
>>> * config/i386/mmx.md (usdot_prodv8qi): Deleted.
>>> (usdot_prodv2siv8qi): New.
>
> Hi Victor,
>
> I suppose all the patterns are renamed not deleted and new right?
> If that is the case, I suppose the log might be better and easier to understand
> if changed to something like:
>
> (old pattern): Renamed to ...
> (new pattern): this.
>
> Thx,
> Haochen
You're right, it's a straightforward renaming. I will amend the
ChangeLogs as per your suggestion.
Thanks for the tip!
Victor
>>> (sdot_prodv8qi): Deleted.
>>> (sdot_prodv2siv8qi): New.
>>> (udot_prodv8qi): Deleted.
>>> (udot_prodv2siv8qi): New.
>>> (usdot_prodv4hi): Deleted.
>>> (usdot_prodv2siv4hi): New.
>>> (udot_prodv4hi): Deleted.
>>> (udot_prodv2siv4hi): New.
>>> (sdot_prodv4hi): Deleted.
>>> (sdot_prodv2siv4hi): New.
>>> * config/i386/sse.md (fourwayacc): New.
>>> (twowayacc): New.
>>> (sdot_prod<mode>): Deleted.
>>> (sdot_prod<twowayacc><mode>): New.
>>> (sdot_prodv4si): Deleted.
>>> (sdot_prodv2div4si): New.
>>> (usdot_prod<mode>): Deleted.
>>> (usdot_prod<fourwayacc><mode>): New.
>>> (sdot_prod<mode>): Deleted.
>>> (sdot_prod<fourwayacc><mode>): New.
>>> (sdot_prodv64qi): Deleted.
>>> (sdot_prodv16siv64qi): New.
>>> (udot_prod<mode>): Deleted.
>>> (udot_prod<fourwayacc><mode>): New.
>>> (udot_prodv64qi): Deleted.
>>> (udot_prodv16qiv64qi): New.
>>> (usdot_prod<mode>): Deleted.
>>> (usdot_prod<twowayacc><mode>): New.
>>> (udot_prod<mode>): Deleted.
>>> (udot_prod<twowayacc><mode>): New.
>>> ---
>>> gcc/config/i386/mmx.md | 30 +++++++++++++--------------
>>> gcc/config/i386/sse.md | 47 +++++++++++++++++++++++++----------------
>> -
>>> 2 files changed, 43 insertions(+), 34 deletions(-)
>>>
>>> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index
>>> 94d3a6e5692..d78739b033d 100644
>>> --- a/gcc/config/i386/mmx.md
>>> +++ b/gcc/config/i386/mmx.md
>>> @@ -6344,7 +6344,7 @@ (define_expand "usadv8qi"
>>> DONE;
>>> })
>>>
>>> -(define_expand "usdot_prodv8qi"
>>> +(define_expand "usdot_prodv2siv8qi"
>>> [(match_operand:V2SI 0 "register_operand")
>>> (match_operand:V8QI 1 "register_operand")
>>> (match_operand:V8QI 2 "register_operand") @@ -6363,7 +6363,7 @@
>>> (define_expand "usdot_prodv8qi"
>>> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
>>> rtx op0 = gen_reg_rtx (V4SImode);
>>>
>>> - emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3));
>>> + emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3));
>>> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
>> V4SImode));
>>> }
>>> else
>>> @@ -6377,7 +6377,7 @@ (define_expand "usdot_prodv8qi"
>>> emit_move_insn (op3, CONST0_RTX (V4SImode));
>>> emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
>>> emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
>>> - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
>>> + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
>>>
>>> /* vec_perm (op0, 2, 3, 0, 1); */
>>> emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@
>>> -6388,7 +6388,7 @@ (define_expand "usdot_prodv8qi"
>>> DONE;
>>> })
>>>
>>> -(define_expand "sdot_prodv8qi"
>>> +(define_expand "sdot_prodv2siv8qi"
>>> [(match_operand:V2SI 0 "register_operand")
>>> (match_operand:V8QI 1 "register_operand")
>>> (match_operand:V8QI 2 "register_operand") @@ -6406,7 +6406,7 @@
>>> (define_expand "sdot_prodv8qi"
>>> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
>>> rtx op0 = gen_reg_rtx (V4SImode);
>>>
>>> - emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3));
>>> + emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3));
>>> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
>> V4SImode));
>>> }
>>> else
>>> @@ -6420,7 +6420,7 @@ (define_expand "sdot_prodv8qi"
>>> emit_move_insn (op3, CONST0_RTX (V4SImode));
>>> emit_insn (gen_extendv8qiv8hi2 (op1, operands[1]));
>>> emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
>>> - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
>>> + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
>>>
>>> /* vec_perm (op0, 2, 3, 0, 1); */
>>> emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@
>>> -6432,7 +6432,7 @@ (define_expand "sdot_prodv8qi"
>>>
>>> })
>>>
>>> -(define_expand "udot_prodv8qi"
>>> +(define_expand "udot_prodv2siv8qi"
>>> [(match_operand:V2SI 0 "register_operand")
>>> (match_operand:V8QI 1 "register_operand")
>>> (match_operand:V8QI 2 "register_operand") @@ -6450,7 +6450,7 @@
>>> (define_expand "udot_prodv8qi"
>>> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
>>> rtx op0 = gen_reg_rtx (V4SImode);
>>>
>>> - emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3));
>>> + emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3));
>>> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
>> V4SImode));
>>> }
>>> else
>>> @@ -6464,7 +6464,7 @@ (define_expand "udot_prodv8qi"
>>> emit_move_insn (op3, CONST0_RTX (V4SImode));
>>> emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
>>> emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2]));
>>> - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
>>> + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
>>>
>>> /* vec_perm (op0, 2, 3, 0, 1); */
>>> emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78))); @@
>>> -6476,7 +6476,7 @@ (define_expand "udot_prodv8qi"
>>>
>>> })
>>>
>>> -(define_expand "usdot_prodv4hi"
>>> +(define_expand "usdot_prodv2siv4hi"
>>> [(match_operand:V2SI 0 "register_operand")
>>> (match_operand:V4HI 1 "register_operand")
>>> (match_operand:V4HI 2 "register_operand") @@ -6492,12 +6492,12
>> @@
>>> (define_expand "usdot_prodv4hi"
>>> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
>>> rtx op0 = gen_reg_rtx (V4SImode);
>>>
>>> - emit_insn (gen_usdot_prodv8hi (op0, op1, op2, op3));
>>> + emit_insn (gen_usdot_prodv4siv8hi (op0, op1, op2, op3));
>>> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
>> V4SImode));
>>> DONE;
>>> })
>>>
>>> -(define_expand "udot_prodv4hi"
>>> +(define_expand "udot_prodv2siv4hi"
>>> [(match_operand:V2SI 0 "register_operand")
>>> (match_operand:V4HI 1 "register_operand")
>>> (match_operand:V4HI 2 "register_operand") @@ -6513,12 +6513,12
>> @@
>>> (define_expand "udot_prodv4hi"
>>> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
>>> rtx op0 = gen_reg_rtx (V4SImode);
>>>
>>> - emit_insn (gen_udot_prodv8hi (op0, op1, op2, op3));
>>> + emit_insn (gen_udot_prodv4siv8hi (op0, op1, op2, op3));
>>> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
>> V4SImode));
>>> DONE;
>>> })
>>>
>>> -(define_expand "sdot_prodv4hi"
>>> +(define_expand "sdot_prodv2siv4hi"
>>> [(match_operand:V2SI 0 "register_operand")
>>> (match_operand:V4HI 1 "register_operand")
>>> (match_operand:V4HI 2 "register_operand") @@ -6534,7 +6534,7 @@
>>> (define_expand "sdot_prodv4hi"
>>> rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
>>> rtx op0 = gen_reg_rtx (V4SImode);
>>>
>>> - emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
>>> + emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
>>> emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0,
>> V4SImode));
>>> DONE;
>>> })
>>> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index
>>> bda66d5e121..861b87bb50f 100644
>>> --- a/gcc/config/i386/sse.md
>>> +++ b/gcc/config/i386/sse.md
>>> @@ -1195,6 +1195,15 @@ (define_mode_attr ssexmmmode
>>> (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
>>> (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
>>>
>>> +;; Mapping of input type to 4-way accumulated type
>>> +(define_mode_attr fourwayacc
>>> + [(V64QI "v16si") (V32QI "v8si") (V16QI "v4si")])
>>> +
>>> +;; Mapping of input type to 2-way accumulated type
>>> +(define_mode_attr twowayacc
>>> + [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")
>>> + (V32QI "v16hi") (V16QI "v8hi")])
>>> +
>>> ;; Pointer size override for scalar modes (Intel asm dialect)
>>> (define_mode_attr iptr
>>> [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q") @@ -16712,7
>>> +16721,7 @@ (define_mode_attr SDOT_PMADD_SUF (define_mode_attr
>>> SDOT_VPDP_SUF
>>> [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")])
>>>
>>> -(define_expand "sdot_prod<mode>"
>>> +(define_expand "sdot_prod<twowayacc><mode>"
>>> [(match_operand:<sseunpackmode> 0 "register_operand")
>>> (match_operand:VI2_AVX512VNNIBW 1 "register_operand")
>>> (match_operand:VI2_AVX512VNNIBW 2 "register_operand") @@ -
>> 16747,7
>>> +16756,7 @@ (define_expand "sdot_prod<mode>"
>>>
>>> ;; Normally we use widen_mul_even/odd, but combine can't quite get it
>>> all ;; back together when madd is available.
>>> -(define_expand "sdot_prodv4si"
>>> +(define_expand "sdot_prodv2div4si"
>>> [(match_operand:V2DI 0 "register_operand")
>>> (match_operand:V4SI 1 "register_operand")
>>> (match_operand:V4SI 2 "register_operand") @@ -30290,7 +30299,7 @@
>>> (define_insn "vpshldv_<mode>_maskz_1"
>>> [(set_attr ("prefix") ("evex"))
>>> (set_attr "mode" "<sseinsnmode>")])
>>>
>>> -(define_expand "usdot_prod<mode>"
>>> +(define_expand "usdot_prod<fourwayacc><mode>"
>>> [(match_operand:<ssedvecmode> 0 "register_operand")
>>> (match_operand:VI1_AVX512 1 "register_operand")
>>> (match_operand:VI1_AVX512 2 "register_operand") @@ -30328,9
>>> +30337,9 @@ (define_expand "usdot_prod<mode>"
>>> rtx sum = gen_reg_rtx (<ssedvecmode>mode);
>>>
>>> emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
>>> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
>>> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1,
>>> + op1_lo,
>>> op2_lo, sum));
>>> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
>>> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2,
>>> + op1_hi,
>>> op2_hi, operands[3]));
>>> emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
>>> }
>>> @@ -31149,7 +31158,7 @@ (define_int_attr vpdotprodtype
>>> (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
>>> (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
>>>
>>> -(define_expand "sdot_prod<mode>"
>>> +(define_expand "sdot_prod<fourwayacc><mode>"
>>> [(match_operand:<ssedvecmode> 0 "register_operand")
>>> (match_operand:VI1_AVX2 1 "register_operand")
>>> (match_operand:VI1_AVX2 2 "register_operand") @@ -31185,9
>> +31194,9
>>> @@ (define_expand "sdot_prod<mode>"
>>> rtx sum = gen_reg_rtx (<ssedvecmode>mode);
>>>
>>> emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
>>> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
>>> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1,
>>> + op1_lo,
>>> op2_lo, sum));
>>> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
>>> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2,
>>> + op1_hi,
>>> op2_hi, operands[3]));
>>> emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
>>> }
>>> @@ -31195,7 +31204,7 @@ (define_expand "sdot_prod<mode>"
>>> DONE;
>>> })
>>>
>>> -(define_expand "sdot_prodv64qi"
>>> +(define_expand "sdot_prodv16siv64qi"
>>> [(match_operand:V16SI 0 "register_operand")
>>> (match_operand:V64QI 1 "register_operand")
>>> (match_operand:V64QI 2 "register_operand") @@ -31218,14 +31227,14
>>> @@ (define_expand "sdot_prodv64qi"
>>> rtx sum = gen_reg_rtx (V16SImode);
>>>
>>> emit_move_insn (sum, CONST0_RTX (V16SImode));
>>> - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
>>> - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
>>> + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum));
>>> + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi,
>>> + operands[3]));
>>>
>>> emit_insn (gen_addv16si3 (operands[0], res1, res2));
>>> DONE;
>>> })
>>>
>>> -(define_expand "udot_prod<mode>"
>>> +(define_expand "udot_prod<fourwayacc><mode>"
>>> [(match_operand:<ssedvecmode> 0 "register_operand")
>>> (match_operand:VI1_AVX2 1 "register_operand")
>>> (match_operand:VI1_AVX2 2 "register_operand") @@ -31261,9
>> +31270,9
>>> @@ (define_expand "udot_prod<mode>"
>>> rtx sum = gen_reg_rtx (<ssedvecmode>mode);
>>>
>>> emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
>>> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
>>> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1,
>>> + op1_lo,
>>> op2_lo, sum));
>>> - emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
>>> + emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2,
>>> + op1_hi,
>>> op2_hi, operands[3]));
>>> emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
>>> }
>>> @@ -31271,7 +31280,7 @@ (define_expand "udot_prod<mode>"
>>> DONE;
>>> })
>>>
>>> -(define_expand "udot_prodv64qi"
>>> +(define_expand "udot_prodv16siv64qi"
>>> [(match_operand:V16SI 0 "register_operand")
>>> (match_operand:V64QI 1 "register_operand")
>>> (match_operand:V64QI 2 "register_operand") @@ -31294,8 +31303,8
>> @@
>>> (define_expand "udot_prodv64qi"
>>> rtx sum = gen_reg_rtx (V16SImode);
>>>
>>> emit_move_insn (sum, CONST0_RTX (V16SImode));
>>> - emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
>>> - emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
>>> + emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum));
>>> + emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi,
>>> + operands[3]));
>>>
>>> emit_insn (gen_addv16si3 (operands[0], res1, res2));
>>> DONE;
>>> @@ -31401,7 +31410,7 @@ (define_int_attr vpdpwprodtype
>>> (UNSPEC_VPDPWSUD "wsud") (UNSPEC_VPDPWSUDS "wsuds")
>>> (UNSPEC_VPDPWUUD "wuud") (UNSPEC_VPDPWUUDS "wuuds")])
>>>
>>> -(define_expand "usdot_prod<mode>"
>>> +(define_expand "usdot_prod<twowayacc><mode>"
>>> [(match_operand:<sseunpackmode> 0 "register_operand")
>>> (match_operand:VI2_AVX2 1 "register_operand")
>>> (match_operand:VI2_AVX2 2 "register_operand") @@ -31419,7
>> +31428,7
>>> @@ (define_expand "usdot_prod<mode>"
>>> DONE;
>>> })
>>>
>>> -(define_expand "udot_prod<mode>"
>>> +(define_expand "udot_prod<twowayacc><mode>"
>>> [(match_operand:<sseunpackmode> 0 "register_operand")
>>> (match_operand:VI2_AVX2 1 "register_operand")
>>> (match_operand:VI2_AVX2 2 "register_operand")
>>> --
>>> 2.34.1
>>>
>>
>>
>> --
>> BR,
>> Hongtao
@@ -6344,7 +6344,7 @@ (define_expand "usadv8qi"
DONE;
})
-(define_expand "usdot_prodv8qi"
+(define_expand "usdot_prodv2siv8qi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand:V8QI 2 "register_operand")
@@ -6363,7 +6363,7 @@ (define_expand "usdot_prodv8qi"
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3));
+ emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
}
else
@@ -6377,7 +6377,7 @@ (define_expand "usdot_prodv8qi"
emit_move_insn (op3, CONST0_RTX (V4SImode));
emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
- emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
/* vec_perm (op0, 2, 3, 0, 1); */
emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
@@ -6388,7 +6388,7 @@ (define_expand "usdot_prodv8qi"
DONE;
})
-(define_expand "sdot_prodv8qi"
+(define_expand "sdot_prodv2siv8qi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand:V8QI 2 "register_operand")
@@ -6406,7 +6406,7 @@ (define_expand "sdot_prodv8qi"
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
}
else
@@ -6420,7 +6420,7 @@ (define_expand "sdot_prodv8qi"
emit_move_insn (op3, CONST0_RTX (V4SImode));
emit_insn (gen_extendv8qiv8hi2 (op1, operands[1]));
emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
- emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
/* vec_perm (op0, 2, 3, 0, 1); */
emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
@@ -6432,7 +6432,7 @@ (define_expand "sdot_prodv8qi"
})
-(define_expand "udot_prodv8qi"
+(define_expand "udot_prodv2siv8qi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand:V8QI 2 "register_operand")
@@ -6450,7 +6450,7 @@ (define_expand "udot_prodv8qi"
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3));
+ emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
}
else
@@ -6464,7 +6464,7 @@ (define_expand "udot_prodv8qi"
emit_move_insn (op3, CONST0_RTX (V4SImode));
emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2]));
- emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
/* vec_perm (op0, 2, 3, 0, 1); */
emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
@@ -6476,7 +6476,7 @@ (define_expand "udot_prodv8qi"
})
-(define_expand "usdot_prodv4hi"
+(define_expand "usdot_prodv2siv4hi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V4HI 1 "register_operand")
(match_operand:V4HI 2 "register_operand")
@@ -6492,12 +6492,12 @@ (define_expand "usdot_prodv4hi"
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_usdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_usdot_prodv4siv8hi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
DONE;
})
-(define_expand "udot_prodv4hi"
+(define_expand "udot_prodv2siv4hi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V4HI 1 "register_operand")
(match_operand:V4HI 2 "register_operand")
@@ -6513,12 +6513,12 @@ (define_expand "udot_prodv4hi"
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_udot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_udot_prodv4siv8hi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
DONE;
})
-(define_expand "sdot_prodv4hi"
+(define_expand "sdot_prodv2siv4hi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V4HI 1 "register_operand")
(match_operand:V4HI 2 "register_operand")
@@ -6534,7 +6534,7 @@ (define_expand "sdot_prodv4hi"
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
DONE;
})
@@ -1195,6 +1195,15 @@ (define_mode_attr ssexmmmode
(V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
(V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
+;; Mapping of input type to 4-way accumulated type
+(define_mode_attr fourwayacc
+ [(V64QI "v16si") (V32QI "v8si") (V16QI "v4si")])
+
+;; Mapping of input type to 2-way accumulated type
+(define_mode_attr twowayacc
+ [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")
+ (V32QI "v16hi") (V16QI "v8hi")])
+
;; Pointer size override for scalar modes (Intel asm dialect)
(define_mode_attr iptr
[(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
@@ -16712,7 +16721,7 @@ (define_mode_attr SDOT_PMADD_SUF
(define_mode_attr SDOT_VPDP_SUF
[(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")])
-(define_expand "sdot_prod<mode>"
+(define_expand "sdot_prod<twowayacc><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(match_operand:VI2_AVX512VNNIBW 1 "register_operand")
(match_operand:VI2_AVX512VNNIBW 2 "register_operand")
@@ -16747,7 +16756,7 @@ (define_expand "sdot_prod<mode>"
;; Normally we use widen_mul_even/odd, but combine can't quite get it all
;; back together when madd is available.
-(define_expand "sdot_prodv4si"
+(define_expand "sdot_prodv2div4si"
[(match_operand:V2DI 0 "register_operand")
(match_operand:V4SI 1 "register_operand")
(match_operand:V4SI 2 "register_operand")
@@ -30290,7 +30299,7 @@ (define_insn "vpshldv_<mode>_maskz_1"
[(set_attr ("prefix") ("evex"))
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "usdot_prod<mode>"
+(define_expand "usdot_prod<fourwayacc><mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
(match_operand:VI1_AVX512 1 "register_operand")
(match_operand:VI1_AVX512 2 "register_operand")
@@ -30328,9 +30337,9 @@ (define_expand "usdot_prod<mode>"
rtx sum = gen_reg_rtx (<ssedvecmode>mode);
emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
+ emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, op1_lo,
op2_lo, sum));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
+ emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, op1_hi,
op2_hi, operands[3]));
emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
}
@@ -31149,7 +31158,7 @@ (define_int_attr vpdotprodtype
(UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
(UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
-(define_expand "sdot_prod<mode>"
+(define_expand "sdot_prod<fourwayacc><mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
(match_operand:VI1_AVX2 1 "register_operand")
(match_operand:VI1_AVX2 2 "register_operand")
@@ -31185,9 +31194,9 @@ (define_expand "sdot_prod<mode>"
rtx sum = gen_reg_rtx (<ssedvecmode>mode);
emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
+ emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, op1_lo,
op2_lo, sum));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
+ emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, op1_hi,
op2_hi, operands[3]));
emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
}
@@ -31195,7 +31204,7 @@ (define_expand "sdot_prod<mode>"
DONE;
})
-(define_expand "sdot_prodv64qi"
+(define_expand "sdot_prodv16siv64qi"
[(match_operand:V16SI 0 "register_operand")
(match_operand:V64QI 1 "register_operand")
(match_operand:V64QI 2 "register_operand")
@@ -31218,14 +31227,14 @@ (define_expand "sdot_prodv64qi"
rtx sum = gen_reg_rtx (V16SImode);
emit_move_insn (sum, CONST0_RTX (V16SImode));
- emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
- emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
+ emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum));
+ emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, operands[3]));
emit_insn (gen_addv16si3 (operands[0], res1, res2));
DONE;
})
-(define_expand "udot_prod<mode>"
+(define_expand "udot_prod<fourwayacc><mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
(match_operand:VI1_AVX2 1 "register_operand")
(match_operand:VI1_AVX2 2 "register_operand")
@@ -31261,9 +31270,9 @@ (define_expand "udot_prod<mode>"
rtx sum = gen_reg_rtx (<ssedvecmode>mode);
emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
+ emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res1, op1_lo,
op2_lo, sum));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
+ emit_insn (gen_sdot_prod<fourwayacc><sseunpackmodelower> (res2, op1_hi,
op2_hi, operands[3]));
emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
}
@@ -31271,7 +31280,7 @@ (define_expand "udot_prod<mode>"
DONE;
})
-(define_expand "udot_prodv64qi"
+(define_expand "udot_prodv16siv64qi"
[(match_operand:V16SI 0 "register_operand")
(match_operand:V64QI 1 "register_operand")
(match_operand:V64QI 2 "register_operand")
@@ -31294,8 +31303,8 @@ (define_expand "udot_prodv64qi"
rtx sum = gen_reg_rtx (V16SImode);
emit_move_insn (sum, CONST0_RTX (V16SImode));
- emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
- emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
+ emit_insn (gen_sdot_prodv16siv32hi (res1, op1_lo, op2_lo, sum));
+ emit_insn (gen_sdot_prodv16siv32hi (res2, op1_hi, op2_hi, operands[3]));
emit_insn (gen_addv16si3 (operands[0], res1, res2));
DONE;
@@ -31401,7 +31410,7 @@ (define_int_attr vpdpwprodtype
(UNSPEC_VPDPWSUD "wsud") (UNSPEC_VPDPWSUDS "wsuds")
(UNSPEC_VPDPWUUD "wuud") (UNSPEC_VPDPWUUDS "wuuds")])
-(define_expand "usdot_prod<mode>"
+(define_expand "usdot_prod<twowayacc><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(match_operand:VI2_AVX2 1 "register_operand")
(match_operand:VI2_AVX2 2 "register_operand")
@@ -31419,7 +31428,7 @@ (define_expand "usdot_prod<mode>"
DONE;
})
-(define_expand "udot_prod<mode>"
+(define_expand "udot_prod<twowayacc><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(match_operand:VI2_AVX2 1 "register_operand")
(match_operand:VI2_AVX2 2 "register_operand")