Fix regression introduced by r12-5536.

Message ID 20211129013246.77321-1-hongtao.liu@intel.com
State New
Headers
Series Fix regression introduced by r12-5536. |

Commit Message

Liu, Hongtao Nov. 29, 2021, 1:32 a.m. UTC
  There're several failures reported in [1]:
1.  unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
%vpextrw should be used in output templates.
2. ICE in get_attr_memory for movhi_internal since some alternatives
are marked as TYPE_SSELOG.
Explicitly set memory_attr for those alternatives.

Also this patch fixs a typo and some latent bugs which are related to
moving HImode from/to sse register w/o TARGET_AVX512FP16.

For optimization issues discussed in PR102811, I'll create another patch for
it.
[1] https://gcc.gnu.org/pipermail/gcc-regression/2021-November/075893.html


Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and
x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake}
Ok for trunk?

gcc/ChangeLog:

	* config/i386/i386.c (ix86_secondary_reload): Without
	TARGET_SSE4_1, General register is needed to move HImode from
	sse register to memory.
	* config/i386/sse.md (*vec_extrachf): Use %vpextrw instead of
	pextrw in output templates.
	* config/i386/i386.md (movhi_internal): Ditto, also fix typo of
	MEM_P (operands[1]) and adjust memory/mode/prefix/type
	attribute for alternatives related to sse register.
---
 gcc/config/i386/i386.c  |  2 +-
 gcc/config/i386/i386.md | 44 ++++++++++++++++++++++++++++++-----------
 gcc/config/i386/sse.md  |  6 +++---
 3 files changed, 36 insertions(+), 16 deletions(-)
  

Comments

Uros Bizjak Nov. 29, 2021, 7:53 a.m. UTC | #1
On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> There're several failures reported in [1]:
> 1.  unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
> %vpextrw should be used in output templates.
> 2. ICE in get_attr_memory for movhi_internal since some alternatives
> are marked as TYPE_SSELOG.
> Explicitly set memory_attr for those alternatives.
>
> Also this patch fixs a typo and some latent bugs which are related to
> moving HImode from/to sse register w/o TARGET_AVX512FP16.
>
> For optimization issues discussed in PR102811, I'll create another patch for
> it.
> [1] https://gcc.gnu.org/pipermail/gcc-regression/2021-November/075893.html
>
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and
> x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake}
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         * config/i386/i386.c (ix86_secondary_reload): Without
>         TARGET_SSE4_1, General register is needed to move HImode from
>         sse register to memory.
>         * config/i386/sse.md (*vec_extrachf): Use %vpextrw instead of
>         pextrw in output templates.
>         * config/i386/i386.md (movhi_internal): Ditto, also fix typo of
>         MEM_P (operands[1]) and adjust memory/mode/prefix/type
>         attribute for alternatives related to sse register.

OK, but please use sselog1 type instead so you don't need to introduce
the memory attribute.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.c  |  2 +-
>  gcc/config/i386/i386.md | 44 ++++++++++++++++++++++++++++++-----------
>  gcc/config/i386/sse.md  |  6 +++---
>  3 files changed, 36 insertions(+), 16 deletions(-)
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 3dedf522c42..7cf599f57f7 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19277,7 +19277,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
>      }
>
>    /* Require movement to gpr, and then store to memory.  */
> -  if (mode == HFmode
> +  if ((mode == HFmode || mode == HImode)
>        && !TARGET_SSE4_1
>        && SSE_CLASS_P (rclass)
>        && !in_p && MEM_P (x))
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 68606e57e60..2cb3e727588 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2528,12 +2528,12 @@ (define_insn "*movhi_internal"
>      case TYPE_SSELOG:
>        if (SSE_REG_P (operands[0]))
>         return MEM_P (operands[1])
> -         ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> -         : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> +         ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}";
>        else
> -       return MEM_P (operands[1])
> -         ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> -         : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +       return MEM_P (operands[0])
> +         ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
>
>      case TYPE_MSKLOG:
>        if (operands[1] == const0_rtx)
> @@ -2557,12 +2557,14 @@ (define_insn "*movhi_internal"
>                ]
>                (const_string "*")))
>     (set (attr "type")
> -     (cond [(eq_attr "alternative" "9,10,11,12,13")
> +     (cond [(eq_attr "alternative" "9,10,12,13")
>               (if_then_else (match_test "TARGET_AVX512FP16")
>                 (const_string "ssemov")
>                 (const_string "sselog"))
>             (eq_attr "alternative" "4,5,6,7")
>               (const_string "mskmov")
> +           (eq_attr "alternative" "11")
> +             (const_string "ssemov")
>             (eq_attr "alternative" "8")
>               (const_string "msklog")
>             (match_test "optimize_function_for_size_p (cfun)")
> @@ -2579,15 +2581,33 @@ (define_insn "*movhi_internal"
>               (const_string "imovx")
>            ]
>            (const_string "imov")))
> +    (set (attr "memory")
> +        (cond [(eq_attr "alternative" "9,10")
> +                 (const_string "none")
> +               (eq_attr "alternative" "12")
> +                 (const_string "load")
> +               (eq_attr "alternative" "13")
> +                 (const_string "store")
> +               ]
> +               (const_string "*")))

Please use sselog1 type instead, and the memory attribute will be
calculated correctly.

>      (set (attr "prefix")
> -      (if_then_else (eq_attr "alternative" "4,5,6,7,8")
> -       (const_string "vex")
> -       (const_string "orig")))
> +        (cond [(eq_attr "alternative" "9,10,11,12,13")
> +                 (const_string "maybe_evex")
> +               (eq_attr "alternative" "4,5,6,7,8")
> +                 (const_string "vex")
> +              ]
> +              (const_string "orig")))
>      (set (attr "mode")
>        (cond [(eq_attr "type" "imovx")
>                (const_string "SI")
> +            (eq_attr "alternative" "9,10,12,13")
> +              (if_then_else (match_test "TARGET_AVX512FP16")
> +                (const_string "HI")
> +                (const_string "TI"))
>              (eq_attr "alternative" "11")
> -              (const_string "HF")
> +              (if_then_else (match_test "TARGET_AVX512FP16")
> +                (const_string "HF")
> +                (const_string "SF"))
>              (and (eq_attr "alternative" "1,2")
>                   (match_operand:HI 1 "aligned_operand"))
>                (const_string "SI")
> @@ -3791,9 +3811,9 @@ (define_insn "*movhf_internal"
>                ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
>                : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
>        else
> -       return MEM_P (operands[1])
> +       return MEM_P (operands[0])
>                ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> -              : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +              : "pextrw\t{$0, %1, %k0|%k0, %1, 0}";
>
>      default:
>        gcc_unreachable ();
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index b109c2aa8fa..5229b23af98 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -11315,9 +11315,9 @@ (define_insn "*vec_extracthf"
>    switch (which_alternative)
>      {
>      case 0:
> -      return "vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
> +      return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
>      case 1:
> -      return "vpextrw\t{%2, %1, %0|%0, %1, %2}";
> +      return "%vpextrw\t{%2, %1, %0|%0, %1, %2}";
>
>      case 2:
>        operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
> @@ -11330,7 +11330,7 @@ (define_insn "*vec_extracthf"
>        gcc_unreachable ();
>     }
>  }
> -  [(set_attr "isa" "*,*,noavx,avx")
> +  [(set_attr "isa" "*,sse4,noavx,avx")
>     (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1")
>     (set_attr "prefix" "maybe_evex")
>     (set_attr "mode" "TI")])
> --
> 2.18.1
>
  
Hongtao Liu Nov. 29, 2021, 9:55 a.m. UTC | #2
On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > There're several failures reported in [1]:
> > 1.  unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
> > %vpextrw should be used in output templates.
> > 2. ICE in get_attr_memory for movhi_internal since some alternatives
> > are marked as TYPE_SSELOG.
> > Explicitly set memory_attr for those alternatives.
> >
> > Also this patch fixs a typo and some latent bugs which are related to
> > moving HImode from/to sse register w/o TARGET_AVX512FP16.
> >
> > For optimization issues discussed in PR102811, I'll create another patch for
> > it.
> > [1] https://gcc.gnu.org/pipermail/gcc-regression/2021-November/075893.html
> >
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and
> > x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake}
> > Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> >         * config/i386/i386.c (ix86_secondary_reload): Without
> >         TARGET_SSE4_1, General register is needed to move HImode from
> >         sse register to memory.
> >         * config/i386/sse.md (*vec_extrachf): Use %vpextrw instead of
> >         pextrw in output templates.
> >         * config/i386/i386.md (movhi_internal): Ditto, also fix typo of
> >         MEM_P (operands[1]) and adjust memory/mode/prefix/type
> >         attribute for alternatives related to sse register.
>
> OK, but please use sselog1 type instead so you don't need to introduce
> the memory attribute.
Changed, committed as r12-5573
thanks for the review.
>
> Thanks,
> Uros.
>
> > ---
> >  gcc/config/i386/i386.c  |  2 +-
> >  gcc/config/i386/i386.md | 44 ++++++++++++++++++++++++++++++-----------
> >  gcc/config/i386/sse.md  |  6 +++---
> >  3 files changed, 36 insertions(+), 16 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index 3dedf522c42..7cf599f57f7 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -19277,7 +19277,7 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
> >      }
> >
> >    /* Require movement to gpr, and then store to memory.  */
> > -  if (mode == HFmode
> > +  if ((mode == HFmode || mode == HImode)
> >        && !TARGET_SSE4_1
> >        && SSE_CLASS_P (rclass)
> >        && !in_p && MEM_P (x))
> > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> > index 68606e57e60..2cb3e727588 100644
> > --- a/gcc/config/i386/i386.md
> > +++ b/gcc/config/i386/i386.md
> > @@ -2528,12 +2528,12 @@ (define_insn "*movhi_internal"
> >      case TYPE_SSELOG:
> >        if (SSE_REG_P (operands[0]))
> >         return MEM_P (operands[1])
> > -         ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> > -         : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> > +         ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}"
> > +         : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> >        else
> > -       return MEM_P (operands[1])
> > -         ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> > -         : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> > +       return MEM_P (operands[0])
> > +         ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
> > +         : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
> >
> >      case TYPE_MSKLOG:
> >        if (operands[1] == const0_rtx)
> > @@ -2557,12 +2557,14 @@ (define_insn "*movhi_internal"
> >                ]
> >                (const_string "*")))
> >     (set (attr "type")
> > -     (cond [(eq_attr "alternative" "9,10,11,12,13")
> > +     (cond [(eq_attr "alternative" "9,10,12,13")
> >               (if_then_else (match_test "TARGET_AVX512FP16")
> >                 (const_string "ssemov")
> >                 (const_string "sselog"))
> >             (eq_attr "alternative" "4,5,6,7")
> >               (const_string "mskmov")
> > +           (eq_attr "alternative" "11")
> > +             (const_string "ssemov")
> >             (eq_attr "alternative" "8")
> >               (const_string "msklog")
> >             (match_test "optimize_function_for_size_p (cfun)")
> > @@ -2579,15 +2581,33 @@ (define_insn "*movhi_internal"
> >               (const_string "imovx")
> >            ]
> >            (const_string "imov")))
> > +    (set (attr "memory")
> > +        (cond [(eq_attr "alternative" "9,10")
> > +                 (const_string "none")
> > +               (eq_attr "alternative" "12")
> > +                 (const_string "load")
> > +               (eq_attr "alternative" "13")
> > +                 (const_string "store")
> > +               ]
> > +               (const_string "*")))
>
> Please use sselog1 type instead, and the memory attribute will be
> calculated correctly.
>
> >      (set (attr "prefix")
> > -      (if_then_else (eq_attr "alternative" "4,5,6,7,8")
> > -       (const_string "vex")
> > -       (const_string "orig")))
> > +        (cond [(eq_attr "alternative" "9,10,11,12,13")
> > +                 (const_string "maybe_evex")
> > +               (eq_attr "alternative" "4,5,6,7,8")
> > +                 (const_string "vex")
> > +              ]
> > +              (const_string "orig")))
> >      (set (attr "mode")
> >        (cond [(eq_attr "type" "imovx")
> >                (const_string "SI")
> > +            (eq_attr "alternative" "9,10,12,13")
> > +              (if_then_else (match_test "TARGET_AVX512FP16")
> > +                (const_string "HI")
> > +                (const_string "TI"))
> >              (eq_attr "alternative" "11")
> > -              (const_string "HF")
> > +              (if_then_else (match_test "TARGET_AVX512FP16")
> > +                (const_string "HF")
> > +                (const_string "SF"))
> >              (and (eq_attr "alternative" "1,2")
> >                   (match_operand:HI 1 "aligned_operand"))
> >                (const_string "SI")
> > @@ -3791,9 +3811,9 @@ (define_insn "*movhf_internal"
> >                ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> >                : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> >        else
> > -       return MEM_P (operands[1])
> > +       return MEM_P (operands[0])
> >                ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> > -              : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> > +              : "pextrw\t{$0, %1, %k0|%k0, %1, 0}";
> >
> >      default:
> >        gcc_unreachable ();
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index b109c2aa8fa..5229b23af98 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -11315,9 +11315,9 @@ (define_insn "*vec_extracthf"
> >    switch (which_alternative)
> >      {
> >      case 0:
> > -      return "vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
> > +      return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
> >      case 1:
> > -      return "vpextrw\t{%2, %1, %0|%0, %1, %2}";
> > +      return "%vpextrw\t{%2, %1, %0|%0, %1, %2}";
> >
> >      case 2:
> >        operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
> > @@ -11330,7 +11330,7 @@ (define_insn "*vec_extracthf"
> >        gcc_unreachable ();
> >     }
> >  }
> > -  [(set_attr "isa" "*,*,noavx,avx")
> > +  [(set_attr "isa" "*,sse4,noavx,avx")
> >     (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1")
> >     (set_attr "prefix" "maybe_evex")
> >     (set_attr "mode" "TI")])
> > --
> > 2.18.1
> >
  
Uros Bizjak Nov. 29, 2021, 9:20 p.m. UTC | #3
On Mon, Nov 29, 2021 at 10:48 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote:
> > >
> > > There're several failures reported in [1]:
> > > 1.  unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
> > > %vpextrw should be used in output templates.
> > > 2. ICE in get_attr_memory for movhi_internal since some alternatives
> > > are marked as TYPE_SSELOG.
> > > Explicitly set memory_attr for those alternatives.
> > >
> > > Also this patch fixs a typo and some latent bugs which are related to
> > > moving HImode from/to sse register w/o TARGET_AVX512FP16.

Here are some more fixes:

i386: Fix and improve movhi_internal and movhf_internal some more.

An (*v,C) alternative can be added to movhi_internal to directly load
HImode constant 0 to xmm register. Also, V4SFmode moves can be used
for xmm->xmm moves instead of TImode moves when optimizing for size.
Fix invalid %vpinsrw insn template, which needs to duplicate %xmm
register for AVX targets.

Optimize GPR moves in movhf_internal in the same way as in movhi_internal.
Fix pinsrw and pextrw templates for AVX targets. Use sselog1
instead of sselog type.  Also, handle TARGET_SSE_PARTIAL_REG_DEPENDENCY
and TARGET_SSE_SPLIT_REGS targets.

2021-11-29  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

    PR target/102811
    * config/i386/i386.md (*movhi_internal): Introduce (*v,C) alternative.
    Do not allocate non-GPR registers.  Optimize xmm->xmm moves when
    optimizing for size.  Fix vpinsrw insn template.
    (*movhf_internal): Fix pinsrw and pextrw insn templates for
    AVX targets. Use sselog1 type instead of sselog.  Optimize GPR moves.
    Optimize xmm->xmm moves for TARGET_SSE_PARTIAL_REG_DEPENDENCY
    and TARGET_SSE_SPLIT_REGS targets.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} w/ and
w/o -mf16c.

Pushed to master.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a384dae23e2..c88374c9d2b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2495,11 +2495,12 @@
 	   (symbol_ref "true")))])
 
 (define_insn "*movhi_internal"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,*k,*k ,*r,*m,*k,?r,?v,*v,*v,*m")
-	(match_operand:HI 1 "general_operand"      "r ,rn,rm,rn,*r,*km,*k,*k,CBC,v, r, v, m, v"))]
+  [(set (match_operand:HI 0 "nonimmediate_operand"
+    "=r,r,r,m ,*k,*k ,r ,m ,*k ,?r,?*v,*v,*v,*v,m")
+	(match_operand:HI 1 "general_operand"
+    "r ,n,m,rn,r ,*km,*k,*k,CBC,*v,r  ,C ,*v,m ,*v"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && ix86_hardreg_mov_ok (operands[0], operands[1])"
-
 {
   switch (get_attr_type (insn))
     {
@@ -2526,10 +2527,13 @@
       return ix86_output_ssemov (insn, operands);
 
     case TYPE_SSELOG1:
+      if (satisfies_constraint_C (operands[1]))
+	return standard_sse_constant_opcode (insn, operands);
+
       if (SSE_REG_P (operands[0]))
 	return MEM_P (operands[1])
-	  ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}"
-	  : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+	  ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
+	  : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
       else
 	return MEM_P (operands[0])
 	  ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
@@ -2550,23 +2554,25 @@
     }
 }
   [(set (attr "isa")
-	(cond [(eq_attr "alternative" "9,10,11,12")
+	(cond [(eq_attr "alternative" "9,10,11,12,13")
 		  (const_string "sse2")
-	       (eq_attr "alternative" "13")
+	       (eq_attr "alternative" "14")
 		  (const_string "sse4")
 	       ]
 	       (const_string "*")))
    (set (attr "type")
-     (cond [(eq_attr "alternative" "9,10,12,13")
+     (cond [(eq_attr "alternative" "4,5,6,7")
+	      (const_string "mskmov")
+	    (eq_attr "alternative" "8")
+	      (const_string "msklog")
+	    (eq_attr "alternative" "9,10,13,14")
 	      (if_then_else (match_test "TARGET_AVX512FP16")
 		(const_string "ssemov")
 		(const_string "sselog1"))
-	    (eq_attr "alternative" "4,5,6,7")
-	      (const_string "mskmov")
 	    (eq_attr "alternative" "11")
+	      (const_string "sselog1")
+	    (eq_attr "alternative" "12")
 	      (const_string "ssemov")
-	    (eq_attr "alternative" "8")
-	      (const_string "msklog")
 	    (match_test "optimize_function_for_size_p (cfun)")
 	      (const_string "imov")
 	    (and (eq_attr "alternative" "0")
@@ -2581,33 +2587,54 @@
 	      (const_string "imovx")
 	   ]
 	   (const_string "imov")))
-    (set (attr "prefix")
-	 (cond [(eq_attr "alternative" "9,10,11,12,13")
-		  (const_string "maybe_evex")
-		(eq_attr "alternative" "4,5,6,7,8")
-		  (const_string "vex")
-	       ]
-	       (const_string "orig")))
-    (set (attr "mode")
-      (cond [(eq_attr "type" "imovx")
-	       (const_string "SI")
-	     (eq_attr "alternative" "9,10,12,13")
-	       (if_then_else (match_test "TARGET_AVX512FP16")
-		 (const_string "HI")
-		 (const_string "TI"))
-	     (eq_attr "alternative" "11")
-	       (if_then_else (match_test "TARGET_AVX512FP16")
-		 (const_string "HF")
-		 (const_string "SF"))
-	     (and (eq_attr "alternative" "1,2")
-		  (match_operand:HI 1 "aligned_operand"))
-	       (const_string "SI")
-	     (and (eq_attr "alternative" "0")
-		  (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
-		       (not (match_test "TARGET_HIMODE_MATH"))))
-	       (const_string "SI")
+   (set (attr "prefix")
+	(cond [(eq_attr "alternative" "4,5,6,7,8")
+		 (const_string "vex")
+	       (eq_attr "alternative" "9,10,11,12,13,14")
+		 (const_string "maybe_evex")
+	      ]
+	      (const_string "orig")))
+   (set (attr "mode")
+     (cond [(eq_attr "alternative" "9,10,13,14")
+	      (if_then_else (match_test "TARGET_AVX512FP16")
+		(const_string "HI")
+		(const_string "TI"))
+	    (eq_attr "alternative" "11")
+	      (cond [(match_test "TARGET_AVX")
+		       (const_string "TI")
+		     (ior (not (match_test "TARGET_SSE2"))
+			  (match_test "optimize_function_for_size_p (cfun)"))
+		       (const_string "V4SF")
+		    ]
+		    (const_string "TI"))
+	    (eq_attr "alternative" "12")
+	      (cond [(match_test "TARGET_AVX512FP16")
+		       (const_string "HI")
+		     (match_test "TARGET_AVX")
+		       (const_string "TI")
+		     (ior (not (match_test "TARGET_SSE2"))
+			  (match_test "optimize_function_for_size_p (cfun)"))
+		       (const_string "V4SF")
+		    ]
+		    (const_string "TI"))
+	    (eq_attr "type" "imovx")
+	      (const_string "SI")
+	    (and (eq_attr "alternative" "1,2")
+		 (match_operand:HI 1 "aligned_operand"))
+	      (const_string "SI")
+	    (and (eq_attr "alternative" "0")
+		 (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
+		      (not (match_test "TARGET_HIMODE_MATH"))))
+	      (const_string "SI")
 	    ]
-	    (const_string "HI")))])
+	    (const_string "HI")))
+   (set (attr "preferred_for_speed")
+     (cond [(eq_attr "alternative" "9")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
+	    (eq_attr "alternative" "10")
+	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
+	   ]
+	   (symbol_ref "true")))])
 
 ;; Situation is quite tricky about when to choose full sized (SImode) move
 ;; over QImode moves.  For Q_REG -> Q_REG move we use full size only for
@@ -3774,92 +3801,110 @@
 
 (define_insn "*movhf_internal"
  [(set (match_operand:HF 0 "nonimmediate_operand"
-	 "=?r,?m,v,v,?r,m,?v,v")
+	 "=?r,?r,?r,?m,v,v,?r,m,?v,v")
        (match_operand:HF 1 "general_operand"
-	 "rmF,rF,C,v, v,v, r,m"))]
+	 "r  ,F ,m ,rF,C,v, v,v,r ,m"))]
  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
   && (lra_in_progress
       || reload_completed
       || !CONST_DOUBLE_P (operands[1])
-      || (TARGET_SSE && TARGET_SSE_MATH
+      || (TARGET_SSE2
 	  && standard_sse_constant_p (operands[1], HFmode) == 1)
       || memory_operand (operands[0], HFmode))"
 {
   switch (get_attr_type (insn))
     {
-    case TYPE_IMOV:
-      return "mov{w}\t{%1, %0|%0, %1}";
-
-    case TYPE_SSELOG1:
-      return standard_sse_constant_opcode (insn, operands);
+    case TYPE_IMOVX:
+      /* movzwl is faster than movw on p2 due to partial word stalls,
+	 though not as fast as an aligned movl.  */
+      return "movz{wl|x}\t{%1, %k0|%k0, %1}";
 
     case TYPE_SSEMOV:
       return ix86_output_ssemov (insn, operands);
 
-    case TYPE_SSELOG:
+    case TYPE_SSELOG1:
+      if (satisfies_constraint_C (operands[1]))
+	return standard_sse_constant_opcode (insn, operands);
+
       if (SSE_REG_P (operands[0]))
 	return MEM_P (operands[1])
-	       ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
-	       : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+	       ? "%vpinsrw\t{$0, %1, %d0|%d0, %1, 0}"
+	       : "%vpinsrw\t{$0, %k1, %d0|%d0, %k1, 0}";
       else
 	return MEM_P (operands[0])
-	       ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
-	       : "pextrw\t{$0, %1, %k0|%k0, %1, 0}";
+	       ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
+	       : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
 
     default:
-      gcc_unreachable ();
+      if (get_attr_mode (insn) == MODE_SI)
+	return "mov{l}\t{%k1, %k0|%k0, %k1}";
+      else
+	return "mov{w}\t{%1, %0|%0, %1}";
     }
 }
   [(set (attr "isa")
-	(cond [(eq_attr "alternative" "2,3,4,6,7")
+	(cond [(eq_attr "alternative" "4,5,6,8,9")
 		 (const_string "sse2")
-	       (eq_attr "alternative" "5")
+	       (eq_attr "alternative" "7")
 		 (const_string "sse4")
 	      ]
 	      (const_string "*")))
    (set (attr "type")
-	(cond [(eq_attr "alternative" "0,1")
-		 (const_string "imov")
-	       (eq_attr "alternative" "2")
+	(cond [(eq_attr "alternative" "4")
 		 (const_string "sselog1")
-	       (eq_attr "alternative" "4,5,6,7")
+	       (eq_attr "alternative" "5")
+		 (const_string "ssemov")
+	       (eq_attr "alternative" "6,7,8,9")
 		 (if_then_else
 		   (match_test ("TARGET_AVX512FP16"))
 		   (const_string "ssemov")
-		   (const_string "sselog"))
-	      ]
-	      (const_string "ssemov")))
-   (set (attr "memory")
-	(cond [(eq_attr "alternative" "4,6")
-		 (const_string "none")
-	       (eq_attr "alternative" "5")
-		 (const_string "store")
-	       (eq_attr "alternative" "7")
-		 (const_string "load")
-	      ]
-	      (const_string "*")))
+		   (const_string "sselog1"))
+	       (match_test "optimize_function_for_size_p (cfun)")
+		 (const_string "imov")
+	       (and (eq_attr "alternative" "0")
+		    (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
+			 (not (match_test "TARGET_HIMODE_MATH"))))
+		 (const_string "imov")
+	       (and (eq_attr "alternative" "1,2")
+		    (match_operand:HI 1 "aligned_operand"))
+		 (const_string "imov")
+	       (and (match_test "TARGET_MOVX")
+		    (eq_attr "alternative" "0,2"))
+		 (const_string "imovx")
+		 ]
+	      (const_string "imov")))
    (set (attr "prefix")
-	(cond [(eq_attr "alternative" "0,1")
-		 (const_string "orig")
+	(cond [(eq_attr "alternative" "4,5,6,7,8,9")
+		 (const_string "maybe_vex")
 	      ]
-	      (const_string "maybe_vex")))
+	      (const_string "orig")))
    (set (attr "mode")
-	(cond [(eq_attr "alternative" "0,1")
-		 (const_string "HI")
-	       (eq_attr "alternative" "2")
+	(cond [(eq_attr "alternative" "4")
 		 (const_string "V4SF")
-	       (eq_attr "alternative" "4,5,6,7")
+	       (eq_attr "alternative" "6,7,8,9")
 		 (if_then_else
 		   (match_test "TARGET_AVX512FP16")
 		   (const_string "HI")
 		   (const_string "TI"))
-	       (eq_attr "alternative" "3")
-		 (if_then_else
-		   (match_test "TARGET_AVX512FP16")
-		   (const_string "HF")
-		   (const_string "SF"))
+	       (eq_attr "alternative" "5")
+		 (cond [(match_test "TARGET_AVX512FP16")
+			  (const_string "HF")
+			(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+			     (match_test "TARGET_SSE_SPLIT_REGS"))
+			  (const_string "V4SF")
+		       ]
+		       (const_string "SF"))
+	       (eq_attr "type" "imovx")
+		 (const_string "SI")
+	       (and (eq_attr "alternative" "1,2")
+		    (match_operand:HI 1 "aligned_operand"))
+		 (const_string "SI")
+	       (and (eq_attr "alternative" "0")
+		    (ior (not (match_test "TARGET_PARTIAL_REG_STALL"))
+			 (not (match_test "TARGET_HIMODE_MATH"))))
+		 (const_string "SI")
 	      ]
-	      (const_string "*")))])
+	      (const_string "HI")))])
 
 (define_split
   [(set (match_operand 0 "any_fp_register_operand")
  
Hongtao Liu Nov. 30, 2021, 1:31 a.m. UTC | #4
On Tue, Nov 30, 2021 at 5:21 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Mon, Nov 29, 2021 at 10:48 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Mon, Nov 29, 2021 at 3:53 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > On Mon, Nov 29, 2021 at 2:32 AM liuhongt <hongtao.liu@intel.com> wrote:
> > > >
> > > > There're several failures reported in [1]:
> > > > 1.  unsupported instruction `pextrw` for "pextrw $0, %xmm31, 16(%rax)"
> > > > %vpextrw should be used in output templates.
> > > > 2. ICE in get_attr_memory for movhi_internal since some alternatives
> > > > are marked as TYPE_SSELOG.
> > > > Explicitly set memory_attr for those alternatives.
> > > >
> > > > Also this patch fixs a typo and some latent bugs which are related to
> > > > moving HImode from/to sse register w/o TARGET_AVX512FP16.
>
> Here are some more fixes:
>
thanks.
> i386: Fix and improve movhi_internal and movhf_internal some more.
>
> An (*v,C) alternative can be added to movhi_internal to directly load
> HImode constant 0 to xmm register. Also, V4SFmode moves can be used
> for xmm->xmm moves instead of TImode moves when optimizing for size.
> Fix invalid %vpinsrw insn template, which needs to duplicate %xmm
> register for AVX targets.
>
> Optimize GPR moves in movhf_internal in the same way as in movhi_internal.
> Fix pinsrw and pextrw templates for AVX targets. Use sselog1
> instead of sselog type.  Also, handle TARGET_SSE_PARTIAL_REG_DEPENDENCY
> and TARGET_SSE_SPLIT_REGS targets.
>
> 2021-11-29  Uroš Bizjak  <ubizjak@gmail.com>
>
> gcc/ChangeLog:
>
>     PR target/102811
>     * config/i386/i386.md (*movhi_internal): Introduce (*v,C) alternative.
>     Do not allocate non-GPR registers.  Optimize xmm->xmm moves when
>     optimizing for size.  Fix vpinsrw insn template.
>     (*movhf_internal): Fix pinsrw and pextrw insn templates for
>     AVX targets. Use sselog1 type instead of sselog.  Optimize GPR moves.
>     Optimize xmm->xmm moves for TARGET_SSE_PARTIAL_REG_DEPENDENCY
>     and TARGET_SSE_SPLIT_REGS targets.
>
> Bootstrapped and regression tested on x86_64-linux-gnu {,-m32} w/ and
> w/o -mf16c.
>
> Pushed to master.
>
> Uros.
  

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3dedf522c42..7cf599f57f7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19277,7 +19277,7 @@  ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
     }
 
   /* Require movement to gpr, and then store to memory.  */
-  if (mode == HFmode
+  if ((mode == HFmode || mode == HImode)
       && !TARGET_SSE4_1
       && SSE_CLASS_P (rclass)
       && !in_p && MEM_P (x))
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 68606e57e60..2cb3e727588 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2528,12 +2528,12 @@  (define_insn "*movhi_internal"
     case TYPE_SSELOG:
       if (SSE_REG_P (operands[0]))
 	return MEM_P (operands[1])
-	  ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
-	  : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+	  ? "%vpinsrw\t{$0, %1, %0|%0, %1, 0}"
+	  : "%vpinsrw\t{$0, %k1, %0|%0, %k1, 0}";
       else
-	return MEM_P (operands[1])
-	  ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
-	  : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
+	return MEM_P (operands[0])
+	  ? "%vpextrw\t{$0, %1, %0|%0, %1, 0}"
+	  : "%vpextrw\t{$0, %1, %k0|%k0, %1, 0}";
 
     case TYPE_MSKLOG:
       if (operands[1] == const0_rtx)
@@ -2557,12 +2557,14 @@  (define_insn "*movhi_internal"
 	       ]
 	       (const_string "*")))
    (set (attr "type")
-     (cond [(eq_attr "alternative" "9,10,11,12,13")
+     (cond [(eq_attr "alternative" "9,10,12,13")
 	      (if_then_else (match_test "TARGET_AVX512FP16")
 		(const_string "ssemov")
 		(const_string "sselog"))
 	    (eq_attr "alternative" "4,5,6,7")
 	      (const_string "mskmov")
+	    (eq_attr "alternative" "11")
+	      (const_string "ssemov")
 	    (eq_attr "alternative" "8")
 	      (const_string "msklog")
 	    (match_test "optimize_function_for_size_p (cfun)")
@@ -2579,15 +2581,33 @@  (define_insn "*movhi_internal"
 	      (const_string "imovx")
 	   ]
 	   (const_string "imov")))
+    (set (attr "memory")
+	 (cond [(eq_attr "alternative" "9,10")
+		  (const_string "none")
+		(eq_attr "alternative" "12")
+		  (const_string "load")
+		(eq_attr "alternative" "13")
+		  (const_string "store")
+		]
+		(const_string "*")))
     (set (attr "prefix")
-      (if_then_else (eq_attr "alternative" "4,5,6,7,8")
-	(const_string "vex")
-	(const_string "orig")))
+	 (cond [(eq_attr "alternative" "9,10,11,12,13")
+		  (const_string "maybe_evex")
+		(eq_attr "alternative" "4,5,6,7,8")
+		  (const_string "vex")
+	       ]
+	       (const_string "orig")))
     (set (attr "mode")
       (cond [(eq_attr "type" "imovx")
 	       (const_string "SI")
+	     (eq_attr "alternative" "9,10,12,13")
+	       (if_then_else (match_test "TARGET_AVX512FP16")
+		 (const_string "HI")
+		 (const_string "TI"))
 	     (eq_attr "alternative" "11")
-	       (const_string "HF")
+	       (if_then_else (match_test "TARGET_AVX512FP16")
+		 (const_string "HF")
+		 (const_string "SF"))
 	     (and (eq_attr "alternative" "1,2")
 		  (match_operand:HI 1 "aligned_operand"))
 	       (const_string "SI")
@@ -3791,9 +3811,9 @@  (define_insn "*movhf_internal"
 	       ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
 	       : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
       else
-	return MEM_P (operands[1])
+	return MEM_P (operands[0])
 	       ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
-	       : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
+	       : "pextrw\t{$0, %1, %k0|%k0, %1, 0}";
 
     default:
       gcc_unreachable ();
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b109c2aa8fa..5229b23af98 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11315,9 +11315,9 @@  (define_insn "*vec_extracthf"
   switch (which_alternative)
     {
     case 0:
-      return "vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
+      return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
     case 1:
-      return "vpextrw\t{%2, %1, %0|%0, %1, %2}";
+      return "%vpextrw\t{%2, %1, %0|%0, %1, %2}";
 
     case 2:
       operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
@@ -11330,7 +11330,7 @@  (define_insn "*vec_extracthf"
       gcc_unreachable ();
    }
 }
-  [(set_attr "isa" "*,*,noavx,avx")
+  [(set_attr "isa" "*,sse4,noavx,avx")
    (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1")
    (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "TI")])