[3/4,3/4] x86: Properly handle USE_VECTOR_FP_CONVERTS/USE_VECTOR_CONVERTS

Message ID 20210915080951.10362-4-lili.cui@intel.com
State New
Headers
Series Update mtune=tremont |

Commit Message

Li, Pan2 via Gcc-patches Sept. 15, 2021, 8:09 a.m. UTC
  From: "H.J. Lu" <hjl.tools@gmail.com>

Check TARGET_USE_VECTOR_FP_CONVERTS or TARGET_USE_VECTOR_CONVERTS when
handling avx_partial_xmm_update attribute.  Don't convert AVX partial
XMM register update if vector packed SSE conversion should be used.

gcc/

	PR target/101900
	* config/i386/i386-features.c (remove_partial_avx_dependency):
	Check TARGET_USE_VECTOR_FP_CONVERTS and TARGET_USE_VECTOR_CONVERTS
	before generating vxorps.

gcc/

	PR target/101900
	* testsuite/gcc.target/i386/pr101900-1.c: New test.
	* testsuite/gcc.target/i386/pr101900-2.c: Likewise.
	* testsuite/gcc.target/i386/pr101900-3.c: Likewise.
---
 gcc/config/i386/i386-features.c            | 21 ++++++++++++++++++---
 gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++
 4 files changed, 73 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c
  

Comments

Uros Bizjak Sept. 16, 2021, 6:27 a.m. UTC | #1
On Wed, Sep 15, 2021 at 10:10 AM <lili.cui@intel.com> wrote:
>
> From: "H.J. Lu" <hjl.tools@gmail.com>
>
> Check TARGET_USE_VECTOR_FP_CONVERTS or TARGET_USE_VECTOR_CONVERTS when
> handling avx_partial_xmm_update attribute.  Don't convert AVX partial
> XMM register update if vector packed SSE conversion should be used.
>
> gcc/
>
>         PR target/101900
>         * config/i386/i386-features.c (remove_partial_avx_dependency):
>         Check TARGET_USE_VECTOR_FP_CONVERTS and TARGET_USE_VECTOR_CONVERTS
>         before generating vxorps.
>
> gcc/
>
>         PR target/101900
>         * testsuite/gcc.target/i386/pr101900-1.c: New test.
>         * testsuite/gcc.target/i386/pr101900-2.c: Likewise.
>         * testsuite/gcc.target/i386/pr101900-3.c: Likewise.
> ---
>  gcc/config/i386/i386-features.c            | 21 ++++++++++++++++++---
>  gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++
>  4 files changed, 73 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c
>
> diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c
> index 5a99ea7c046..ae5ea02a002 100644
> --- a/gcc/config/i386/i386-features.c
> +++ b/gcc/config/i386/i386-features.c
> @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void)
>               != AVX_PARTIAL_XMM_UPDATE_TRUE)
>             continue;
>
> -         if (!v4sf_const0)
> -           v4sf_const0 = gen_reg_rtx (V4SFmode);
> -
>           /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
>              SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
>              vec_merge with subreg.  */
>           rtx src = SET_SRC (set);
>           rtx dest = SET_DEST (set);
>           machine_mode dest_mode = GET_MODE (dest);
> +         machine_mode src_mode;
> +
> +         if (TARGET_USE_VECTOR_FP_CONVERTS)
> +           {
> +             src_mode = GET_MODE (XEXP (src, 0));
> +             if (src_mode == E_SFmode || src_mode == E_DFmode)
> +               continue;
> +           }
> +
> +         if (TARGET_USE_VECTOR_CONVERTS)
> +           {
> +             src_mode = GET_MODE (XEXP (src, 0));
> +             if (src_mode == E_SImode || src_mode == E_DImode)
> +               continue;
> +           }
> +
> +         if (!v4sf_const0)
> +           v4sf_const0 = gen_reg_rtx (V4SFmode);

Please better move initialization of src_mode to the top of the new hunk, like:

machine_mode src_mode = GET_MODE (XEXP (src, 0));
switch (src_mode)
{
  case E_SFmode:
  case E_DFmode:
    if (TARGET_USE_VECTOR_FP_CONVERTS)
      continue;
    break;
  case E_SImode:
  case E_DImode:
    if (TARGET_USE_VECTOR_CONVERTS)
      continue;
    break;
  default:
    break;
}

or something like the above.

Uros.

>
>           rtx zero;
>           machine_mode dest_vecmode;
> diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c b/gcc/testsuite/gcc.target/i386/pr101900-1.c
> new file mode 100644
> index 00000000000..0a45f8e340a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr101900-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts" } */
> +
> +extern float f;
> +extern double d;
> +extern int i;
> +
> +void
> +foo (void)
> +{
> +  d = f;
> +  f = i;
> +}
> +
> +/* { dg-final { scan-assembler "vcvtps2pd" } } */
> +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */
> +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */
> +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c b/gcc/testsuite/gcc.target/i386/pr101900-2.c
> new file mode 100644
> index 00000000000..c8b2d1da5ae
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr101900-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_converts" } */
> +
> +extern float f;
> +extern double d;
> +extern int i;
> +
> +void
> +foo (void)
> +{
> +  d = f;
> +  f = i;
> +}
> +
> +/* { dg-final { scan-assembler "vcvtss2sd" } } */
> +/* { dg-final { scan-assembler "vcvtdq2ps" } } */
> +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */
> +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c b/gcc/testsuite/gcc.target/i386/pr101900-3.c
> new file mode 100644
> index 00000000000..6ee565b5bd4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr101900-3.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */
> +
> +extern float f;
> +extern double d;
> +extern int i;
> +
> +void
> +foo (void)
> +{
> +  d = f;
> +  f = i;
> +}
> +
> +/* { dg-final { scan-assembler "vcvtps2pd" } } */
> +/* { dg-final { scan-assembler "vcvtdq2ps" } } */
> +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */
> +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */
> +/* { dg-final { scan-assembler-not "vxorps" } } */
> --
> 2.17.1
>
  
Li, Pan2 via Gcc-patches Sept. 17, 2021, 3:15 a.m. UTC | #2
> -----Original Message-----
> From: Uros Bizjak <ubizjak@gmail.com>
> Sent: Thursday, September 16, 2021 2:28 PM
> To: Cui, Lili <lili.cui@intel.com>
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com>; H. J. Lu
> <hjl.tools@gmail.com>
> Subject: Re: [PATCH 3/4] [PATCH 3/4] x86: Properly handle
> USE_VECTOR_FP_CONVERTS/USE_VECTOR_CONVERTS
> 
> On Wed, Sep 15, 2021 at 10:10 AM <lili.cui@intel.com> wrote:
> >
> > From: "H.J. Lu" <hjl.tools@gmail.com>
> >
> > Check TARGET_USE_VECTOR_FP_CONVERTS or
> TARGET_USE_VECTOR_CONVERTS when
> > handling avx_partial_xmm_update attribute.  Don't convert AVX partial
> > XMM register update if vector packed SSE conversion should be used.
> >
> > gcc/
> >
> >         PR target/101900
> >         * config/i386/i386-features.c (remove_partial_avx_dependency):
> >         Check TARGET_USE_VECTOR_FP_CONVERTS and
> TARGET_USE_VECTOR_CONVERTS
> >         before generating vxorps.
> >
> > gcc/
> >
> >         PR target/101900
> >         * testsuite/gcc.target/i386/pr101900-1.c: New test.
> >         * testsuite/gcc.target/i386/pr101900-2.c: Likewise.
> >         * testsuite/gcc.target/i386/pr101900-3.c: Likewise.
> > ---
> >  gcc/config/i386/i386-features.c            | 21 ++++++++++++++++++---
> >  gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++
> > gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++
> > gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++
> >  4 files changed, 73 insertions(+), 3 deletions(-)  create mode 100644
> > gcc/testsuite/gcc.target/i386/pr101900-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c
> >
> > diff --git a/gcc/config/i386/i386-features.c
> > b/gcc/config/i386/i386-features.c index 5a99ea7c046..ae5ea02a002
> > 100644
> > --- a/gcc/config/i386/i386-features.c
> > +++ b/gcc/config/i386/i386-features.c
> > @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void)
> >               != AVX_PARTIAL_XMM_UPDATE_TRUE)
> >             continue;
> >
> > -         if (!v4sf_const0)
> > -           v4sf_const0 = gen_reg_rtx (V4SFmode);
> > -
> >           /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
> >              SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
> >              vec_merge with subreg.  */
> >           rtx src = SET_SRC (set);
> >           rtx dest = SET_DEST (set);
> >           machine_mode dest_mode = GET_MODE (dest);
> > +         machine_mode src_mode;
> > +
> > +         if (TARGET_USE_VECTOR_FP_CONVERTS)
> > +           {
> > +             src_mode = GET_MODE (XEXP (src, 0));
> > +             if (src_mode == E_SFmode || src_mode == E_DFmode)
> > +               continue;
> > +           }
> > +
> > +         if (TARGET_USE_VECTOR_CONVERTS)
> > +           {
> > +             src_mode = GET_MODE (XEXP (src, 0));
> > +             if (src_mode == E_SImode || src_mode == E_DImode)
> > +               continue;
> > +           }
> > +
> > +         if (!v4sf_const0)
> > +           v4sf_const0 = gen_reg_rtx (V4SFmode);
> 
> Please better move initialization of src_mode to the top of the new hunk, like:
> 
> machine_mode src_mode = GET_MODE (XEXP (src, 0)); switch (src_mode) {
>   case E_SFmode:
>   case E_DFmode:
>     if (TARGET_USE_VECTOR_FP_CONVERTS)
>       continue;
>     break;
>   case E_SImode:
>   case E_DImode:
>     if (TARGET_USE_VECTOR_CONVERTS)
>       continue;
>     break;
>   default:
>     break;
> }
> 
> or something like the above.

Done, thanks for your good advice, I also rebased patch 4/4, since it is based on patch 3/4.

Changed it to:

+         machine_mode src_mode = GET_MODE (XEXP (src, 0));
+
+         switch (src_mode)
+           {
+           case E_SFmode:
+           case E_DFmode:
+             if (TARGET_USE_VECTOR_FP_CONVERTS)
+               continue;
+             break;
+           case E_SImode:
+           case E_DImode:
+             if (TARGET_USE_VECTOR_CONVERTS)
+               continue;
+             break;
+           default:
+             break;
+           }
+         if (!v4sf_const0)
+           v4sf_const0 = gen_reg_rtx (V4SFmode);

Thanks,
Lili.

> 
> Uros.
> 
> >
> >           rtx zero;
> >           machine_mode dest_vecmode;
> > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c
> > b/gcc/testsuite/gcc.target/i386/pr101900-1.c
> > new file mode 100644
> > index 00000000000..0a45f8e340a
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr101900-1.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -march=skylake -mfpmath=sse
> > +-mtune-ctrl=use_vector_fp_converts" } */
> > +
> > +extern float f;
> > +extern double d;
> > +extern int i;
> > +
> > +void
> > +foo (void)
> > +{
> > +  d = f;
> > +  f = i;
> > +}
> > +
> > +/* { dg-final { scan-assembler "vcvtps2pd" } } */
> > +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */
> > +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */
> > +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 }
> > +} */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c
> > b/gcc/testsuite/gcc.target/i386/pr101900-2.c
> > new file mode 100644
> > index 00000000000..c8b2d1da5ae
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr101900-2.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -march=skylake -mfpmath=sse
> > +-mtune-ctrl=use_vector_converts" } */
> > +
> > +extern float f;
> > +extern double d;
> > +extern int i;
> > +
> > +void
> > +foo (void)
> > +{
> > +  d = f;
> > +  f = i;
> > +}
> > +
> > +/* { dg-final { scan-assembler "vcvtss2sd" } } */
> > +/* { dg-final { scan-assembler "vcvtdq2ps" } } */
> > +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */
> > +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 }
> > +} */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c
> > b/gcc/testsuite/gcc.target/i386/pr101900-3.c
> > new file mode 100644
> > index 00000000000..6ee565b5bd4
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr101900-3.c
> > @@ -0,0 +1,19 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -march=skylake -mfpmath=sse
> > +-mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */
> > +
> > +extern float f;
> > +extern double d;
> > +extern int i;
> > +
> > +void
> > +foo (void)
> > +{
> > +  d = f;
> > +  f = i;
> > +}
> > +
> > +/* { dg-final { scan-assembler "vcvtps2pd" } } */
> > +/* { dg-final { scan-assembler "vcvtdq2ps" } } */
> > +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */
> > +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */
> > +/* { dg-final { scan-assembler-not "vxorps" } } */
> > --
> > 2.17.1
> >
  
Uros Bizjak Sept. 17, 2021, 6:35 a.m. UTC | #3
On Fri, Sep 17, 2021 at 5:15 AM Cui, Lili <lili.cui@intel.com> wrote:
>
>
> > -----Original Message-----
> > From: Uros Bizjak <ubizjak@gmail.com>
> > Sent: Thursday, September 16, 2021 2:28 PM
> > To: Cui, Lili <lili.cui@intel.com>
> > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com>; H. J. Lu
> > <hjl.tools@gmail.com>
> > Subject: Re: [PATCH 3/4] [PATCH 3/4] x86: Properly handle
> > USE_VECTOR_FP_CONVERTS/USE_VECTOR_CONVERTS
> >
> > On Wed, Sep 15, 2021 at 10:10 AM <lili.cui@intel.com> wrote:
> > >
> > > From: "H.J. Lu" <hjl.tools@gmail.com>
> > >
> > > Check TARGET_USE_VECTOR_FP_CONVERTS or
> > TARGET_USE_VECTOR_CONVERTS when
> > > handling avx_partial_xmm_update attribute.  Don't convert AVX partial
> > > XMM register update if vector packed SSE conversion should be used.
> > >
> > > gcc/
> > >
> > >         PR target/101900
> > >         * config/i386/i386-features.c (remove_partial_avx_dependency):
> > >         Check TARGET_USE_VECTOR_FP_CONVERTS and
> > TARGET_USE_VECTOR_CONVERTS
> > >         before generating vxorps.
> > >
> > > gcc/
> > >
> > >         PR target/101900
> > >         * testsuite/gcc.target/i386/pr101900-1.c: New test.
> > >         * testsuite/gcc.target/i386/pr101900-2.c: Likewise.
> > >         * testsuite/gcc.target/i386/pr101900-3.c: Likewise.
> > > ---
> > >  gcc/config/i386/i386-features.c            | 21 ++++++++++++++++++---
> > >  gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++
> > > gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++
> > > gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++
> > >  4 files changed, 73 insertions(+), 3 deletions(-)  create mode 100644
> > > gcc/testsuite/gcc.target/i386/pr101900-1.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c
> > >
> > > diff --git a/gcc/config/i386/i386-features.c
> > > b/gcc/config/i386/i386-features.c index 5a99ea7c046..ae5ea02a002
> > > 100644
> > > --- a/gcc/config/i386/i386-features.c
> > > +++ b/gcc/config/i386/i386-features.c
> > > @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void)
> > >               != AVX_PARTIAL_XMM_UPDATE_TRUE)
> > >             continue;
> > >
> > > -         if (!v4sf_const0)
> > > -           v4sf_const0 = gen_reg_rtx (V4SFmode);
> > > -
> > >           /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
> > >              SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
> > >              vec_merge with subreg.  */
> > >           rtx src = SET_SRC (set);
> > >           rtx dest = SET_DEST (set);
> > >           machine_mode dest_mode = GET_MODE (dest);
> > > +         machine_mode src_mode;
> > > +
> > > +         if (TARGET_USE_VECTOR_FP_CONVERTS)
> > > +           {
> > > +             src_mode = GET_MODE (XEXP (src, 0));
> > > +             if (src_mode == E_SFmode || src_mode == E_DFmode)
> > > +               continue;
> > > +           }
> > > +
> > > +         if (TARGET_USE_VECTOR_CONVERTS)
> > > +           {
> > > +             src_mode = GET_MODE (XEXP (src, 0));
> > > +             if (src_mode == E_SImode || src_mode == E_DImode)
> > > +               continue;
> > > +           }
> > > +
> > > +         if (!v4sf_const0)
> > > +           v4sf_const0 = gen_reg_rtx (V4SFmode);
> >
> > Please better move initialization of src_mode to the top of the new hunk, like:
> >
> > machine_mode src_mode = GET_MODE (XEXP (src, 0)); switch (src_mode) {
> >   case E_SFmode:
> >   case E_DFmode:
> >     if (TARGET_USE_VECTOR_FP_CONVERTS)
> >       continue;
> >     break;
> >   case E_SImode:
> >   case E_DImode:
> >     if (TARGET_USE_VECTOR_CONVERTS)
> >       continue;
> >     break;
> >   default:
> >     break;
> > }
> >
> > or something like the above.
>
> Done, thanks for your good advice, I also rebased patch 4/4, since it is based on patch 3/4.

OK.

Thanks,
Uros.

>
> Changed it to:
>
> +         machine_mode src_mode = GET_MODE (XEXP (src, 0));
> +
> +         switch (src_mode)
> +           {
> +           case E_SFmode:
> +           case E_DFmode:
> +             if (TARGET_USE_VECTOR_FP_CONVERTS)
> +               continue;
> +             break;
> +           case E_SImode:
> +           case E_DImode:
> +             if (TARGET_USE_VECTOR_CONVERTS)
> +               continue;
> +             break;
> +           default:
> +             break;
> +           }
> +         if (!v4sf_const0)
> +           v4sf_const0 = gen_reg_rtx (V4SFmode);
>
> Thanks,
> Lili.
>
> >
> > Uros.
> >
> > >
> > >           rtx zero;
> > >           machine_mode dest_vecmode;
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c
> > > b/gcc/testsuite/gcc.target/i386/pr101900-1.c
> > > new file mode 100644
> > > index 00000000000..0a45f8e340a
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr101900-1.c
> > > @@ -0,0 +1,18 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -march=skylake -mfpmath=sse
> > > +-mtune-ctrl=use_vector_fp_converts" } */
> > > +
> > > +extern float f;
> > > +extern double d;
> > > +extern int i;
> > > +
> > > +void
> > > +foo (void)
> > > +{
> > > +  d = f;
> > > +  f = i;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler "vcvtps2pd" } } */
> > > +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */
> > > +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */
> > > +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 }
> > > +} */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c
> > > b/gcc/testsuite/gcc.target/i386/pr101900-2.c
> > > new file mode 100644
> > > index 00000000000..c8b2d1da5ae
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr101900-2.c
> > > @@ -0,0 +1,18 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -march=skylake -mfpmath=sse
> > > +-mtune-ctrl=use_vector_converts" } */
> > > +
> > > +extern float f;
> > > +extern double d;
> > > +extern int i;
> > > +
> > > +void
> > > +foo (void)
> > > +{
> > > +  d = f;
> > > +  f = i;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler "vcvtss2sd" } } */
> > > +/* { dg-final { scan-assembler "vcvtdq2ps" } } */
> > > +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */
> > > +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 }
> > > +} */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c
> > > b/gcc/testsuite/gcc.target/i386/pr101900-3.c
> > > new file mode 100644
> > > index 00000000000..6ee565b5bd4
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr101900-3.c
> > > @@ -0,0 +1,19 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -march=skylake -mfpmath=sse
> > > +-mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */
> > > +
> > > +extern float f;
> > > +extern double d;
> > > +extern int i;
> > > +
> > > +void
> > > +foo (void)
> > > +{
> > > +  d = f;
> > > +  f = i;
> > > +}
> > > +
> > > +/* { dg-final { scan-assembler "vcvtps2pd" } } */
> > > +/* { dg-final { scan-assembler "vcvtdq2ps" } } */
> > > +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */
> > > +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */
> > > +/* { dg-final { scan-assembler-not "vxorps" } } */
> > > --
> > > 2.17.1
> > >
  
Jakub Jelinek Sept. 17, 2021, 11:50 p.m. UTC | #4
On Fri, Sep 17, 2021 at 08:35:57AM +0200, Uros Bizjak via Gcc-patches wrote:
> > > On Wed, Sep 15, 2021 at 10:10 AM <lili.cui@intel.com> wrote:
> > > >
> > > > From: "H.J. Lu" <hjl.tools@gmail.com>
> > > >
> > > > Check TARGET_USE_VECTOR_FP_CONVERTS or
> > > TARGET_USE_VECTOR_CONVERTS when
> > > > handling avx_partial_xmm_update attribute.  Don't convert AVX partial
> > > > XMM register update if vector packed SSE conversion should be used.
> > > >
> > > > gcc/
> > > >
> > > >         PR target/101900
> > > >         * config/i386/i386-features.c (remove_partial_avx_dependency):
> > > >         Check TARGET_USE_VECTOR_FP_CONVERTS and
> > > TARGET_USE_VECTOR_CONVERTS
> > > >         before generating vxorps.
> > > >
> > > > gcc/
> > > >
> > > >         PR target/101900
> > > >         * testsuite/gcc.target/i386/pr101900-1.c: New test.
> > > >         * testsuite/gcc.target/i386/pr101900-2.c: Likewise.
> > > >         * testsuite/gcc.target/i386/pr101900-3.c: Likewise.
> > > > ---
> > > >  gcc/config/i386/i386-features.c            | 21 ++++++++++++++++++---
> > > >  gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++
> > > > gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++
> > > > gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++
> > > >  4 files changed, 73 insertions(+), 3 deletions(-)  create mode 100644
> > > > gcc/testsuite/gcc.target/i386/pr101900-1.c
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c
> > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c
> > > >
> > > > diff --git a/gcc/config/i386/i386-features.c
> > > > b/gcc/config/i386/i386-features.c index 5a99ea7c046..ae5ea02a002
> > > > 100644
> > > > --- a/gcc/config/i386/i386-features.c
> > > > +++ b/gcc/config/i386/i386-features.c
> > > > @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void)
> > > >               != AVX_PARTIAL_XMM_UPDATE_TRUE)
> > > >             continue;
> > > >
> > > > -         if (!v4sf_const0)
> > > > -           v4sf_const0 = gen_reg_rtx (V4SFmode);
> > > > -
> > > >           /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
> > > >              SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
> > > >              vec_merge with subreg.  */
> > > >           rtx src = SET_SRC (set);
> > > >           rtx dest = SET_DEST (set);
> > > >           machine_mode dest_mode = GET_MODE (dest);
> > > > +         machine_mode src_mode;
> > > > +
> > > > +         if (TARGET_USE_VECTOR_FP_CONVERTS)
> > > > +           {
> > > > +             src_mode = GET_MODE (XEXP (src, 0));
> > > > +             if (src_mode == E_SFmode || src_mode == E_DFmode)
> > > > +               continue;
> > > > +           }
> > > > +
> > > > +         if (TARGET_USE_VECTOR_CONVERTS)
> > > > +           {
> > > > +             src_mode = GET_MODE (XEXP (src, 0));
> > > > +             if (src_mode == E_SImode || src_mode == E_DImode)
> > > > +               continue;
> > > > +           }
> > > > +
> > > > +         if (!v4sf_const0)
> > > > +           v4sf_const0 = gen_reg_rtx (V4SFmode);
> > >
> > > Please better move initialization of src_mode to the top of the new hunk, like:
> > >
> > > machine_mode src_mode = GET_MODE (XEXP (src, 0)); switch (src_mode) {
> > >   case E_SFmode:
> > >   case E_DFmode:
> > >     if (TARGET_USE_VECTOR_FP_CONVERTS)
> > >       continue;
> > >     break;
> > >   case E_SImode:
> > >   case E_DImode:
> > >     if (TARGET_USE_VECTOR_CONVERTS)
> > >       continue;
> > >     break;
> > >   default:
> > >     break;
> > > }
> > >
> > > or something like the above.
> >
> > Done, thanks for your good advice, I also rebased patch 4/4, since it is based on patch 3/4.

The above change broke
+FAIL: gcc.target/i386/avx512f-vscalefpd-2.c (internal compiler error)
+FAIL: gcc.target/i386/avx512f-vscalefpd-2.c (test for excess errors)
+UNRESOLVED: gcc.target/i386/avx512f-vscalefpd-2.c compilation failed to produce executable
+FAIL: gcc.target/i386/avx512f-vscalefps-2.c (internal compiler error)
+FAIL: gcc.target/i386/avx512f-vscalefps-2.c (test for excess errors)
+UNRESOLVED: gcc.target/i386/avx512f-vscalefps-2.c compilation failed to produce executable
+FAIL: gcc.target/i386/avx512f-vscalefss-2.c (internal compiler error)
+FAIL: gcc.target/i386/avx512f-vscalefss-2.c (test for excess errors)
+UNRESOLVED: gcc.target/i386/avx512f-vscalefss-2.c compilation failed to produce executable
+FAIL: gcc.target/i386/avx512vl-vscalefpd-2.c (internal compiler error)
+FAIL: gcc.target/i386/avx512vl-vscalefpd-2.c (test for excess errors)
+UNRESOLVED: gcc.target/i386/avx512vl-vscalefpd-2.c compilation failed to produce executable
+FAIL: gcc.target/i386/avx512vl-vscalefps-2.c (internal compiler error)
+FAIL: gcc.target/i386/avx512vl-vscalefps-2.c (test for excess errors)
+UNRESOLVED: gcc.target/i386/avx512vl-vscalefps-2.c compilation failed to produce executable
when configured with --enable-checking=yes,rtl,extra, the error is:
during RTL pass: rpad
/home/jakub/src/gcc/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-2.c:57:1: internal compiler error: RTL check: expected elt 0 type 'e' or 'u', have 'E' (rtx unspec) in remove_partial_avx_dependency, at config/i386/i386-features.c:2219
0x77541d rtl_check_failed_type2(rtx_def const*, int, int, int, char const*, int, char const*)
        ../../gcc/rtl.c:898
0x84e731 remove_partial_avx_dependency
        ../../gcc/config/i386/i386-features.c:2219
0x84e731 execute
        ../../gcc/config/i386/i386-features.c:2389
This is on
2219		  machine_mode src_mode = GET_MODE (XEXP (src, 0));
and src is:
(unspec:DF [
        (mem:DF (plus:DI (reg/f:DI 149)
                (reg:DI 103 [ ivtmp.65 ])) [3 MEM[(double *)&src2 + ivtmp.65_65 * 1]+0 S8 A64])
        (const_int 9 [0x9])
    ] UNSPEC_ROUND)
- whole insn
(insn 55 54 56 5 (set (reg:DF 99 [ _50 ])
        (unspec:DF [
                (mem:DF (plus:DI (reg/f:DI 149)
                        (reg:DI 103 [ ivtmp.65 ])) [3 MEM[(double *)&src2 + ivtmp.65_65 * 1]+0 S8 A64])
                (const_int 9 [0x9])
            ] UNSPEC_ROUND)) "/home/jakub/src/gcc/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-2.c":19:28 1076 {sse4_1_rounddf2}
     (nil))
so XEXP (src, 0) can't be used in that case.
Looking at insns with avx_partial_xmm_update attribute, it seems
src is either FLOAT_EXTEND/FLOAT_TRUNCATE/FLOAT/UNSIGNED_FLOAT and
in that case it looks like a conversion and has different modes,
or it is UNSPEC (UNSPEC_{RCP,RSQRT,ROUND}) or SQRT and in that case it doesn't.

	Jakub
  
Hongtao Liu Sept. 18, 2021, 2:25 a.m. UTC | #5
On Sat, Sep 18, 2021 at 7:50 AM Jakub Jelinek via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Fri, Sep 17, 2021 at 08:35:57AM +0200, Uros Bizjak via Gcc-patches wrote:
> > > > On Wed, Sep 15, 2021 at 10:10 AM <lili.cui@intel.com> wrote:
> > > > >
> > > > > From: "H.J. Lu" <hjl.tools@gmail.com>
> > > > >
> > > > > Check TARGET_USE_VECTOR_FP_CONVERTS or
> > > > TARGET_USE_VECTOR_CONVERTS when
> > > > > handling avx_partial_xmm_update attribute.  Don't convert AVX partial
> > > > > XMM register update if vector packed SSE conversion should be used.
> > > > >
> > > > > gcc/
> > > > >
> > > > >         PR target/101900
> > > > >         * config/i386/i386-features.c (remove_partial_avx_dependency):
> > > > >         Check TARGET_USE_VECTOR_FP_CONVERTS and
> > > > TARGET_USE_VECTOR_CONVERTS
> > > > >         before generating vxorps.
> > > > >
> > > > > gcc/
> > > > >
> > > > >         PR target/101900
> > > > >         * testsuite/gcc.target/i386/pr101900-1.c: New test.
> > > > >         * testsuite/gcc.target/i386/pr101900-2.c: Likewise.
> > > > >         * testsuite/gcc.target/i386/pr101900-3.c: Likewise.
> > > > > ---
> > > > >  gcc/config/i386/i386-features.c            | 21 ++++++++++++++++++---
> > > > >  gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++
> > > > > gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++
> > > > > gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++
> > > > >  4 files changed, 73 insertions(+), 3 deletions(-)  create mode 100644
> > > > > gcc/testsuite/gcc.target/i386/pr101900-1.c
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c
> > > > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c
> > > > >
> > > > > diff --git a/gcc/config/i386/i386-features.c
> > > > > b/gcc/config/i386/i386-features.c index 5a99ea7c046..ae5ea02a002
> > > > > 100644
> > > > > --- a/gcc/config/i386/i386-features.c
> > > > > +++ b/gcc/config/i386/i386-features.c
> > > > > @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void)
> > > > >               != AVX_PARTIAL_XMM_UPDATE_TRUE)
> > > > >             continue;
> > > > >
> > > > > -         if (!v4sf_const0)
> > > > > -           v4sf_const0 = gen_reg_rtx (V4SFmode);
> > > > > -
> > > > >           /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
> > > > >              SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
> > > > >              vec_merge with subreg.  */
> > > > >           rtx src = SET_SRC (set);
> > > > >           rtx dest = SET_DEST (set);
> > > > >           machine_mode dest_mode = GET_MODE (dest);
> > > > > +         machine_mode src_mode;
> > > > > +
> > > > > +         if (TARGET_USE_VECTOR_FP_CONVERTS)
> > > > > +           {
> > > > > +             src_mode = GET_MODE (XEXP (src, 0));
> > > > > +             if (src_mode == E_SFmode || src_mode == E_DFmode)
> > > > > +               continue;
> > > > > +           }
> > > > > +
> > > > > +         if (TARGET_USE_VECTOR_CONVERTS)
> > > > > +           {
> > > > > +             src_mode = GET_MODE (XEXP (src, 0));
> > > > > +             if (src_mode == E_SImode || src_mode == E_DImode)
> > > > > +               continue;
> > > > > +           }
> > > > > +
> > > > > +         if (!v4sf_const0)
> > > > > +           v4sf_const0 = gen_reg_rtx (V4SFmode);
> > > >
> > > > Please better move initialization of src_mode to the top of the new hunk, like:
> > > >
> > > > machine_mode src_mode = GET_MODE (XEXP (src, 0)); switch (src_mode) {
> > > >   case E_SFmode:
> > > >   case E_DFmode:
> > > >     if (TARGET_USE_VECTOR_FP_CONVERTS)
> > > >       continue;
> > > >     break;
> > > >   case E_SImode:
> > > >   case E_DImode:
> > > >     if (TARGET_USE_VECTOR_CONVERTS)
> > > >       continue;
> > > >     break;
> > > >   default:
> > > >     break;
> > > > }
> > > >
> > > > or something like the above.
> > >
> > > Done, thanks for your good advice, I also rebased patch 4/4, since it is based on patch 3/4.
>
> The above change broke
> +FAIL: gcc.target/i386/avx512f-vscalefpd-2.c (internal compiler error)
> +FAIL: gcc.target/i386/avx512f-vscalefpd-2.c (test for excess errors)
> +UNRESOLVED: gcc.target/i386/avx512f-vscalefpd-2.c compilation failed to produce executable
> +FAIL: gcc.target/i386/avx512f-vscalefps-2.c (internal compiler error)
> +FAIL: gcc.target/i386/avx512f-vscalefps-2.c (test for excess errors)
> +UNRESOLVED: gcc.target/i386/avx512f-vscalefps-2.c compilation failed to produce executable
> +FAIL: gcc.target/i386/avx512f-vscalefss-2.c (internal compiler error)
> +FAIL: gcc.target/i386/avx512f-vscalefss-2.c (test for excess errors)
> +UNRESOLVED: gcc.target/i386/avx512f-vscalefss-2.c compilation failed to produce executable
> +FAIL: gcc.target/i386/avx512vl-vscalefpd-2.c (internal compiler error)
> +FAIL: gcc.target/i386/avx512vl-vscalefpd-2.c (test for excess errors)
> +UNRESOLVED: gcc.target/i386/avx512vl-vscalefpd-2.c compilation failed to produce executable
> +FAIL: gcc.target/i386/avx512vl-vscalefps-2.c (internal compiler error)
> +FAIL: gcc.target/i386/avx512vl-vscalefps-2.c (test for excess errors)
> +UNRESOLVED: gcc.target/i386/avx512vl-vscalefps-2.c compilation failed to produce executable
> when configured with --enable-checking=yes,rtl,extra, the error is:
> during RTL pass: rpad
> /home/jakub/src/gcc/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-2.c:57:1: internal compiler error: RTL check: expected elt 0 type 'e' or 'u', have 'E' (rtx unspec) in remove_partial_avx_dependency, at config/i386/i386-features.c:2219
> 0x77541d rtl_check_failed_type2(rtx_def const*, int, int, int, char const*, int, char const*)
>         ../../gcc/rtl.c:898
> 0x84e731 remove_partial_avx_dependency
>         ../../gcc/config/i386/i386-features.c:2219
> 0x84e731 execute
>         ../../gcc/config/i386/i386-features.c:2389
> This is on
> 2219              machine_mode src_mode = GET_MODE (XEXP (src, 0));
> and src is:
> (unspec:DF [
>         (mem:DF (plus:DI (reg/f:DI 149)
>                 (reg:DI 103 [ ivtmp.65 ])) [3 MEM[(double *)&src2 + ivtmp.65_65 * 1]+0 S8 A64])
>         (const_int 9 [0x9])
>     ] UNSPEC_ROUND)
> - whole insn
> (insn 55 54 56 5 (set (reg:DF 99 [ _50 ])
>         (unspec:DF [
>                 (mem:DF (plus:DI (reg/f:DI 149)
>                         (reg:DI 103 [ ivtmp.65 ])) [3 MEM[(double *)&src2 + ivtmp.65_65 * 1]+0 S8 A64])
>                 (const_int 9 [0x9])
>             ] UNSPEC_ROUND)) "/home/jakub/src/gcc/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-2.c":19:28 1076 {sse4_1_rounddf2}
>      (nil))
> so XEXP (src, 0) can't be used in that case.
Let me fix this.
> Looking at insns with avx_partial_xmm_update attribute, it seems
> src is either FLOAT_EXTEND/FLOAT_TRUNCATE/FLOAT/UNSIGNED_FLOAT and
> in that case it looks like a conversion and has different modes,
> or it is UNSPEC (UNSPEC_{RCP,RSQRT,ROUND}) or SQRT and in that case it doesn't.
Yes, i'll add comments to mention pass_rpad also handle rcp/round/sqrt/rsqrt.
>
>         Jakub
>
  

Patch

diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c
index 5a99ea7c046..ae5ea02a002 100644
--- a/gcc/config/i386/i386-features.c
+++ b/gcc/config/i386/i386-features.c
@@ -2210,15 +2210,30 @@  remove_partial_avx_dependency (void)
 	      != AVX_PARTIAL_XMM_UPDATE_TRUE)
 	    continue;
 
-	  if (!v4sf_const0)
-	    v4sf_const0 = gen_reg_rtx (V4SFmode);
-
 	  /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
 	     SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
 	     vec_merge with subreg.  */
 	  rtx src = SET_SRC (set);
 	  rtx dest = SET_DEST (set);
 	  machine_mode dest_mode = GET_MODE (dest);
+	  machine_mode src_mode;
+
+	  if (TARGET_USE_VECTOR_FP_CONVERTS)
+	    {
+	      src_mode = GET_MODE (XEXP (src, 0));
+	      if (src_mode == E_SFmode || src_mode == E_DFmode)
+		continue;
+	    }
+
+	  if (TARGET_USE_VECTOR_CONVERTS)
+	    {
+	      src_mode = GET_MODE (XEXP (src, 0));
+	      if (src_mode == E_SImode || src_mode == E_DImode)
+		continue;
+	    }
+
+	  if (!v4sf_const0)
+	    v4sf_const0 = gen_reg_rtx (V4SFmode);
 
 	  rtx zero;
 	  machine_mode dest_vecmode;
diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c b/gcc/testsuite/gcc.target/i386/pr101900-1.c
new file mode 100644
index 00000000000..0a45f8e340a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101900-1.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts" } */
+
+extern float f;
+extern double d;
+extern int i;
+
+void
+foo (void)
+{
+  d = f;
+  f = i;
+}
+
+/* { dg-final { scan-assembler "vcvtps2pd" } } */
+/* { dg-final { scan-assembler "vcvtsi2ssl" } } */
+/* { dg-final { scan-assembler-not "vcvtss2sd" } } */
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c b/gcc/testsuite/gcc.target/i386/pr101900-2.c
new file mode 100644
index 00000000000..c8b2d1da5ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101900-2.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_converts" } */
+
+extern float f;
+extern double d;
+extern int i;
+
+void
+foo (void)
+{
+  d = f;
+  f = i;
+}
+
+/* { dg-final { scan-assembler "vcvtss2sd" } } */
+/* { dg-final { scan-assembler "vcvtdq2ps" } } */
+/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */
+/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c b/gcc/testsuite/gcc.target/i386/pr101900-3.c
new file mode 100644
index 00000000000..6ee565b5bd4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101900-3.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */
+
+extern float f;
+extern double d;
+extern int i;
+
+void
+foo (void)
+{
+  d = f;
+  f = i;
+}
+
+/* { dg-final { scan-assembler "vcvtps2pd" } } */
+/* { dg-final { scan-assembler "vcvtdq2ps" } } */
+/* { dg-final { scan-assembler-not "vcvtss2sd" } } */
+/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */
+/* { dg-final { scan-assembler-not "vxorps" } } */