Message ID | 20210915080951.10362-4-lili.cui@intel.com |
---|---|
State | New |
Headers | show |
Series | Update mtune=tremont | expand |
On Wed, Sep 15, 2021 at 10:10 AM <lili.cui@intel.com> wrote: > > From: "H.J. Lu" <hjl.tools@gmail.com> > > Check TARGET_USE_VECTOR_FP_CONVERTS or TARGET_USE_VECTOR_CONVERTS when > handling avx_partial_xmm_update attribute. Don't convert AVX partial > XMM register update if vector packed SSE conversion should be used. > > gcc/ > > PR target/101900 > * config/i386/i386-features.c (remove_partial_avx_dependency): > Check TARGET_USE_VECTOR_FP_CONVERTS and TARGET_USE_VECTOR_CONVERTS > before generating vxorps. > > gcc/ > > PR target/101900 > * testsuite/gcc.target/i386/pr101900-1.c: New test. > * testsuite/gcc.target/i386/pr101900-2.c: Likewise. > * testsuite/gcc.target/i386/pr101900-3.c: Likewise. > --- > gcc/config/i386/i386-features.c | 21 ++++++++++++++++++--- > gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++ > gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++ > 4 files changed, 73 insertions(+), 3 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c > > diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c > index 5a99ea7c046..ae5ea02a002 100644 > --- a/gcc/config/i386/i386-features.c > +++ b/gcc/config/i386/i386-features.c > @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void) > != AVX_PARTIAL_XMM_UPDATE_TRUE) > continue; > > - if (!v4sf_const0) > - v4sf_const0 = gen_reg_rtx (V4SFmode); > - > /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, > SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and > vec_merge with subreg. */ > rtx src = SET_SRC (set); > rtx dest = SET_DEST (set); > machine_mode dest_mode = GET_MODE (dest); > + machine_mode src_mode; > + > + if (TARGET_USE_VECTOR_FP_CONVERTS) > + { > + src_mode = GET_MODE (XEXP (src, 0)); > + if (src_mode == E_SFmode || src_mode == E_DFmode) > + continue; > + } > + > + if (TARGET_USE_VECTOR_CONVERTS) > + { > + src_mode = GET_MODE (XEXP (src, 0)); > + if (src_mode == E_SImode || src_mode == E_DImode) > + continue; > + } > + > + if (!v4sf_const0) > + v4sf_const0 = gen_reg_rtx (V4SFmode); Please better move initialization of src_mode to the top of the new hunk, like: machine_mode src_mode = GET_MODE (XEXP (src, 0)); switch (src_mode) { case E_SFmode: case E_DFmode: if (TARGET_USE_VECTOR_FP_CONVERTS) continue; break; case E_SImode: case E_DImode: if (TARGET_USE_VECTOR_CONVERTS) continue; break; default: break; } or something like the above. Uros. > > rtx zero; > machine_mode dest_vecmode; > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c b/gcc/testsuite/gcc.target/i386/pr101900-1.c > new file mode 100644 > index 00000000000..0a45f8e340a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr101900-1.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts" } */ > + > +extern float f; > +extern double d; > +extern int i; > + > +void > +foo (void) > +{ > + d = f; > + f = i; > +} > + > +/* { dg-final { scan-assembler "vcvtps2pd" } } */ > +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */ > +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ > +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c b/gcc/testsuite/gcc.target/i386/pr101900-2.c > new file mode 100644 > index 00000000000..c8b2d1da5ae > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr101900-2.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_converts" } */ > + > +extern float f; > +extern double d; > +extern int i; > + > +void > +foo (void) > +{ > + d = f; > + f = i; > +} > + > +/* { dg-final { scan-assembler "vcvtss2sd" } } */ > +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ > +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ > +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c b/gcc/testsuite/gcc.target/i386/pr101900-3.c > new file mode 100644 > index 00000000000..6ee565b5bd4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr101900-3.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */ > + > +extern float f; > +extern double d; > +extern int i; > + > +void > +foo (void) > +{ > + d = f; > + f = i; > +} > + > +/* { dg-final { scan-assembler "vcvtps2pd" } } */ > +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ > +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ > +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ > +/* { dg-final { scan-assembler-not "vxorps" } } */ > -- > 2.17.1 >
> -----Original Message----- > From: Uros Bizjak <ubizjak@gmail.com> > Sent: Thursday, September 16, 2021 2:28 PM > To: Cui, Lili <lili.cui@intel.com> > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com>; H. J. Lu > <hjl.tools@gmail.com> > Subject: Re: [PATCH 3/4] [PATCH 3/4] x86: Properly handle > USE_VECTOR_FP_CONVERTS/USE_VECTOR_CONVERTS > > On Wed, Sep 15, 2021 at 10:10 AM <lili.cui@intel.com> wrote: > > > > From: "H.J. Lu" <hjl.tools@gmail.com> > > > > Check TARGET_USE_VECTOR_FP_CONVERTS or > TARGET_USE_VECTOR_CONVERTS when > > handling avx_partial_xmm_update attribute. Don't convert AVX partial > > XMM register update if vector packed SSE conversion should be used. > > > > gcc/ > > > > PR target/101900 > > * config/i386/i386-features.c (remove_partial_avx_dependency): > > Check TARGET_USE_VECTOR_FP_CONVERTS and > TARGET_USE_VECTOR_CONVERTS > > before generating vxorps. > > > > gcc/ > > > > PR target/101900 > > * testsuite/gcc.target/i386/pr101900-1.c: New test. > > * testsuite/gcc.target/i386/pr101900-2.c: Likewise. > > * testsuite/gcc.target/i386/pr101900-3.c: Likewise. > > --- > > gcc/config/i386/i386-features.c | 21 ++++++++++++++++++--- > > gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++ > > gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++ > > gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++ > > 4 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 > > gcc/testsuite/gcc.target/i386/pr101900-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c > > > > diff --git a/gcc/config/i386/i386-features.c > > b/gcc/config/i386/i386-features.c index 5a99ea7c046..ae5ea02a002 > > 100644 > > --- a/gcc/config/i386/i386-features.c > > +++ b/gcc/config/i386/i386-features.c > > @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void) > > != AVX_PARTIAL_XMM_UPDATE_TRUE) > > continue; > > > > - if (!v4sf_const0) > > - v4sf_const0 = gen_reg_rtx (V4SFmode); > > - > > /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, > > SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and > > vec_merge with subreg. */ > > rtx src = SET_SRC (set); > > rtx dest = SET_DEST (set); > > machine_mode dest_mode = GET_MODE (dest); > > + machine_mode src_mode; > > + > > + if (TARGET_USE_VECTOR_FP_CONVERTS) > > + { > > + src_mode = GET_MODE (XEXP (src, 0)); > > + if (src_mode == E_SFmode || src_mode == E_DFmode) > > + continue; > > + } > > + > > + if (TARGET_USE_VECTOR_CONVERTS) > > + { > > + src_mode = GET_MODE (XEXP (src, 0)); > > + if (src_mode == E_SImode || src_mode == E_DImode) > > + continue; > > + } > > + > > + if (!v4sf_const0) > > + v4sf_const0 = gen_reg_rtx (V4SFmode); > > Please better move initialization of src_mode to the top of the new hunk, like: > > machine_mode src_mode = GET_MODE (XEXP (src, 0)); switch (src_mode) { > case E_SFmode: > case E_DFmode: > if (TARGET_USE_VECTOR_FP_CONVERTS) > continue; > break; > case E_SImode: > case E_DImode: > if (TARGET_USE_VECTOR_CONVERTS) > continue; > break; > default: > break; > } > > or something like the above. Done, thanks for your good advice, I also rebased patch 4/4, since it is based on patch 3/4. Changed it to: + machine_mode src_mode = GET_MODE (XEXP (src, 0)); + + switch (src_mode) + { + case E_SFmode: + case E_DFmode: + if (TARGET_USE_VECTOR_FP_CONVERTS) + continue; + break; + case E_SImode: + case E_DImode: + if (TARGET_USE_VECTOR_CONVERTS) + continue; + break; + default: + break; + } + if (!v4sf_const0) + v4sf_const0 = gen_reg_rtx (V4SFmode); Thanks, Lili. > > Uros. > > > > > rtx zero; > > machine_mode dest_vecmode; > > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c > > b/gcc/testsuite/gcc.target/i386/pr101900-1.c > > new file mode 100644 > > index 00000000000..0a45f8e340a > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr101900-1.c > > @@ -0,0 +1,18 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -march=skylake -mfpmath=sse > > +-mtune-ctrl=use_vector_fp_converts" } */ > > + > > +extern float f; > > +extern double d; > > +extern int i; > > + > > +void > > +foo (void) > > +{ > > + d = f; > > + f = i; > > +} > > + > > +/* { dg-final { scan-assembler "vcvtps2pd" } } */ > > +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */ > > +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ > > +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } > > +} */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c > > b/gcc/testsuite/gcc.target/i386/pr101900-2.c > > new file mode 100644 > > index 00000000000..c8b2d1da5ae > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr101900-2.c > > @@ -0,0 +1,18 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -march=skylake -mfpmath=sse > > +-mtune-ctrl=use_vector_converts" } */ > > + > > +extern float f; > > +extern double d; > > +extern int i; > > + > > +void > > +foo (void) > > +{ > > + d = f; > > + f = i; > > +} > > + > > +/* { dg-final { scan-assembler "vcvtss2sd" } } */ > > +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ > > +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ > > +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } > > +} */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c > > b/gcc/testsuite/gcc.target/i386/pr101900-3.c > > new file mode 100644 > > index 00000000000..6ee565b5bd4 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr101900-3.c > > @@ -0,0 +1,19 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O2 -march=skylake -mfpmath=sse > > +-mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */ > > + > > +extern float f; > > +extern double d; > > +extern int i; > > + > > +void > > +foo (void) > > +{ > > + d = f; > > + f = i; > > +} > > + > > +/* { dg-final { scan-assembler "vcvtps2pd" } } */ > > +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ > > +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ > > +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ > > +/* { dg-final { scan-assembler-not "vxorps" } } */ > > -- > > 2.17.1 > >
On Fri, Sep 17, 2021 at 5:15 AM Cui, Lili <lili.cui@intel.com> wrote: > > > > -----Original Message----- > > From: Uros Bizjak <ubizjak@gmail.com> > > Sent: Thursday, September 16, 2021 2:28 PM > > To: Cui, Lili <lili.cui@intel.com> > > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao.liu@intel.com>; H. J. Lu > > <hjl.tools@gmail.com> > > Subject: Re: [PATCH 3/4] [PATCH 3/4] x86: Properly handle > > USE_VECTOR_FP_CONVERTS/USE_VECTOR_CONVERTS > > > > On Wed, Sep 15, 2021 at 10:10 AM <lili.cui@intel.com> wrote: > > > > > > From: "H.J. Lu" <hjl.tools@gmail.com> > > > > > > Check TARGET_USE_VECTOR_FP_CONVERTS or > > TARGET_USE_VECTOR_CONVERTS when > > > handling avx_partial_xmm_update attribute. Don't convert AVX partial > > > XMM register update if vector packed SSE conversion should be used. > > > > > > gcc/ > > > > > > PR target/101900 > > > * config/i386/i386-features.c (remove_partial_avx_dependency): > > > Check TARGET_USE_VECTOR_FP_CONVERTS and > > TARGET_USE_VECTOR_CONVERTS > > > before generating vxorps. > > > > > > gcc/ > > > > > > PR target/101900 > > > * testsuite/gcc.target/i386/pr101900-1.c: New test. > > > * testsuite/gcc.target/i386/pr101900-2.c: Likewise. > > > * testsuite/gcc.target/i386/pr101900-3.c: Likewise. > > > --- > > > gcc/config/i386/i386-features.c | 21 ++++++++++++++++++--- > > > gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++ > > > gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++ > > > gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++ > > > 4 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 > > > gcc/testsuite/gcc.target/i386/pr101900-1.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c > > > > > > diff --git a/gcc/config/i386/i386-features.c > > > b/gcc/config/i386/i386-features.c index 5a99ea7c046..ae5ea02a002 > > > 100644 > > > --- a/gcc/config/i386/i386-features.c > > > +++ b/gcc/config/i386/i386-features.c > > > @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void) > > > != AVX_PARTIAL_XMM_UPDATE_TRUE) > > > continue; > > > > > > - if (!v4sf_const0) > > > - v4sf_const0 = gen_reg_rtx (V4SFmode); > > > - > > > /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, > > > SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and > > > vec_merge with subreg. */ > > > rtx src = SET_SRC (set); > > > rtx dest = SET_DEST (set); > > > machine_mode dest_mode = GET_MODE (dest); > > > + machine_mode src_mode; > > > + > > > + if (TARGET_USE_VECTOR_FP_CONVERTS) > > > + { > > > + src_mode = GET_MODE (XEXP (src, 0)); > > > + if (src_mode == E_SFmode || src_mode == E_DFmode) > > > + continue; > > > + } > > > + > > > + if (TARGET_USE_VECTOR_CONVERTS) > > > + { > > > + src_mode = GET_MODE (XEXP (src, 0)); > > > + if (src_mode == E_SImode || src_mode == E_DImode) > > > + continue; > > > + } > > > + > > > + if (!v4sf_const0) > > > + v4sf_const0 = gen_reg_rtx (V4SFmode); > > > > Please better move initialization of src_mode to the top of the new hunk, like: > > > > machine_mode src_mode = GET_MODE (XEXP (src, 0)); switch (src_mode) { > > case E_SFmode: > > case E_DFmode: > > if (TARGET_USE_VECTOR_FP_CONVERTS) > > continue; > > break; > > case E_SImode: > > case E_DImode: > > if (TARGET_USE_VECTOR_CONVERTS) > > continue; > > break; > > default: > > break; > > } > > > > or something like the above. > > Done, thanks for your good advice, I also rebased patch 4/4, since it is based on patch 3/4. OK. Thanks, Uros. > > Changed it to: > > + machine_mode src_mode = GET_MODE (XEXP (src, 0)); > + > + switch (src_mode) > + { > + case E_SFmode: > + case E_DFmode: > + if (TARGET_USE_VECTOR_FP_CONVERTS) > + continue; > + break; > + case E_SImode: > + case E_DImode: > + if (TARGET_USE_VECTOR_CONVERTS) > + continue; > + break; > + default: > + break; > + } > + if (!v4sf_const0) > + v4sf_const0 = gen_reg_rtx (V4SFmode); > > Thanks, > Lili. > > > > > Uros. > > > > > > > > rtx zero; > > > machine_mode dest_vecmode; > > > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c > > > b/gcc/testsuite/gcc.target/i386/pr101900-1.c > > > new file mode 100644 > > > index 00000000000..0a45f8e340a > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/pr101900-1.c > > > @@ -0,0 +1,18 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O2 -march=skylake -mfpmath=sse > > > +-mtune-ctrl=use_vector_fp_converts" } */ > > > + > > > +extern float f; > > > +extern double d; > > > +extern int i; > > > + > > > +void > > > +foo (void) > > > +{ > > > + d = f; > > > + f = i; > > > +} > > > + > > > +/* { dg-final { scan-assembler "vcvtps2pd" } } */ > > > +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */ > > > +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ > > > +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } > > > +} */ > > > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c > > > b/gcc/testsuite/gcc.target/i386/pr101900-2.c > > > new file mode 100644 > > > index 00000000000..c8b2d1da5ae > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/pr101900-2.c > > > @@ -0,0 +1,18 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O2 -march=skylake -mfpmath=sse > > > +-mtune-ctrl=use_vector_converts" } */ > > > + > > > +extern float f; > > > +extern double d; > > > +extern int i; > > > + > > > +void > > > +foo (void) > > > +{ > > > + d = f; > > > + f = i; > > > +} > > > + > > > +/* { dg-final { scan-assembler "vcvtss2sd" } } */ > > > +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ > > > +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ > > > +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } > > > +} */ > > > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c > > > b/gcc/testsuite/gcc.target/i386/pr101900-3.c > > > new file mode 100644 > > > index 00000000000..6ee565b5bd4 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/pr101900-3.c > > > @@ -0,0 +1,19 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O2 -march=skylake -mfpmath=sse > > > +-mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */ > > > + > > > +extern float f; > > > +extern double d; > > > +extern int i; > > > + > > > +void > > > +foo (void) > > > +{ > > > + d = f; > > > + f = i; > > > +} > > > + > > > +/* { dg-final { scan-assembler "vcvtps2pd" } } */ > > > +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ > > > +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ > > > +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ > > > +/* { dg-final { scan-assembler-not "vxorps" } } */ > > > -- > > > 2.17.1 > > >
On Fri, Sep 17, 2021 at 08:35:57AM +0200, Uros Bizjak via Gcc-patches wrote: > > > On Wed, Sep 15, 2021 at 10:10 AM <lili.cui@intel.com> wrote: > > > > > > > > From: "H.J. Lu" <hjl.tools@gmail.com> > > > > > > > > Check TARGET_USE_VECTOR_FP_CONVERTS or > > > TARGET_USE_VECTOR_CONVERTS when > > > > handling avx_partial_xmm_update attribute. Don't convert AVX partial > > > > XMM register update if vector packed SSE conversion should be used. > > > > > > > > gcc/ > > > > > > > > PR target/101900 > > > > * config/i386/i386-features.c (remove_partial_avx_dependency): > > > > Check TARGET_USE_VECTOR_FP_CONVERTS and > > > TARGET_USE_VECTOR_CONVERTS > > > > before generating vxorps. > > > > > > > > gcc/ > > > > > > > > PR target/101900 > > > > * testsuite/gcc.target/i386/pr101900-1.c: New test. > > > > * testsuite/gcc.target/i386/pr101900-2.c: Likewise. > > > > * testsuite/gcc.target/i386/pr101900-3.c: Likewise. > > > > --- > > > > gcc/config/i386/i386-features.c | 21 ++++++++++++++++++--- > > > > gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++ > > > > gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++ > > > > gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++ > > > > 4 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 > > > > gcc/testsuite/gcc.target/i386/pr101900-1.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c > > > > > > > > diff --git a/gcc/config/i386/i386-features.c > > > > b/gcc/config/i386/i386-features.c index 5a99ea7c046..ae5ea02a002 > > > > 100644 > > > > --- a/gcc/config/i386/i386-features.c > > > > +++ b/gcc/config/i386/i386-features.c > > > > @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void) > > > > != AVX_PARTIAL_XMM_UPDATE_TRUE) > > > > continue; > > > > > > > > - if (!v4sf_const0) > > > > - v4sf_const0 = gen_reg_rtx (V4SFmode); > > > > - > > > > /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, > > > > SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and > > > > vec_merge with subreg. */ > > > > rtx src = SET_SRC (set); > > > > rtx dest = SET_DEST (set); > > > > machine_mode dest_mode = GET_MODE (dest); > > > > + machine_mode src_mode; > > > > + > > > > + if (TARGET_USE_VECTOR_FP_CONVERTS) > > > > + { > > > > + src_mode = GET_MODE (XEXP (src, 0)); > > > > + if (src_mode == E_SFmode || src_mode == E_DFmode) > > > > + continue; > > > > + } > > > > + > > > > + if (TARGET_USE_VECTOR_CONVERTS) > > > > + { > > > > + src_mode = GET_MODE (XEXP (src, 0)); > > > > + if (src_mode == E_SImode || src_mode == E_DImode) > > > > + continue; > > > > + } > > > > + > > > > + if (!v4sf_const0) > > > > + v4sf_const0 = gen_reg_rtx (V4SFmode); > > > > > > Please better move initialization of src_mode to the top of the new hunk, like: > > > > > > machine_mode src_mode = GET_MODE (XEXP (src, 0)); switch (src_mode) { > > > case E_SFmode: > > > case E_DFmode: > > > if (TARGET_USE_VECTOR_FP_CONVERTS) > > > continue; > > > break; > > > case E_SImode: > > > case E_DImode: > > > if (TARGET_USE_VECTOR_CONVERTS) > > > continue; > > > break; > > > default: > > > break; > > > } > > > > > > or something like the above. > > > > Done, thanks for your good advice, I also rebased patch 4/4, since it is based on patch 3/4. The above change broke +FAIL: gcc.target/i386/avx512f-vscalefpd-2.c (internal compiler error) +FAIL: gcc.target/i386/avx512f-vscalefpd-2.c (test for excess errors) +UNRESOLVED: gcc.target/i386/avx512f-vscalefpd-2.c compilation failed to produce executable +FAIL: gcc.target/i386/avx512f-vscalefps-2.c (internal compiler error) +FAIL: gcc.target/i386/avx512f-vscalefps-2.c (test for excess errors) +UNRESOLVED: gcc.target/i386/avx512f-vscalefps-2.c compilation failed to produce executable +FAIL: gcc.target/i386/avx512f-vscalefss-2.c (internal compiler error) +FAIL: gcc.target/i386/avx512f-vscalefss-2.c (test for excess errors) +UNRESOLVED: gcc.target/i386/avx512f-vscalefss-2.c compilation failed to produce executable +FAIL: gcc.target/i386/avx512vl-vscalefpd-2.c (internal compiler error) +FAIL: gcc.target/i386/avx512vl-vscalefpd-2.c (test for excess errors) +UNRESOLVED: gcc.target/i386/avx512vl-vscalefpd-2.c compilation failed to produce executable +FAIL: gcc.target/i386/avx512vl-vscalefps-2.c (internal compiler error) +FAIL: gcc.target/i386/avx512vl-vscalefps-2.c (test for excess errors) +UNRESOLVED: gcc.target/i386/avx512vl-vscalefps-2.c compilation failed to produce executable when configured with --enable-checking=yes,rtl,extra, the error is: during RTL pass: rpad /home/jakub/src/gcc/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-2.c:57:1: internal compiler error: RTL check: expected elt 0 type 'e' or 'u', have 'E' (rtx unspec) in remove_partial_avx_dependency, at config/i386/i386-features.c:2219 0x77541d rtl_check_failed_type2(rtx_def const*, int, int, int, char const*, int, char const*) ../../gcc/rtl.c:898 0x84e731 remove_partial_avx_dependency ../../gcc/config/i386/i386-features.c:2219 0x84e731 execute ../../gcc/config/i386/i386-features.c:2389 This is on 2219 machine_mode src_mode = GET_MODE (XEXP (src, 0)); and src is: (unspec:DF [ (mem:DF (plus:DI (reg/f:DI 149) (reg:DI 103 [ ivtmp.65 ])) [3 MEM[(double *)&src2 + ivtmp.65_65 * 1]+0 S8 A64]) (const_int 9 [0x9]) ] UNSPEC_ROUND) - whole insn (insn 55 54 56 5 (set (reg:DF 99 [ _50 ]) (unspec:DF [ (mem:DF (plus:DI (reg/f:DI 149) (reg:DI 103 [ ivtmp.65 ])) [3 MEM[(double *)&src2 + ivtmp.65_65 * 1]+0 S8 A64]) (const_int 9 [0x9]) ] UNSPEC_ROUND)) "/home/jakub/src/gcc/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-2.c":19:28 1076 {sse4_1_rounddf2} (nil)) so XEXP (src, 0) can't be used in that case. Looking at insns with avx_partial_xmm_update attribute, it seems src is either FLOAT_EXTEND/FLOAT_TRUNCATE/FLOAT/UNSIGNED_FLOAT and in that case it looks like a conversion and has different modes, or it is UNSPEC (UNSPEC_{RCP,RSQRT,ROUND}) or SQRT and in that case it doesn't. Jakub
On Sat, Sep 18, 2021 at 7:50 AM Jakub Jelinek via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > On Fri, Sep 17, 2021 at 08:35:57AM +0200, Uros Bizjak via Gcc-patches wrote: > > > > On Wed, Sep 15, 2021 at 10:10 AM <lili.cui@intel.com> wrote: > > > > > > > > > > From: "H.J. Lu" <hjl.tools@gmail.com> > > > > > > > > > > Check TARGET_USE_VECTOR_FP_CONVERTS or > > > > TARGET_USE_VECTOR_CONVERTS when > > > > > handling avx_partial_xmm_update attribute. Don't convert AVX partial > > > > > XMM register update if vector packed SSE conversion should be used. > > > > > > > > > > gcc/ > > > > > > > > > > PR target/101900 > > > > > * config/i386/i386-features.c (remove_partial_avx_dependency): > > > > > Check TARGET_USE_VECTOR_FP_CONVERTS and > > > > TARGET_USE_VECTOR_CONVERTS > > > > > before generating vxorps. > > > > > > > > > > gcc/ > > > > > > > > > > PR target/101900 > > > > > * testsuite/gcc.target/i386/pr101900-1.c: New test. > > > > > * testsuite/gcc.target/i386/pr101900-2.c: Likewise. > > > > > * testsuite/gcc.target/i386/pr101900-3.c: Likewise. > > > > > --- > > > > > gcc/config/i386/i386-features.c | 21 ++++++++++++++++++--- > > > > > gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++ > > > > > gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++ > > > > > gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++ > > > > > 4 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 > > > > > gcc/testsuite/gcc.target/i386/pr101900-1.c > > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c > > > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c > > > > > > > > > > diff --git a/gcc/config/i386/i386-features.c > > > > > b/gcc/config/i386/i386-features.c index 5a99ea7c046..ae5ea02a002 > > > > > 100644 > > > > > --- a/gcc/config/i386/i386-features.c > > > > > +++ b/gcc/config/i386/i386-features.c > > > > > @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void) > > > > > != AVX_PARTIAL_XMM_UPDATE_TRUE) > > > > > continue; > > > > > > > > > > - if (!v4sf_const0) > > > > > - v4sf_const0 = gen_reg_rtx (V4SFmode); > > > > > - > > > > > /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, > > > > > SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and > > > > > vec_merge with subreg. */ > > > > > rtx src = SET_SRC (set); > > > > > rtx dest = SET_DEST (set); > > > > > machine_mode dest_mode = GET_MODE (dest); > > > > > + machine_mode src_mode; > > > > > + > > > > > + if (TARGET_USE_VECTOR_FP_CONVERTS) > > > > > + { > > > > > + src_mode = GET_MODE (XEXP (src, 0)); > > > > > + if (src_mode == E_SFmode || src_mode == E_DFmode) > > > > > + continue; > > > > > + } > > > > > + > > > > > + if (TARGET_USE_VECTOR_CONVERTS) > > > > > + { > > > > > + src_mode = GET_MODE (XEXP (src, 0)); > > > > > + if (src_mode == E_SImode || src_mode == E_DImode) > > > > > + continue; > > > > > + } > > > > > + > > > > > + if (!v4sf_const0) > > > > > + v4sf_const0 = gen_reg_rtx (V4SFmode); > > > > > > > > Please better move initialization of src_mode to the top of the new hunk, like: > > > > > > > > machine_mode src_mode = GET_MODE (XEXP (src, 0)); switch (src_mode) { > > > > case E_SFmode: > > > > case E_DFmode: > > > > if (TARGET_USE_VECTOR_FP_CONVERTS) > > > > continue; > > > > break; > > > > case E_SImode: > > > > case E_DImode: > > > > if (TARGET_USE_VECTOR_CONVERTS) > > > > continue; > > > > break; > > > > default: > > > > break; > > > > } > > > > > > > > or something like the above. > > > > > > Done, thanks for your good advice, I also rebased patch 4/4, since it is based on patch 3/4. > > The above change broke > +FAIL: gcc.target/i386/avx512f-vscalefpd-2.c (internal compiler error) > +FAIL: gcc.target/i386/avx512f-vscalefpd-2.c (test for excess errors) > +UNRESOLVED: gcc.target/i386/avx512f-vscalefpd-2.c compilation failed to produce executable > +FAIL: gcc.target/i386/avx512f-vscalefps-2.c (internal compiler error) > +FAIL: gcc.target/i386/avx512f-vscalefps-2.c (test for excess errors) > +UNRESOLVED: gcc.target/i386/avx512f-vscalefps-2.c compilation failed to produce executable > +FAIL: gcc.target/i386/avx512f-vscalefss-2.c (internal compiler error) > +FAIL: gcc.target/i386/avx512f-vscalefss-2.c (test for excess errors) > +UNRESOLVED: gcc.target/i386/avx512f-vscalefss-2.c compilation failed to produce executable > +FAIL: gcc.target/i386/avx512vl-vscalefpd-2.c (internal compiler error) > +FAIL: gcc.target/i386/avx512vl-vscalefpd-2.c (test for excess errors) > +UNRESOLVED: gcc.target/i386/avx512vl-vscalefpd-2.c compilation failed to produce executable > +FAIL: gcc.target/i386/avx512vl-vscalefps-2.c (internal compiler error) > +FAIL: gcc.target/i386/avx512vl-vscalefps-2.c (test for excess errors) > +UNRESOLVED: gcc.target/i386/avx512vl-vscalefps-2.c compilation failed to produce executable > when configured with --enable-checking=yes,rtl,extra, the error is: > during RTL pass: rpad > /home/jakub/src/gcc/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-2.c:57:1: internal compiler error: RTL check: expected elt 0 type 'e' or 'u', have 'E' (rtx unspec) in remove_partial_avx_dependency, at config/i386/i386-features.c:2219 > 0x77541d rtl_check_failed_type2(rtx_def const*, int, int, int, char const*, int, char const*) > ../../gcc/rtl.c:898 > 0x84e731 remove_partial_avx_dependency > ../../gcc/config/i386/i386-features.c:2219 > 0x84e731 execute > ../../gcc/config/i386/i386-features.c:2389 > This is on > 2219 machine_mode src_mode = GET_MODE (XEXP (src, 0)); > and src is: > (unspec:DF [ > (mem:DF (plus:DI (reg/f:DI 149) > (reg:DI 103 [ ivtmp.65 ])) [3 MEM[(double *)&src2 + ivtmp.65_65 * 1]+0 S8 A64]) > (const_int 9 [0x9]) > ] UNSPEC_ROUND) > - whole insn > (insn 55 54 56 5 (set (reg:DF 99 [ _50 ]) > (unspec:DF [ > (mem:DF (plus:DI (reg/f:DI 149) > (reg:DI 103 [ ivtmp.65 ])) [3 MEM[(double *)&src2 + ivtmp.65_65 * 1]+0 S8 A64]) > (const_int 9 [0x9]) > ] UNSPEC_ROUND)) "/home/jakub/src/gcc/gcc/testsuite/gcc.target/i386/avx512f-vscalefpd-2.c":19:28 1076 {sse4_1_rounddf2} > (nil)) > so XEXP (src, 0) can't be used in that case. Let me fix this. > Looking at insns with avx_partial_xmm_update attribute, it seems > src is either FLOAT_EXTEND/FLOAT_TRUNCATE/FLOAT/UNSIGNED_FLOAT and > in that case it looks like a conversion and has different modes, > or it is UNSPEC (UNSPEC_{RCP,RSQRT,ROUND}) or SQRT and in that case it doesn't. Yes, i'll add comments to mention pass_rpad also handle rcp/round/sqrt/rsqrt. > > Jakub >
diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c index 5a99ea7c046..ae5ea02a002 100644 --- a/gcc/config/i386/i386-features.c +++ b/gcc/config/i386/i386-features.c @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void) != AVX_PARTIAL_XMM_UPDATE_TRUE) continue; - if (!v4sf_const0) - v4sf_const0 = gen_reg_rtx (V4SFmode); - /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and vec_merge with subreg. */ rtx src = SET_SRC (set); rtx dest = SET_DEST (set); machine_mode dest_mode = GET_MODE (dest); + machine_mode src_mode; + + if (TARGET_USE_VECTOR_FP_CONVERTS) + { + src_mode = GET_MODE (XEXP (src, 0)); + if (src_mode == E_SFmode || src_mode == E_DFmode) + continue; + } + + if (TARGET_USE_VECTOR_CONVERTS) + { + src_mode = GET_MODE (XEXP (src, 0)); + if (src_mode == E_SImode || src_mode == E_DImode) + continue; + } + + if (!v4sf_const0) + v4sf_const0 = gen_reg_rtx (V4SFmode); rtx zero; machine_mode dest_vecmode; diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c b/gcc/testsuite/gcc.target/i386/pr101900-1.c new file mode 100644 index 00000000000..0a45f8e340a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101900-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts" } */ + +extern float f; +extern double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "vcvtps2pd" } } */ +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c b/gcc/testsuite/gcc.target/i386/pr101900-2.c new file mode 100644 index 00000000000..c8b2d1da5ae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101900-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_converts" } */ + +extern float f; +extern double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "vcvtss2sd" } } */ +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c b/gcc/testsuite/gcc.target/i386/pr101900-3.c new file mode 100644 index 00000000000..6ee565b5bd4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101900-3.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */ + +extern float f; +extern double d; +extern int i; + +void +foo (void) +{ + d = f; + f = i; +} + +/* { dg-final { scan-assembler "vcvtps2pd" } } */ +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ +/* { dg-final { scan-assembler-not "vxorps" } } */
From: "H.J. Lu" <hjl.tools@gmail.com> Check TARGET_USE_VECTOR_FP_CONVERTS or TARGET_USE_VECTOR_CONVERTS when handling avx_partial_xmm_update attribute. Don't convert AVX partial XMM register update if vector packed SSE conversion should be used. gcc/ PR target/101900 * config/i386/i386-features.c (remove_partial_avx_dependency): Check TARGET_USE_VECTOR_FP_CONVERTS and TARGET_USE_VECTOR_CONVERTS before generating vxorps. gcc/ PR target/101900 * testsuite/gcc.target/i386/pr101900-1.c: New test. * testsuite/gcc.target/i386/pr101900-2.c: Likewise. * testsuite/gcc.target/i386/pr101900-3.c: Likewise. --- gcc/config/i386/i386-features.c | 21 ++++++++++++++++++--- gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++ 4 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c