Support vector conversion for AVX512 vcvtudq2pd/vcvttps2udq/vcvttpd2udq.

Message ID 20230330014456.1425596-1-hongtao.liu@intel.com
State New
Headers
Series Support vector conversion for AVX512 vcvtudq2pd/vcvttps2udq/vcvttpd2udq. |

Commit Message

Liu, Hongtao March 30, 2023, 1:44 a.m. UTC
  The standard pattern names for unsigned_{float,fix} are misspelled in
current trunk: they should be floatunsmn2/fixuns_truncmn2, not
ufloatmn2/ufix_truncmn2.  This patch fixes the typo.

Also vcvttps2udq is available under AVX512VL, so it can be generated
directly instead of being emulated via vcvttps2dq.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
Ok for GCC14 stage1 (or maybe for trunk)?

gcc/ChangeLog:

	PR target/85048
	* config/i386/sse.md (fixuns_trunc<mode><sseintvecmodelower>2):
	Generate vcvttps2udq under AVX512VL.
	(fixuns_truncv4dfv4si2): New expander.
	(floatuns<si2dfmodelower><mode>2): New expander.

gcc/testsuite/ChangeLog:

	* g++.target/i386/pr85048.C: New test.
---
 gcc/config/i386/sse.md                  | 18 ++++++++++++--
 gcc/testsuite/g++.target/i386/pr85048.C | 33 +++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr85048.C
  

Comments

Uros Bizjak March 30, 2023, 6:17 a.m. UTC | #1
On Thu, Mar 30, 2023 at 3:47 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> There's some typo for the standard pattern name for unsigned_{float,fix},
> it should be floatunsmn2/fixuns_truncmn2, not ufloatmn2/ufix_truncmn2
> in current trunk, the patch fix the typo.
>
> Also vcvttps2udq is available under AVX512VL, so it can be generated
> directly instead of being emulated via vcvttps2dq.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> Ok for GCC14 stage1{or maybe for trunk)?
>
> gcc/ChangeLog:
>
>         PR target/85048
>         * config/i386/sse.md (floatuns<si2dfmodelower><mode>2):
>         Generate vcvtudq2ps under AVX512VL.
>         (fixuns_truncv4dfv4si2): New expander.
>         (floatuns<si2dfmodelower><mode>2): New expander.
>
> gcc/testsuite/ChangeLog:
>
>         * g++.target/i386/pr85048.C: New test.
> ---
>  gcc/config/i386/sse.md                  | 18 ++++++++++++--
>  gcc/testsuite/g++.target/i386/pr85048.C | 33 +++++++++++++++++++++++++
>  2 files changed, 49 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr85048.C
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 172ec3bea4f..9c2bd468c65 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -8014,8 +8014,9 @@ (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
>     (match_operand:VF1 1 "register_operand")]
>    "TARGET_SSE2"
>  {
> -  if (<MODE>mode == V16SFmode)
> -    emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
> +  /* AVX512 support vcvttps2udq for all 128/256/512-bit vectors.  */
> +  if (<MODE>mode == V16SFmode || TARGET_AVX512VL)
> +    emit_insn (gen_ufix_trunc<mode><sseintvecmodelower>2 (operands[0],
>                                           operands[1]));
>    else
>      {
> @@ -8413,6 +8414,12 @@ (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "V4SF")])
>
> +(define_expand "floatuns<si2dfmodelower><mode>2"
> +  [(set (match_operand:VF2_512_256VL 0 "register_operand")
> +       (unsigned_float:VF2_512_256VL
> +         (match_operand:<si2dfmode> 1 "nonimmediate_operand")))]
> +   "TARGET_AVX512F")
> +

Just rename the instruction and fix all its call sites. The name of
the insn pattern is internal to the compiler and can be renamed at
will.

>  (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
>    [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
>         (unsigned_float:VF2_512_256VL
> @@ -8694,6 +8701,13 @@ (define_insn "fix_truncv4dfv4si2<mask_name>"
>     (set_attr "prefix" "maybe_evex")
>     (set_attr "mode" "OI")])
>
> +
> +/* The standard pattern name is fixuns_truncmn2.  */
> +(define_expand "fixuns_truncv4dfv4si2"
> +  [(set (match_operand:V4SI 0 "register_operand")
> +       (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512VL && TARGET_AVX512F")
> +

Also the above.

Uros.

>  (define_insn "ufix_truncv4dfv4si2<mask_name>"
>    [(set (match_operand:V4SI 0 "register_operand" "=v")
>         (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
> diff --git a/gcc/testsuite/g++.target/i386/pr85048.C b/gcc/testsuite/g++.target/i386/pr85048.C
> new file mode 100644
> index 00000000000..52973c18ebd
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr85048.C
> @@ -0,0 +1,33 @@
> +/* PR target/85048 */
> +/* { dg-do compile }  */
> +/* { dg-options "-std=c++17 -O2 -mavx512vl -mavx512dq -mprefer-vector-width=512" } */
> +/* { dg-final { scan-assembler-times {(?n)vcvtudq2pd[ \t]+} 2 } } */
> +/* { dg-final { scan-assembler-times {(?n)vcvttps2udq[ \t]+} 2 } } */
> +/* { dg-final { scan-assembler-times {(?n)vcvttpd2udqy?[ \t]+} 1 } } */
> +
> +#include <cstdint>
> +
> +template <class T, int N, int Size = N * sizeof(T)>
> +using V [[gnu::vector_size(Size)]] = T;
> +
> +template <class From, class To> V<To, 4> cvt4(V<From, 4> x) {
> +    return V<To, 4>{To(x[0]), To(x[1]), To(x[2]), To(x[3])};
> +}
> +template <class From, class To> V<To, 8> cvt8(V<From, 8> x) {
> +    return V<To, 8>{
> +        To(x[0]), To(x[1]), To(x[2]), To(x[3]),
> +        To(x[4]), To(x[5]), To(x[6]), To(x[7])
> +    };
> +}
> +
> +#define _(name, from, to, size) \
> +auto name(V<from, size> x) { return cvt##size<from, to>(x); }
> +// integral -> double
> +_(vcvtudq2pd, uint32_t, double, 4)
> +_(vcvtudq2pd, uint32_t, double, 8)
> +
> +_( cvttps2udq, float, uint32_t,  4)
> +_(vcvttps2udq, float, uint32_t,  8)
> +
> +// double -> integral
> +_(vcvttpd2udq, double, uint32_t, 4)
> --
> 2.39.1.388.g2fc9e9ca3c
>
  
Uros Bizjak March 30, 2023, 6:24 a.m. UTC | #2
On Thu, Mar 30, 2023 at 8:17 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Mar 30, 2023 at 3:47 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > There's some typo for the standard pattern name for unsigned_{float,fix},
> > it should be floatunsmn2/fixuns_truncmn2, not ufloatmn2/ufix_truncmn2
> > in current trunk, the patch fix the typo.
> >
> > Also vcvttps2udq is available under AVX512VL, so it can be generated
> > directly instead of being emulated via vcvttps2dq.
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> > Ok for GCC14 stage1{or maybe for trunk)?
> >
> > gcc/ChangeLog:
> >
> >         PR target/85048
> >         * config/i386/sse.md (floatuns<si2dfmodelower><mode>2):
> >         Generate vcvtudq2ps under AVX512VL.
> >         (fixuns_truncv4dfv4si2): New expander.
> >         (floatuns<si2dfmodelower><mode>2): New expander.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * g++.target/i386/pr85048.C: New test.
> > ---
> >  gcc/config/i386/sse.md                  | 18 ++++++++++++--
> >  gcc/testsuite/g++.target/i386/pr85048.C | 33 +++++++++++++++++++++++++
> >  2 files changed, 49 insertions(+), 2 deletions(-)
> >  create mode 100644 gcc/testsuite/g++.target/i386/pr85048.C
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 172ec3bea4f..9c2bd468c65 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -8014,8 +8014,9 @@ (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
> >     (match_operand:VF1 1 "register_operand")]
> >    "TARGET_SSE2"
> >  {
> > -  if (<MODE>mode == V16SFmode)
> > -    emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
> > +  /* AVX512 support vcvttps2udq for all 128/256/512-bit vectors.  */
> > +  if (<MODE>mode == V16SFmode || TARGET_AVX512VL)
> > +    emit_insn (gen_ufix_trunc<mode><sseintvecmodelower>2 (operands[0],
> >                                           operands[1]));
> >    else
> >      {
> > @@ -8413,6 +8414,12 @@ (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "V4SF")])
> >
> > +(define_expand "floatuns<si2dfmodelower><mode>2"
> > +  [(set (match_operand:VF2_512_256VL 0 "register_operand")
> > +       (unsigned_float:VF2_512_256VL
> > +         (match_operand:<si2dfmode> 1 "nonimmediate_operand")))]
> > +   "TARGET_AVX512F")
> > +
>
> Just rename the instruction and fix all its call sites. The name of
> the insn pattern is internal to the compiler and can be renamed at
> will.

Ideally, we should rename all of these patterns to their standard
names, e.g. ufix_ -> fixuns_ and ufloat -> floatuns.

Uros.

> >  (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
> >    [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
> >         (unsigned_float:VF2_512_256VL
> > @@ -8694,6 +8701,13 @@ (define_insn "fix_truncv4dfv4si2<mask_name>"
> >     (set_attr "prefix" "maybe_evex")
> >     (set_attr "mode" "OI")])
> >
> > +
> > +/* The standard pattern name is fixuns_truncmn2.  */
> > +(define_expand "fixuns_truncv4dfv4si2"
> > +  [(set (match_operand:V4SI 0 "register_operand")
> > +       (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand")))]
> > +  "TARGET_AVX512VL && TARGET_AVX512F")
> > +
>
> Also the above.
>
> Uros.
>
> >  (define_insn "ufix_truncv4dfv4si2<mask_name>"
> >    [(set (match_operand:V4SI 0 "register_operand" "=v")
> >         (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
> > diff --git a/gcc/testsuite/g++.target/i386/pr85048.C b/gcc/testsuite/g++.target/i386/pr85048.C
> > new file mode 100644
> > index 00000000000..52973c18ebd
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr85048.C
> > @@ -0,0 +1,33 @@
> > +/* PR target/85048 */
> > +/* { dg-do compile }  */
> > +/* { dg-options "-std=c++17 -O2 -mavx512vl -mavx512dq -mprefer-vector-width=512" } */
> > +/* { dg-final { scan-assembler-times {(?n)vcvtudq2pd[ \t]+} 2 } } */
> > +/* { dg-final { scan-assembler-times {(?n)vcvttps2udq[ \t]+} 2 } } */
> > +/* { dg-final { scan-assembler-times {(?n)vcvttpd2udqy?[ \t]+} 1 } } */
> > +
> > +#include <cstdint>
> > +
> > +template <class T, int N, int Size = N * sizeof(T)>
> > +using V [[gnu::vector_size(Size)]] = T;
> > +
> > +template <class From, class To> V<To, 4> cvt4(V<From, 4> x) {
> > +    return V<To, 4>{To(x[0]), To(x[1]), To(x[2]), To(x[3])};
> > +}
> > +template <class From, class To> V<To, 8> cvt8(V<From, 8> x) {
> > +    return V<To, 8>{
> > +        To(x[0]), To(x[1]), To(x[2]), To(x[3]),
> > +        To(x[4]), To(x[5]), To(x[6]), To(x[7])
> > +    };
> > +}
> > +
> > +#define _(name, from, to, size) \
> > +auto name(V<from, size> x) { return cvt##size<from, to>(x); }
> > +// integral -> double
> > +_(vcvtudq2pd, uint32_t, double, 4)
> > +_(vcvtudq2pd, uint32_t, double, 8)
> > +
> > +_( cvttps2udq, float, uint32_t,  4)
> > +_(vcvttps2udq, float, uint32_t,  8)
> > +
> > +// double -> integral
> > +_(vcvttpd2udq, double, uint32_t, 4)
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >
  

Patch

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 172ec3bea4f..9c2bd468c65 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -8014,8 +8014,9 @@  (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
    (match_operand:VF1 1 "register_operand")]
   "TARGET_SSE2"
 {
-  if (<MODE>mode == V16SFmode)
-    emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
+  /* AVX512 support vcvttps2udq for all 128/256/512-bit vectors.  */
+  if (<MODE>mode == V16SFmode || TARGET_AVX512VL)
+    emit_insn (gen_ufix_trunc<mode><sseintvecmodelower>2 (operands[0],
 					  operands[1]));
   else
     {
@@ -8413,6 +8414,12 @@  (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
    (set_attr "prefix" "evex")
    (set_attr "mode" "V4SF")])
 
+(define_expand "floatuns<si2dfmodelower><mode>2"
+  [(set (match_operand:VF2_512_256VL 0 "register_operand")
+	(unsigned_float:VF2_512_256VL
+	  (match_operand:<si2dfmode> 1 "nonimmediate_operand")))]
+   "TARGET_AVX512F")
+
 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
   [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
 	(unsigned_float:VF2_512_256VL
@@ -8694,6 +8701,13 @@  (define_insn "fix_truncv4dfv4si2<mask_name>"
    (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "OI")])
 
+
+/* The standard pattern name is fixuns_truncmn2.  */
+(define_expand "fixuns_truncv4dfv4si2"
+  [(set (match_operand:V4SI 0 "register_operand")
+	(unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512VL && TARGET_AVX512F")
+
 (define_insn "ufix_truncv4dfv4si2<mask_name>"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
 	(unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
diff --git a/gcc/testsuite/g++.target/i386/pr85048.C b/gcc/testsuite/g++.target/i386/pr85048.C
new file mode 100644
index 00000000000..52973c18ebd
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr85048.C
@@ -0,0 +1,33 @@ 
+/* PR target/85048 */
+/* { dg-do compile }  */
+/* { dg-options "-std=c++17 -O2 -mavx512vl -mavx512dq -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler-times {(?n)vcvtudq2pd[ \t]+} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)vcvttps2udq[ \t]+} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)vcvttpd2udqy?[ \t]+} 1 } } */
+
+#include <cstdint>
+
+template <class T, int N, int Size = N * sizeof(T)>
+using V [[gnu::vector_size(Size)]] = T;
+
+template <class From, class To> V<To, 4> cvt4(V<From, 4> x) {
+    return V<To, 4>{To(x[0]), To(x[1]), To(x[2]), To(x[3])};
+}
+template <class From, class To> V<To, 8> cvt8(V<From, 8> x) {
+    return V<To, 8>{
+        To(x[0]), To(x[1]), To(x[2]), To(x[3]),
+        To(x[4]), To(x[5]), To(x[6]), To(x[7])
+    };
+}
+
+#define _(name, from, to, size) \
+auto name(V<from, size> x) { return cvt##size<from, to>(x); }
+// integral -> double
+_(vcvtudq2pd, uint32_t, double, 4)
+_(vcvtudq2pd, uint32_t, double, 8)
+
+_( cvttps2udq, float, uint32_t,  4)
+_(vcvttps2udq, float, uint32_t,  8)
+
+// double -> integral
+_(vcvttpd2udq, double, uint32_t, 4)