Canonicalize vec_perm index to make the first index come from the first vector.

Message ID 20221018232301.264776-1-hongtao.liu@intel.com
State New
Headers
Series Canonicalize vec_perm index to make the first index come from the first vector. |

Commit Message

Liu, Hongtao Oct. 18, 2022, 11:23 p.m. UTC
  Fix the unexpected non-canonical form produced by the gimple vector selector.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

	PR target/107271
	* config/i386/i386-expand.cc (ix86_vec_perm_index_canon): New.
	(expand_vec_perm_shufps_shufps): Call
	ix86_vec_perm_index_canon.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr107271.c: New test.
---
 gcc/config/i386/i386-expand.cc           | 17 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr107271.c | 16 ++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107271.c
  

Comments

H.J. Lu Oct. 18, 2022, 11:49 p.m. UTC | #1
On Tue, Oct 18, 2022 at 4:25 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> Fix unexpected non-canon form from gimple vector selector.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR target/107271
>         * config/i386/i386-expand.cc (ix86_vec_perm_index_canon): New.
>         (expand_vec_perm_shufps_shufps): Call
>         ix86_vec_perm_index_canon
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr107271.c: New test.
> ---
>  gcc/config/i386/i386-expand.cc           | 17 +++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr107271.c | 16 ++++++++++++++++
>  2 files changed, 33 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107271.c
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index 6baff6d0e61..4f121516091 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -19604,6 +19604,22 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
>    return false;
>  }
>
> +/* Canonicalize vec_perm index to make the first index
> +   always comes from the first index.  */
                                                    vector?
> +static void
> +ix86_vec_perm_index_canon (struct expand_vec_perm_d *d)
> +{
> +  unsigned nelt = d->nelt;
> +  if (d->perm[0] < nelt)
> +    return;
> +
> +  for (unsigned i = 0; i != nelt; i++)
> +    d->perm[i] = (d->perm[i] + nelt) % (2 * nelt);
> +
> +  std::swap (d->op0, d->op1);
> +  return;
> +}
> +
>  /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
>     in terms of a pair of shufps+ shufps/pshufd instructions.  */
>  static bool
> @@ -19621,6 +19637,7 @@ expand_vec_perm_shufps_shufps (struct expand_vec_perm_d *d)
>    if (d->testing_p)
>      return true;
>
> +  ix86_vec_perm_index_canon (d);
>    for (i = 0; i < 4; ++i)
>      count += d->perm[i] > 3 ? 1 : 0;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr107271.c b/gcc/testsuite/gcc.target/i386/pr107271.c
> new file mode 100644
> index 00000000000..fe89c9a5bef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107271.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0" } */
> +
> +typedef int __attribute__((__vector_size__ (16))) V;
> +
> +static inline __attribute__((__always_inline__)) V
> +bar (V v128u32_0)
> +{
> +  return __builtin_shuffle ((V){}, v128u32_0, v128u32_0);
> +}
> +
> +V
> +foo (void)
> +{
> +  return bar ((V){7, 4, 4});
> +}
> --
> 2.27.0
>
  
Hongtao Liu Oct. 18, 2022, 11:51 p.m. UTC | #2
On Wed, Oct 19, 2022 at 7:49 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Oct 18, 2022 at 4:25 PM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > Fix unexpected non-canon form from gimple vector selector.
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Ok for trunk?
> >
> > gcc/ChangeLog:
> >
> >         PR target/107271
> >         * config/i386/i386-expand.cc (ix86_vec_perm_index_canon): New.
> >         (expand_vec_perm_shufps_shufps): Call
> >         ix86_vec_perm_index_canon
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/pr107271.c: New test.
> > ---
> >  gcc/config/i386/i386-expand.cc           | 17 +++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr107271.c | 16 ++++++++++++++++
> >  2 files changed, 33 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr107271.c
> >
> > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> > index 6baff6d0e61..4f121516091 100644
> > --- a/gcc/config/i386/i386-expand.cc
> > +++ b/gcc/config/i386/i386-expand.cc
> > @@ -19604,6 +19604,22 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
> >    return false;
> >  }
> >
> > +/* Canonicalize vec_perm index to make the first index
> > +   always comes from the first index.  */
>                                                     vector?
Yes.
> > +static void
> > +ix86_vec_perm_index_canon (struct expand_vec_perm_d *d)
> > +{
> > +  unsigned nelt = d->nelt;
> > +  if (d->perm[0] < nelt)
> > +    return;
> > +
> > +  for (unsigned i = 0; i != nelt; i++)
> > +    d->perm[i] = (d->perm[i] + nelt) % (2 * nelt);
> > +
> > +  std::swap (d->op0, d->op1);
> > +  return;
> > +}
> > +
> >  /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
> >     in terms of a pair of shufps+ shufps/pshufd instructions.  */
> >  static bool
> > @@ -19621,6 +19637,7 @@ expand_vec_perm_shufps_shufps (struct expand_vec_perm_d *d)
> >    if (d->testing_p)
> >      return true;
> >
> > +  ix86_vec_perm_index_canon (d);
> >    for (i = 0; i < 4; ++i)
> >      count += d->perm[i] > 3 ? 1 : 0;
> >
> > diff --git a/gcc/testsuite/gcc.target/i386/pr107271.c b/gcc/testsuite/gcc.target/i386/pr107271.c
> > new file mode 100644
> > index 00000000000..fe89c9a5bef
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr107271.c
> > @@ -0,0 +1,16 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O0" } */
> > +
> > +typedef int __attribute__((__vector_size__ (16))) V;
> > +
> > +static inline __attribute__((__always_inline__)) V
> > +bar (V v128u32_0)
> > +{
> > +  return __builtin_shuffle ((V){}, v128u32_0, v128u32_0);
> > +}
> > +
> > +V
> > +foo (void)
> > +{
> > +  return bar ((V){7, 4, 4});
> > +}
> > --
> > 2.27.0
> >
>
>
> --
> H.J.
  
Uros Bizjak Oct. 19, 2022, 5:58 a.m. UTC | #3
On Wed, Oct 19, 2022 at 1:25 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> Fix unexpected non-canon form from gimple vector selector.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR target/107271
>         * config/i386/i386-expand.cc (ix86_vec_perm_index_canon): New.
>         (expand_vec_perm_shufps_shufps): Call
>         ix86_vec_perm_index_canon
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr107271.c: New test.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-expand.cc           | 17 +++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr107271.c | 16 ++++++++++++++++
>  2 files changed, 33 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107271.c
>
> diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
> index 6baff6d0e61..4f121516091 100644
> --- a/gcc/config/i386/i386-expand.cc
> +++ b/gcc/config/i386/i386-expand.cc
> @@ -19604,6 +19604,22 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
>    return false;
>  }
>
> +/* Canonicalize vec_perm index to make the first index
> +   always comes from the first index.  */
> +static void
> +ix86_vec_perm_index_canon (struct expand_vec_perm_d *d)
> +{
> +  unsigned nelt = d->nelt;
> +  if (d->perm[0] < nelt)
> +    return;
> +
> +  for (unsigned i = 0; i != nelt; i++)
> +    d->perm[i] = (d->perm[i] + nelt) % (2 * nelt);
> +
> +  std::swap (d->op0, d->op1);
> +  return;
> +}
> +
>  /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
>     in terms of a pair of shufps+ shufps/pshufd instructions.  */
>  static bool
> @@ -19621,6 +19637,7 @@ expand_vec_perm_shufps_shufps (struct expand_vec_perm_d *d)
>    if (d->testing_p)
>      return true;
>
> +  ix86_vec_perm_index_canon (d);
>    for (i = 0; i < 4; ++i)
>      count += d->perm[i] > 3 ? 1 : 0;
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr107271.c b/gcc/testsuite/gcc.target/i386/pr107271.c
> new file mode 100644
> index 00000000000..fe89c9a5bef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107271.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0" } */
> +
> +typedef int __attribute__((__vector_size__ (16))) V;
> +
> +static inline __attribute__((__always_inline__)) V
> +bar (V v128u32_0)
> +{
> +  return __builtin_shuffle ((V){}, v128u32_0, v128u32_0);
> +}
> +
> +V
> +foo (void)
> +{
> +  return bar ((V){7, 4, 4});
> +}
> --
> 2.27.0
>
  

Patch

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 6baff6d0e61..4f121516091 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -19604,6 +19604,22 @@  expand_vec_perm_1 (struct expand_vec_perm_d *d)
   return false;
 }
 
+/* Canonicalize vec_perm index to make the first index
+   always come from the first vector.  */
+static void
+ix86_vec_perm_index_canon (struct expand_vec_perm_d *d)
+{
+  unsigned nelt = d->nelt;
+  if (d->perm[0] < nelt)
+    return;
+
+  for (unsigned i = 0; i != nelt; i++)
+    d->perm[i] = (d->perm[i] + nelt) % (2 * nelt);
+
+  std::swap (d->op0, d->op1);
+  return;
+}
+
 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
    in terms of a pair of shufps+ shufps/pshufd instructions.  */
 static bool
@@ -19621,6 +19637,7 @@  expand_vec_perm_shufps_shufps (struct expand_vec_perm_d *d)
   if (d->testing_p)
     return true;
 
+  ix86_vec_perm_index_canon (d);
   for (i = 0; i < 4; ++i)
     count += d->perm[i] > 3 ? 1 : 0;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr107271.c b/gcc/testsuite/gcc.target/i386/pr107271.c
new file mode 100644
index 00000000000..fe89c9a5bef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107271.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+typedef int __attribute__((__vector_size__ (16))) V;
+
+static inline __attribute__((__always_inline__)) V
+bar (V v128u32_0)
+{
+  return __builtin_shuffle ((V){}, v128u32_0, v128u32_0);
+}
+
+V
+foo (void)
+{
+  return bar ((V){7, 4, 4});
+}