aarch64: [PR101529] Fix vector shuffle insertion expansion
Commit Message
From: Andrew Pinski <apinski@marvell.com>
The function aarch64_evpc_ins would reuse the target even though
it might be the same register as the two inputs.
Instead of checking to see if we can reuse the target, creating
a new register always is better.
OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
PR target/101529
gcc/ChangeLog:
* config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target
as an input instead create a new reg.
gcc/testsuite/ChangeLog:
* c-c++-common/torture/builtin-convertvector-2.c: New test.
* c-c++-common/torture/builtin-shufflevector-2.c: New test.
---
gcc/config/aarch64/aarch64.c | 8 ++++--
.../torture/builtin-convertvector-2.c | 26 +++++++++++++++++++
.../torture/builtin-shufflevector-2.c | 26 +++++++++++++++++++
3 files changed, 58 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
Comments
apinski--- via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> From: Andrew Pinski <apinski@marvell.com>
>
> The function aarch64_evpc_ins would reuse the target even though
> it might be the same register as the two inputs.
> Instead of checking to see if we can reuse the target, creating
> a new register always is better.
>
> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
> PR target/101529
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target
> as an input instead create a new reg.
>
> gcc/testsuite/ChangeLog:
>
> * c-c++-common/torture/builtin-convertvector-2.c: New test.
> * c-c++-common/torture/builtin-shufflevector-2.c: New test.
> ---
> gcc/config/aarch64/aarch64.c | 8 ++++--
> .../torture/builtin-convertvector-2.c | 26 +++++++++++++++++++
> .../torture/builtin-shufflevector-2.c | 26 +++++++++++++++++++
> 3 files changed, 58 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 2c00583e12c..e4fc546fae7 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
> }
> gcc_assert (extractindex < nelt);
>
> - emit_move_insn (d->target, insv);
> + /* Use a new reg instead of target as one of the
> + operands might be target. */
> + rtx original = gen_reg_rtx (GET_MODE (d->target));
> +
> + emit_move_insn (original, insv);
Why can't we just remove the move and pass insv directly to
create_input_operand? That'll encourage the RA to allocate insv and
d->target to the same register where possible (but will reload where
tying is not possible).
OK with that change if it works.
Thanks,
Richard
> insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
> expand_operand ops[5];
> create_output_operand (&ops[0], d->target, mode);
> - create_input_operand (&ops[1], d->target, mode);
> + create_input_operand (&ops[1], original, mode);
> create_integer_operand (&ops[2], 1 << idx);
> create_input_operand (&ops[3], extractv, mode);
> create_integer_operand (&ops[4], extractindex);
> diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> new file mode 100644
> index 00000000000..d88f6a72b5c
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> @@ -0,0 +1,26 @@
> +/* { dg-do run } */
> +/* PR target/101529 */
> +
> +typedef unsigned char __attribute__((__vector_size__ (1))) W;
> +typedef unsigned char __attribute__((__vector_size__ (8))) V;
> +typedef unsigned short __attribute__((__vector_size__ (16))) U;
> +
> +unsigned short us;
> +
> +/* aarch64 used to miscompile foo to just return 0. */
> +W
> +foo (unsigned char uc)
> +{
> + V v = __builtin_convertvector ((U){ } >= us, V);
> + return __builtin_shufflevector ((W){ }, v, 4) & uc;
> +}
> +
> +int
> +main (void)
> +{
> + W x = foo (5);
> + if (x[0] != 5)
> + __builtin_abort();
> + return 0;
> +}
> +
> diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
> new file mode 100644
> index 00000000000..7c4999ed4e9
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
> @@ -0,0 +1,26 @@
> +/* { dg-do run} */
> +/* PR target/101529 */
> +typedef unsigned char C;
> +typedef unsigned char __attribute__((__vector_size__ (8))) V;
> +typedef unsigned char __attribute__((__vector_size__ (32))) U;
> +
> +C c;
> +
> +/* aarch64 used to miscompile foo to just return a vector of 0s */
> +V
> +foo (V v)
> +{
> + v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }),
> + 0, 1, 8, 32, 8, 20, 36, 36);
> + return v;
> +}
> +
> +int
> +main (void)
> +{
> + V v = foo ((V) { });
> + for (unsigned i = 0; i < sizeof (v); i++)
> + if (v[i] != (i >= 2 ? 0xff : 0))
> + __builtin_abort ();
> + return 0;
> +}
On Tue, Nov 9, 2021 at 6:51 AM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> apinski--- via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> > From: Andrew Pinski <apinski@marvell.com>
> >
> > The function aarch64_evpc_ins would reuse the target even though
> > it might be the same register as the two inputs.
> > Instead of checking to see if we can reuse the target, creating
> > a new register always is better.
> >
> > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
> >
> > PR target/101529
> >
> > gcc/ChangeLog:
> >
> > * config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target
> > as an input instead create a new reg.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * c-c++-common/torture/builtin-convertvector-2.c: New test.
> > * c-c++-common/torture/builtin-shufflevector-2.c: New test.
> > ---
> > gcc/config/aarch64/aarch64.c | 8 ++++--
> > .../torture/builtin-convertvector-2.c | 26 +++++++++++++++++++
> > .../torture/builtin-shufflevector-2.c | 26 +++++++++++++++++++
> > 3 files changed, 58 insertions(+), 2 deletions(-)
> > create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> > create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
> >
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index 2c00583e12c..e4fc546fae7 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
> > }
> > gcc_assert (extractindex < nelt);
> >
> > - emit_move_insn (d->target, insv);
> > + /* Use a new reg instead of target as one of the
> > + operands might be target. */
> > + rtx original = gen_reg_rtx (GET_MODE (d->target));
> > +
> > + emit_move_insn (original, insv);
>
> Why can't we just remove the move and pass insv directly to
> create_input_operand? That'll encourage the RA to allocate insv and
> d->target to the same register where possible (but will reload where
> tying is not possible).
>
> OK with that change if it works.
Yes that worked, committed as r12-5078-g52fa771758 .
Thanks,
Andrew
>
> Thanks,
> Richard
>
> > insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
> > expand_operand ops[5];
> > create_output_operand (&ops[0], d->target, mode);
> > - create_input_operand (&ops[1], d->target, mode);
> > + create_input_operand (&ops[1], original, mode);
> > create_integer_operand (&ops[2], 1 << idx);
> > create_input_operand (&ops[3], extractv, mode);
> > create_integer_operand (&ops[4], extractindex);
> > diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> > new file mode 100644
> > index 00000000000..d88f6a72b5c
> > --- /dev/null
> > +++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
> > @@ -0,0 +1,26 @@
> > +/* { dg-do run } */
> > +/* PR target/101529 */
> > +
> > +typedef unsigned char __attribute__((__vector_size__ (1))) W;
> > +typedef unsigned char __attribute__((__vector_size__ (8))) V;
> > +typedef unsigned short __attribute__((__vector_size__ (16))) U;
> > +
> > +unsigned short us;
> > +
> > +/* aarch64 used to miscompile foo to just return 0. */
> > +W
> > +foo (unsigned char uc)
> > +{
> > + V v = __builtin_convertvector ((U){ } >= us, V);
> > + return __builtin_shufflevector ((W){ }, v, 4) & uc;
> > +}
> > +
> > +int
> > +main (void)
> > +{
> > + W x = foo (5);
> > + if (x[0] != 5)
> > + __builtin_abort();
> > + return 0;
> > +}
> > +
> > diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
> > new file mode 100644
> > index 00000000000..7c4999ed4e9
> > --- /dev/null
> > +++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
> > @@ -0,0 +1,26 @@
> > +/* { dg-do run} */
> > +/* PR target/101529 */
> > +typedef unsigned char C;
> > +typedef unsigned char __attribute__((__vector_size__ (8))) V;
> > +typedef unsigned char __attribute__((__vector_size__ (32))) U;
> > +
> > +C c;
> > +
> > +/* aarch64 used to miscompile foo to just return a vector of 0s */
> > +V
> > +foo (V v)
> > +{
> > + v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }),
> > + 0, 1, 8, 32, 8, 20, 36, 36);
> > + return v;
> > +}
> > +
> > +int
> > +main (void)
> > +{
> > + V v = foo ((V) { });
> > + for (unsigned i = 0; i < sizeof (v); i++)
> > + if (v[i] != (i >= 2 ? 0xff : 0))
> > + __builtin_abort ();
> > + return 0;
> > +}
@@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
}
gcc_assert (extractindex < nelt);
- emit_move_insn (d->target, insv);
+ /* Use a new reg instead of target as one of the
+ operands might be target. */
+ rtx original = gen_reg_rtx (GET_MODE (d->target));
+
+ emit_move_insn (original, insv);
insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
expand_operand ops[5];
create_output_operand (&ops[0], d->target, mode);
- create_input_operand (&ops[1], d->target, mode);
+ create_input_operand (&ops[1], original, mode);
create_integer_operand (&ops[2], 1 << idx);
create_input_operand (&ops[3], extractv, mode);
create_integer_operand (&ops[4], extractindex);
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* PR target/101529 */
+
+typedef unsigned char __attribute__((__vector_size__ (1))) W;
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+typedef unsigned short __attribute__((__vector_size__ (16))) U;
+
+unsigned short us;
+
+/* aarch64 used to miscompile foo to just return 0. */
+W
+foo (unsigned char uc)
+{
+ V v = __builtin_convertvector ((U){ } >= us, V);
+ return __builtin_shufflevector ((W){ }, v, 4) & uc;
+}
+
+int
+main (void)
+{
+ W x = foo (5);
+ if (x[0] != 5)
+ __builtin_abort();
+ return 0;
+}
+
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do run} */
+/* PR target/101529 */
+typedef unsigned char C;
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+typedef unsigned char __attribute__((__vector_size__ (32))) U;
+
+C c;
+
+/* aarch64 used to miscompile foo to just return a vector of 0s */
+V
+foo (V v)
+{
+ v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }),
+ 0, 1, 8, 32, 8, 20, 36, 36);
+ return v;
+}
+
+int
+main (void)
+{
+ V v = foo ((V) { });
+ for (unsigned i = 0; i < sizeof (v); i++)
+ if (v[i] != (i >= 2 ? 0xff : 0))
+ __builtin_abort ();
+ return 0;
+}