i386: Fix up cvtsd2ss splitter [PR104502]

Message ID 20220212090230.GY2646553@tucnak
State New
Headers
Series i386: Fix up cvtsd2ss splitter [PR104502]

Commit Message

Jakub Jelinek Feb. 12, 2022, 9:02 a.m. UTC
  Hi!

The following testcase ICEs because AVX512F is enabled but AVX512VL is not,
and the cvtsd2ss insn has %xmm0-15 as its output operand and %xmm16-31 as
its input operand.  When the output operand is %xmm16+ the splitter simply
gives up, but when only the input is %xmm16+ it emits vmovddup, whose
128-bit form requires AVX512VL if either operand is EXT_REX_SSE_REG_P.

The following patch fixes it by treating that case like the pre-SSE3
output != input case: move the input to the output first and then do
everything on the output reg, which is known to be < %xmm16.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-02-12  Jakub Jelinek  <jakub@redhat.com>

	PR target/104502
	* config/i386/i386.md (cvtsd2ss splitter): If operands[1] is xmm16+
	and AVX512VL isn't available, move operands[1] to operands[0] first.

	* gcc.target/i386/pr104502.c: New test.


	Jakub
  

Comments

Uros Bizjak Feb. 12, 2022, 9:29 a.m. UTC | #1
On Sat, Feb 12, 2022 at 10:02 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> The following testcase ICEs, because AVX512F is enabled, AVX512VL is not,
> and the cvtsd2ss insn has %xmm0-15 as output operand and %xmm16-31 as
> input operand.  For output operand %xmm16+ the splitter just gives up
> in such case, but for such input it just emits vmovddup which requires
> AVX512VL if either operand is EXT_REX_SSE_REG_P (when it is 128-bit).
>
> The following patch fixes it by treating that case like the pre-SSE3
> output != input case - move the input to output and do everything on
> the output reg which is known to be < %xmm16.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2022-02-12  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/104502
>         * config/i386/i386.md (cvtsd2ss splitter): If operands[1] is xmm16+
>         and AVX512VL isn't available, move operands[1] to operands[0] first.
>
>         * gcc.target/i386/pr104502.c: New test.

OK.

Thanks,
Uros.

>
> --- gcc/config/i386/i386.md.jj  2022-01-18 11:58:59.142988343 +0100
> +++ gcc/config/i386/i386.md     2022-02-11 16:19:48.399518508 +0100
> @@ -4838,8 +4838,8 @@ (define_split
>       movddup is available.  */
>    if (REG_P (operands[1]))
>      {
> -      if (!TARGET_SSE3
> -         && REGNO (operands[0]) != REGNO (operands[1]))
> +      if ((!TARGET_SSE3 && REGNO (operands[0]) != REGNO (operands[1]))
> +         || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL))
>         {
>           rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode);
>           emit_move_insn (tmp, operands[1]);
> --- gcc/testsuite/gcc.target/i386/pr104502.c.jj 2022-02-11 16:28:12.880556460 +0100
> +++ gcc/testsuite/gcc.target/i386/pr104502.c    2022-02-11 16:29:42.358321630 +0100
> @@ -0,0 +1,31 @@
> +/* PR target/104502 */
> +/* { dg-do compile { target fstack_protector } } */
> +/* { dg-options "-O -flive-range-shrinkage -march=barcelona -fstack-protector-all -mavx512f" } */
> +
> +typedef char __attribute__((__vector_size__ (8))) U;
> +typedef int __attribute__((__vector_size__ (8))) A;
> +typedef int __attribute__((__vector_size__ (16))) B;
> +typedef int __attribute__((__vector_size__ (32))) C;
> +typedef int __attribute__((__vector_size__ (64))) D;
> +typedef __float128 __attribute__((__vector_size__ (32))) F;
> +
> +char s;
> +U u;
> +A a;
> +int i;
> +C c;
> +double d;
> +
> +U
> +foo (U u0, A a0, B b0, B b1, C c0, C c1, C c2, C c3, A a1, A a2, F f0)
> +{
> +  C ca = c |= (short) (float) d;
> +  C cb = c0 + c1 + c2 + c3 + ca + (C) f0;
> +  U ua = s << (u & 4);
> +  B ba = ((union {C a; B b;}) cb).b + b0 + b1;
> +  U ub = ((union {B a; U b;}) ba).b +
> +    u0 + u + ua + (U) a + (U) a + (U) a0 + (U) a1 + (U) a2;
> +  long long u64_r = i + d;
> +  char u8_r = u64_r;
> +  return ub + u8_r;
> +}
>
>         Jakub
>
  

Patch

--- gcc/config/i386/i386.md.jj	2022-01-18 11:58:59.142988343 +0100
+++ gcc/config/i386/i386.md	2022-02-11 16:19:48.399518508 +0100
@@ -4838,8 +4838,8 @@  (define_split
      movddup is available.  */
   if (REG_P (operands[1]))
     {
-      if (!TARGET_SSE3
-	  && REGNO (operands[0]) != REGNO (operands[1]))
+      if ((!TARGET_SSE3 && REGNO (operands[0]) != REGNO (operands[1]))
+	  || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL))
 	{
 	  rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode);
 	  emit_move_insn (tmp, operands[1]);
--- gcc/testsuite/gcc.target/i386/pr104502.c.jj	2022-02-11 16:28:12.880556460 +0100
+++ gcc/testsuite/gcc.target/i386/pr104502.c	2022-02-11 16:29:42.358321630 +0100
@@ -0,0 +1,31 @@ 
+/* PR target/104502 */
+/* { dg-do compile { target fstack_protector } } */
+/* { dg-options "-O -flive-range-shrinkage -march=barcelona -fstack-protector-all -mavx512f" } */
+
+typedef char __attribute__((__vector_size__ (8))) U;
+typedef int __attribute__((__vector_size__ (8))) A;
+typedef int __attribute__((__vector_size__ (16))) B;
+typedef int __attribute__((__vector_size__ (32))) C;
+typedef int __attribute__((__vector_size__ (64))) D;
+typedef __float128 __attribute__((__vector_size__ (32))) F;
+
+char s;
+U u;
+A a;
+int i;
+C c;
+double d;
+
+U
+foo (U u0, A a0, B b0, B b1, C c0, C c1, C c2, C c3, A a1, A a2, F f0)
+{
+  C ca = c |= (short) (float) d;
+  C cb = c0 + c1 + c2 + c3 + ca + (C) f0;
+  U ua = s << (u & 4);
+  B ba = ((union {C a; B b;}) cb).b + b0 + b1;
+  U ub = ((union {B a; U b;}) ba).b +
+    u0 + u + ua + (U) a + (U) a + (U) a0 + (U) a1 + (U) a2;
+  long long u64_r = i + d;
+  char u8_r = u64_r;
+  return ub + u8_r;
+}