[v2] x86: Also check mode of memory broadcast in bcst_mem_operand

Message ID 20220123122816.345498-1-hjl.tools@gmail.com
State Committed
Commit 4d2321314a656dd3e30117e2a5266cbacb1e60eb
Headers
Series [v2] x86: Also check mode of memory broadcast in bcst_mem_operand |

Commit Message

H.J. Lu Jan. 23, 2022, 12:28 p.m. UTC
  Return false for invalid mode on memory broadcast in bcst_mem_operand:

(vec_duplicate:V16SF (mem/j:V4SF (reg/v/f:DI 109 [ b ])))

gcc/

	PR target/104188
	* config/i386/predicates.md (bcst_mem_operand): Also check mode
	of memory broadcast.

gcc/testsuite/

	PR target/104188
	* gcc.target/i386/pr104188.c: New test.
---
 gcc/config/i386/predicates.md            |  2 +
 gcc/testsuite/gcc.target/i386/pr104188.c | 70 ++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr104188.c
  

Comments

Hongtao Liu Jan. 24, 2022, 12:35 a.m. UTC | #1
On Sun, Jan 23, 2022 at 8:28 PM H.J. Lu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Return false for invalid mode on memory broadcast in bcst_mem_operand:
>
> (vec_duplicate:V16SF (mem/j:V4SF (reg/v/f:DI 109 [ b ])))
>
Yes, thanks.
> gcc/
>
>         PR target/104188
>         * config/i386/predicates.md (bcst_mem_operand): Also check mode
>         of memory broadcast.
>
> gcc/testsuite/
>
>         PR target/104188
>         * gcc.target/i386/pr104188.c: New test.
> ---
>  gcc/config/i386/predicates.md            |  2 +
>  gcc/testsuite/gcc.target/i386/pr104188.c | 70 ++++++++++++++++++++++++
>  2 files changed, 72 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr104188.c
>
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index eae6ab58e23..a8cc17a054d 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -1157,6 +1157,8 @@ (define_predicate "bcst_mem_operand"
>             (ior (match_test "TARGET_AVX512VL")
>                  (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")))
>         (match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))")
> +       (match_test "GET_MODE (XEXP (op, 0))
> +                   == GET_MODE_INNER (GET_MODE (op))")
>         (match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))")))
>
>  ; Return true when OP is bcst_mem_operand or vector_memory_operand.
> diff --git a/gcc/testsuite/gcc.target/i386/pr104188.c b/gcc/testsuite/gcc.target/i386/pr104188.c
> new file mode 100644
> index 00000000000..c6f615b9625
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr104188.c
> @@ -0,0 +1,70 @@
> +/* { dg-do run { target avx512f } } */
> +/* { dg-options "-O2 -mfpmath=sse" } */
> +
> +#include <x86intrin.h>
> +
> +union U {
> +  float m[4][4];
> +  __m128 r[4];
> +  __m512 s;
> +};
> +
> +__attribute__((noipa, target("avx512f")))
> +void
> +foo (union U *x, union U *a, union U *b)
> +{
> +  __m512 c = _mm512_loadu_ps (&a->s);
> +  __m512 d = _mm512_broadcast_f32x4 (b->r[0]);
> +  __m512 e = _mm512_broadcast_f32x4 (b->r[1]);
> +  __m512 f = _mm512_broadcast_f32x4 (b->r[2]);
> +  __m512 g = _mm512_broadcast_f32x4 (b->r[3]);
> +  __m512 h = _mm512_mul_ps (_mm512_permute_ps (c, 0x00), d);
> +  h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0x55), e, h);
> +  h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xaa), f, h);
> +  h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xff), g, h);
> +  _mm512_storeu_ps (&x->s, h);
> +}
> +
> +__attribute__((noipa, target("avx512f")))
> +void
> +do_test (void)
> +{
> +  union U a = { .m = { { 1.0f, 2.0f, 3.0f, 4.0f },
> +                      { 5.0f, 6.0f, 7.0f, 8.0f },
> +                      { 9.0f, 10.0f, 11.0f, 12.0f },
> +                      { 13.0f, 14.0f, 15.0f, 16.0f } } };
> +  union U b = { .m = { { 17.0f, 18.0f, 19.0f, 20.0f },
> +                      { 21.0f, 22.0f, 23.0f, 24.0f },
> +                      { 25.0f, 26.0f, 27.0f, 28.0f },
> +                      { 29.0f, 30.0f, 31.0f, 32.0f } } };
> +  union U c;
> +  foo (&c, &a, &b);
> +  if (c.m[0][0] != 250.0f
> +      || c.m[0][1] != 260.0f
> +      || c.m[0][2] != 270.0f
> +      || c.m[0][3] != 280.0f)
> +    __builtin_abort ();
> +  if (c.m[1][0] != 618.0f
> +      || c.m[1][1] != 644.0f
> +      || c.m[1][2] != 670.0f
> +      || c.m[1][3] != 696.0f)
> +    __builtin_abort ();
> +  if (c.m[2][0] != 986.0f
> +      || c.m[2][1] != 1028.0f
> +      || c.m[2][2] != 1070.0f
> +      || c.m[2][3] != 1112.0f)
> +    __builtin_abort ();
> +  if (c.m[3][0] != 1354.0f
> +      || c.m[3][1] != 1412.0f
> +      || c.m[3][2] != 1470.0f
> +      || c.m[3][3] != 1528.0f)
> +    __builtin_abort ();
> +}
> +
> +int
> +main ()
> +{
> +  if (__builtin_cpu_supports ("avx512f"))
> +    do_test ();
> +  return 0;
> +}
> --
> 2.34.1
>
  
H.J. Lu Jan. 24, 2022, 12:39 a.m. UTC | #2
On Sun, Jan 23, 2022 at 4:35 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Sun, Jan 23, 2022 at 8:28 PM H.J. Lu via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Return false for invalid mode on memory broadcast in bcst_mem_operand:
> >
> > (vec_duplicate:V16SF (mem/j:V4SF (reg/v/f:DI 109 [ b ])))
> >
> Yes, thanks.

I will also backport it to GCC 11 branch.

Thanks.

> > gcc/
> >
> >         PR target/104188
> >         * config/i386/predicates.md (bcst_mem_operand): Also check mode
> >         of memory broadcast.
> >
> > gcc/testsuite/
> >
> >         PR target/104188
> >         * gcc.target/i386/pr104188.c: New test.
> > ---
> >  gcc/config/i386/predicates.md            |  2 +
> >  gcc/testsuite/gcc.target/i386/pr104188.c | 70 ++++++++++++++++++++++++
> >  2 files changed, 72 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr104188.c
> >
> > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> > index eae6ab58e23..a8cc17a054d 100644
> > --- a/gcc/config/i386/predicates.md
> > +++ b/gcc/config/i386/predicates.md
> > @@ -1157,6 +1157,8 @@ (define_predicate "bcst_mem_operand"
> >             (ior (match_test "TARGET_AVX512VL")
> >                  (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")))
> >         (match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))")
> > +       (match_test "GET_MODE (XEXP (op, 0))
> > +                   == GET_MODE_INNER (GET_MODE (op))")
> >         (match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))")))
> >
> >  ; Return true when OP is bcst_mem_operand or vector_memory_operand.
> > diff --git a/gcc/testsuite/gcc.target/i386/pr104188.c b/gcc/testsuite/gcc.target/i386/pr104188.c
> > new file mode 100644
> > index 00000000000..c6f615b9625
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr104188.c
> > @@ -0,0 +1,70 @@
> > +/* { dg-do run { target avx512f } } */
> > +/* { dg-options "-O2 -mfpmath=sse" } */
> > +
> > +#include <x86intrin.h>
> > +
> > +union U {
> > +  float m[4][4];
> > +  __m128 r[4];
> > +  __m512 s;
> > +};
> > +
> > +__attribute__((noipa, target("avx512f")))
> > +void
> > +foo (union U *x, union U *a, union U *b)
> > +{
> > +  __m512 c = _mm512_loadu_ps (&a->s);
> > +  __m512 d = _mm512_broadcast_f32x4 (b->r[0]);
> > +  __m512 e = _mm512_broadcast_f32x4 (b->r[1]);
> > +  __m512 f = _mm512_broadcast_f32x4 (b->r[2]);
> > +  __m512 g = _mm512_broadcast_f32x4 (b->r[3]);
> > +  __m512 h = _mm512_mul_ps (_mm512_permute_ps (c, 0x00), d);
> > +  h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0x55), e, h);
> > +  h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xaa), f, h);
> > +  h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xff), g, h);
> > +  _mm512_storeu_ps (&x->s, h);
> > +}
> > +
> > +__attribute__((noipa, target("avx512f")))
> > +void
> > +do_test (void)
> > +{
> > +  union U a = { .m = { { 1.0f, 2.0f, 3.0f, 4.0f },
> > +                      { 5.0f, 6.0f, 7.0f, 8.0f },
> > +                      { 9.0f, 10.0f, 11.0f, 12.0f },
> > +                      { 13.0f, 14.0f, 15.0f, 16.0f } } };
> > +  union U b = { .m = { { 17.0f, 18.0f, 19.0f, 20.0f },
> > +                      { 21.0f, 22.0f, 23.0f, 24.0f },
> > +                      { 25.0f, 26.0f, 27.0f, 28.0f },
> > +                      { 29.0f, 30.0f, 31.0f, 32.0f } } };
> > +  union U c;
> > +  foo (&c, &a, &b);
> > +  if (c.m[0][0] != 250.0f
> > +      || c.m[0][1] != 260.0f
> > +      || c.m[0][2] != 270.0f
> > +      || c.m[0][3] != 280.0f)
> > +    __builtin_abort ();
> > +  if (c.m[1][0] != 618.0f
> > +      || c.m[1][1] != 644.0f
> > +      || c.m[1][2] != 670.0f
> > +      || c.m[1][3] != 696.0f)
> > +    __builtin_abort ();
> > +  if (c.m[2][0] != 986.0f
> > +      || c.m[2][1] != 1028.0f
> > +      || c.m[2][2] != 1070.0f
> > +      || c.m[2][3] != 1112.0f)
> > +    __builtin_abort ();
> > +  if (c.m[3][0] != 1354.0f
> > +      || c.m[3][1] != 1412.0f
> > +      || c.m[3][2] != 1470.0f
> > +      || c.m[3][3] != 1528.0f)
> > +    __builtin_abort ();
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  if (__builtin_cpu_supports ("avx512f"))
> > +    do_test ();
> > +  return 0;
> > +}
> > --
> > 2.34.1
> >
>
>
> --
> BR,
> Hongtao
  
Jakub Jelinek Jan. 26, 2022, 11:09 a.m. UTC | #3
On Sun, Jan 23, 2022 at 04:39:34PM -0800, H.J. Lu via Gcc-patches wrote:
> On Sun, Jan 23, 2022 at 4:35 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > On Sun, Jan 23, 2022 at 8:28 PM H.J. Lu via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > Return false for invalid mode on memory broadcast in bcst_mem_operand:
> > >
> > > (vec_duplicate:V16SF (mem/j:V4SF (reg/v/f:DI 109 [ b ])))
> > >
> > Yes, thanks.
> 
> I will also backport it to GCC 11 branch.

On i686-linux this new testcase FAILs with:
cc1: warning: SSE instruction set disabled, using 387 arithmetics
FAIL: gcc.target/i386/pr104188.c (test for excess errors)
Excess errors:
cc1: warning: SSE instruction set disabled, using 387 arithmetics
This is because it uses -mfpmath=sse, but -msse2 isn't on.  Fixed
by adding -msse2 to dg-options and requiring sse2_runtime effective
target.

Tested on x86_64-linux and i686-linux, committed as obvious to trunk/11:

2022-01-26  Jakub Jelinek  <jakub@redhat.com>

	PR target/104188
	* gcc.target/i386/pr104188.c: Add dg-require-effective-target
	sse2_runtime.  Add -msse2 to dg-options.

--- gcc/testsuite/gcc.target/i386/pr104188.c.jj	2022-01-24 10:18:21.174512441 +0100
+++ gcc/testsuite/gcc.target/i386/pr104188.c	2022-01-26 11:54:58.025950692 +0100
@@ -1,5 +1,6 @@
 /* { dg-do run { target avx512f } } */
-/* { dg-options "-O2 -mfpmath=sse" } */
+/* { dg-require-effective-target sse2_runtime } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse" } */
 
 #include <x86intrin.h>
 

	Jakub
  

Patch

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index eae6ab58e23..a8cc17a054d 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1157,6 +1157,8 @@  (define_predicate "bcst_mem_operand"
 	    (ior (match_test "TARGET_AVX512VL")
 		 (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")))
        (match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))")
+       (match_test "GET_MODE (XEXP (op, 0))
+		    == GET_MODE_INNER (GET_MODE (op))")
        (match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))")))
 
 ; Return true when OP is bcst_mem_operand or vector_memory_operand.
diff --git a/gcc/testsuite/gcc.target/i386/pr104188.c b/gcc/testsuite/gcc.target/i386/pr104188.c
new file mode 100644
index 00000000000..c6f615b9625
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104188.c
@@ -0,0 +1,70 @@ 
+/* { dg-do run { target avx512f } } */
+/* { dg-options "-O2 -mfpmath=sse" } */
+
+#include <x86intrin.h>
+
+union U {
+  float m[4][4];
+  __m128 r[4];
+  __m512 s;
+};
+
+__attribute__((noipa, target("avx512f")))
+void
+foo (union U *x, union U *a, union U *b)
+{
+  __m512 c = _mm512_loadu_ps (&a->s);
+  __m512 d = _mm512_broadcast_f32x4 (b->r[0]);
+  __m512 e = _mm512_broadcast_f32x4 (b->r[1]);
+  __m512 f = _mm512_broadcast_f32x4 (b->r[2]);
+  __m512 g = _mm512_broadcast_f32x4 (b->r[3]);
+  __m512 h = _mm512_mul_ps (_mm512_permute_ps (c, 0x00), d);
+  h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0x55), e, h);
+  h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xaa), f, h);
+  h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xff), g, h);
+  _mm512_storeu_ps (&x->s, h);
+}
+
+__attribute__((noipa, target("avx512f")))
+void
+do_test (void)
+{
+  union U a = { .m = { { 1.0f, 2.0f, 3.0f, 4.0f },
+		       { 5.0f, 6.0f, 7.0f, 8.0f },
+		       { 9.0f, 10.0f, 11.0f, 12.0f },
+		       { 13.0f, 14.0f, 15.0f, 16.0f } } };
+  union U b = { .m = { { 17.0f, 18.0f, 19.0f, 20.0f },
+		       { 21.0f, 22.0f, 23.0f, 24.0f },
+		       { 25.0f, 26.0f, 27.0f, 28.0f },
+		       { 29.0f, 30.0f, 31.0f, 32.0f } } };
+  union U c;
+  foo (&c, &a, &b);
+  if (c.m[0][0] != 250.0f
+      || c.m[0][1] != 260.0f
+      || c.m[0][2] != 270.0f
+      || c.m[0][3] != 280.0f)
+    __builtin_abort ();
+  if (c.m[1][0] != 618.0f
+      || c.m[1][1] != 644.0f
+      || c.m[1][2] != 670.0f
+      || c.m[1][3] != 696.0f)
+    __builtin_abort ();
+  if (c.m[2][0] != 986.0f
+      || c.m[2][1] != 1028.0f
+      || c.m[2][2] != 1070.0f
+      || c.m[2][3] != 1112.0f)
+    __builtin_abort ();
+  if (c.m[3][0] != 1354.0f
+      || c.m[3][1] != 1412.0f
+      || c.m[3][2] != 1470.0f
+      || c.m[3][3] != 1528.0f)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  if (__builtin_cpu_supports ("avx512f"))
+    do_test ();
+  return 0;
+}