x86: Also check VALID_BCST_MODE_P on memory broadcast
Commit Message
Return false for invalid broadcast mode in bcst_mem_operand on memory
broadcast:
(vec_duplicate:V16SF (mem/j:V4SF (reg/v/f:DI 109 [ b ])))
gcc/
PR target/104188
* config/i386/predicates.md (bcst_mem_operand): Also check
VALID_BCST_MODE_P on memory broadcast.
gcc/testsuite/
PR target/104188
* gcc.target/i386/pr104188.c: New test.
---
gcc/config/i386/predicates.md | 3 +-
gcc/testsuite/gcc.target/i386/pr104188.c | 70 ++++++++++++++++++++++++
2 files changed, 72 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr104188.c
Comments
On Sat, Jan 22, 2022 at 2:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Return false for invalid broadcast mode in bcst_mem_operand on memory
> broadcast:
>
> (vec_duplicate:V16SF (mem/j:V4SF (reg/v/f:DI 109 [ b ])))
>
> gcc/
>
> PR target/104188
> * config/i386/predicates.md (bcst_mem_operand): Also check
> VALID_BCST_MODE_P on memory broadcast.
>
> gcc/testsuite/
>
> PR target/104188
> * gcc.target/i386/pr104188.c: New test.
> ---
> gcc/config/i386/predicates.md | 3 +-
> gcc/testsuite/gcc.target/i386/pr104188.c | 70 ++++++++++++++++++++++++
> 2 files changed, 72 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr104188.c
>
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index eae6ab58e23..fd716f006f3 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -1157,7 +1157,8 @@ (define_predicate "bcst_mem_operand"
> (ior (match_test "TARGET_AVX512VL")
> (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")))
> (match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))")
> - (match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))")))
> + (match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))")
> + (match_test "VALID_BCST_MODE_P (GET_MODE (XEXP (op, 0)))")))
Is this one better?
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index eae6ab58e23..a8cc17a054d 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1157,6 +1157,8 @@ (define_predicate "bcst_mem_operand"
(ior (match_test "TARGET_AVX512VL")
(match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")))
(match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))")
+ (match_test "GET_MODE (XEXP (op, 0))
+ == GET_MODE_INNER (GET_MODE (op))")
(match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))")))
; Return true when OP is bcst_mem_operand or vector_memory_operand.
> ; Return true when OP is bcst_mem_operand or vector_memory_operand.
> (define_predicate "bcst_vector_operand"
> diff --git a/gcc/testsuite/gcc.target/i386/pr104188.c b/gcc/testsuite/gcc.target/i386/pr104188.c
> new file mode 100644
> index 00000000000..c6f615b9625
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr104188.c
> @@ -0,0 +1,70 @@
> +/* { dg-do run { target avx512f } } */
> +/* { dg-options "-O2 -mfpmath=sse" } */
> +
> +#include <x86intrin.h>
> +
> +union U {
> + float m[4][4];
> + __m128 r[4];
> + __m512 s;
> +};
> +
> +__attribute__((noipa, target("avx512f")))
> +void
> +foo (union U *x, union U *a, union U *b)
> +{
> + __m512 c = _mm512_loadu_ps (&a->s);
> + __m512 d = _mm512_broadcast_f32x4 (b->r[0]);
> + __m512 e = _mm512_broadcast_f32x4 (b->r[1]);
> + __m512 f = _mm512_broadcast_f32x4 (b->r[2]);
> + __m512 g = _mm512_broadcast_f32x4 (b->r[3]);
> + __m512 h = _mm512_mul_ps (_mm512_permute_ps (c, 0x00), d);
> + h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0x55), e, h);
> + h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xaa), f, h);
> + h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xff), g, h);
> + _mm512_storeu_ps (&x->s, h);
> +}
> +
> +__attribute__((noipa, target("avx512f")))
> +void
> +do_test (void)
> +{
> + union U a = { .m = { { 1.0f, 2.0f, 3.0f, 4.0f },
> + { 5.0f, 6.0f, 7.0f, 8.0f },
> + { 9.0f, 10.0f, 11.0f, 12.0f },
> + { 13.0f, 14.0f, 15.0f, 16.0f } } };
> + union U b = { .m = { { 17.0f, 18.0f, 19.0f, 20.0f },
> + { 21.0f, 22.0f, 23.0f, 24.0f },
> + { 25.0f, 26.0f, 27.0f, 28.0f },
> + { 29.0f, 30.0f, 31.0f, 32.0f } } };
> + union U c;
> + foo (&c, &a, &b);
> + if (c.m[0][0] != 250.0f
> + || c.m[0][1] != 260.0f
> + || c.m[0][2] != 270.0f
> + || c.m[0][3] != 280.0f)
> + __builtin_abort ();
> + if (c.m[1][0] != 618.0f
> + || c.m[1][1] != 644.0f
> + || c.m[1][2] != 670.0f
> + || c.m[1][3] != 696.0f)
> + __builtin_abort ();
> + if (c.m[2][0] != 986.0f
> + || c.m[2][1] != 1028.0f
> + || c.m[2][2] != 1070.0f
> + || c.m[2][3] != 1112.0f)
> + __builtin_abort ();
> + if (c.m[3][0] != 1354.0f
> + || c.m[3][1] != 1412.0f
> + || c.m[3][2] != 1470.0f
> + || c.m[3][3] != 1528.0f)
> + __builtin_abort ();
> +}
> +
> +int
> +main ()
> +{
> + if (__builtin_cpu_supports ("avx512f"))
> + do_test ();
> + return 0;
> +}
> --
> 2.34.1
>
@@ -1157,7 +1157,8 @@ (define_predicate "bcst_mem_operand"
(ior (match_test "TARGET_AVX512VL")
(match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")))
(match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))")
- (match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))")))
+ (match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))")
+ (match_test "VALID_BCST_MODE_P (GET_MODE (XEXP (op, 0)))")))
; Return true when OP is bcst_mem_operand or vector_memory_operand.
(define_predicate "bcst_vector_operand"
new file mode 100644
@@ -0,0 +1,70 @@
+/* { dg-do run { target avx512f } } */
+/* { dg-options "-O2 -mfpmath=sse" } */
+
+#include <x86intrin.h>
+
+union U {
+ float m[4][4];
+ __m128 r[4];
+ __m512 s;
+};
+
+__attribute__((noipa, target("avx512f")))
+void
+foo (union U *x, union U *a, union U *b)
+{
+ __m512 c = _mm512_loadu_ps (&a->s);
+ __m512 d = _mm512_broadcast_f32x4 (b->r[0]);
+ __m512 e = _mm512_broadcast_f32x4 (b->r[1]);
+ __m512 f = _mm512_broadcast_f32x4 (b->r[2]);
+ __m512 g = _mm512_broadcast_f32x4 (b->r[3]);
+ __m512 h = _mm512_mul_ps (_mm512_permute_ps (c, 0x00), d);
+ h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0x55), e, h);
+ h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xaa), f, h);
+ h = _mm512_fmadd_ps (_mm512_permute_ps (c, 0xff), g, h);
+ _mm512_storeu_ps (&x->s, h);
+}
+
+__attribute__((noipa, target("avx512f")))
+void
+do_test (void)
+{
+ union U a = { .m = { { 1.0f, 2.0f, 3.0f, 4.0f },
+ { 5.0f, 6.0f, 7.0f, 8.0f },
+ { 9.0f, 10.0f, 11.0f, 12.0f },
+ { 13.0f, 14.0f, 15.0f, 16.0f } } };
+ union U b = { .m = { { 17.0f, 18.0f, 19.0f, 20.0f },
+ { 21.0f, 22.0f, 23.0f, 24.0f },
+ { 25.0f, 26.0f, 27.0f, 28.0f },
+ { 29.0f, 30.0f, 31.0f, 32.0f } } };
+ union U c;
+ foo (&c, &a, &b);
+ if (c.m[0][0] != 250.0f
+ || c.m[0][1] != 260.0f
+ || c.m[0][2] != 270.0f
+ || c.m[0][3] != 280.0f)
+ __builtin_abort ();
+ if (c.m[1][0] != 618.0f
+ || c.m[1][1] != 644.0f
+ || c.m[1][2] != 670.0f
+ || c.m[1][3] != 696.0f)
+ __builtin_abort ();
+ if (c.m[2][0] != 986.0f
+ || c.m[2][1] != 1028.0f
+ || c.m[2][2] != 1070.0f
+ || c.m[2][3] != 1112.0f)
+ __builtin_abort ();
+ if (c.m[3][0] != 1354.0f
+ || c.m[3][1] != 1412.0f
+ || c.m[3][2] != 1470.0f
+ || c.m[3][3] != 1528.0f)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ if (__builtin_cpu_supports ("avx512f"))
+ do_test ();
+ return 0;
+}