@@ -13610,12 +13610,18 @@ ix86_print_operand (FILE *file, rtx x, int code)
case E_V8SFmode:
case E_V8DFmode:
case E_V8DImode:
+ case E_V8HFmode:
fputs ("{1to8}", file);
break;
case E_V16SFmode:
case E_V16SImode:
+ case E_V16HFmode:
fputs ("{1to16}", file);
break;
+ case E_V32HFmode:
+ fputs ("{1to32}", file);
+ break;
+
default:
gcc_unreachable ();
}
@@ -1101,7 +1101,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define VALID_BCST_MODE_P(MODE) \
((MODE) == SFmode || (MODE) == DFmode \
- || (MODE) == SImode || (MODE) == DImode)
+ || (MODE) == SImode || (MODE) == DImode \
+ || (MODE) == HFmode)
/* It is possible to write patterns to move flags; but until someone
does it, */
@@ -821,14 +821,6 @@ (define_mode_iterator V8_128 [V8HI V8HF])
(define_mode_iterator V16_256 [V16HI V16HF])
(define_mode_iterator V32_512 [V32HI V32HF])
-(define_mode_attr avx512bcst
- [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
- (V8SI "%{1to8%}") (V4DI "%{1to4%}")
- (V16SI "%{1to16%}") (V8DI "%{1to8%}")
- (V4SF "%{1to4%}") (V2DF "%{1to2%}")
- (V8SF "%{1to8%}") (V4DF "%{1to4%}")
- (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
-
;; Mapping from float mode to required SSE level
(define_mode_attr sse
[(SF "sse") (DF "sse2") (HF "avx512fp16")
new file mode 100644
@@ -0,0 +1,33 @@
+/* PR target/87767 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */
+/* { dg-additional-options "-mdynamic-no-pic" { target { *-*-darwin* && ia32 } } } */
+/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to8\\\}" 4 } } */
+/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to16\\\}" 4 } } */
+/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to32\\\}" 4 } } */
+
+typedef _Float16 v8hf __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf __attribute__ ((vector_size (64)));
+
+#define CONSTANT 101 /* Scalar right-hand operand used by every FOO body.  */
+/* Define noipa function foo_<TAG>_<VEC> (a) returning a BINOP CONSTANT.  */
+#define FOO(VEC, TAG, BINOP) \
+VEC __attribute__ ((noipa)) \
+foo_##TAG##_##VEC (VEC a) \
+{ \
+  VEC result = a BINOP CONSTANT; \
+  return result; \
+}
+FOO (v8hf, add, +)   /* foo_add_<vtype>: a + CONSTANT  */
+FOO (v16hf, add, +)
+FOO (v32hf, add, +)
+FOO (v8hf, sub, -)   /* foo_sub_<vtype>: a - CONSTANT  */
+FOO (v16hf, sub, -)
+FOO (v32hf, sub, -)
+FOO (v8hf, mul, *)   /* foo_mul_<vtype>: a * CONSTANT  */
+FOO (v16hf, mul, *)
+FOO (v32hf, mul, *)
+FOO (v8hf, div, /)   /* foo_div_<vtype>: a / CONSTANT  */
+FOO (v16hf, div, /)
+FOO (v32hf, div, /)
new file mode 100644
@@ -0,0 +1,53 @@
+/* PR target/87767 */
+/* { dg-do run } */
+/* { dg-options "-O1 -mavx512fp16 -mavx512dq -mavx512vl" } */
+/* { dg-require-effective-target avx512dq } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512fp16 } */
+
+#define AVX512DQ
+#define AVX512VL
+#define AVX512FP16
+#include "avx512f-helper.h"
+
+#include "avx512fp16-broadcast-1.c"
+
+/* Run foo_<OP_NAME>_<VTYPE> on N lanes of TYPE and abort if any lane differs
+   from the scalar result computed with the same operator and CONSTANT.  */
+#define RTEST(VTYPE, TYPE, N, OP_NAME, OP) \
+  do { \
+    TYPE exp[N], src[N]; \
+    VTYPE arg, res; \
+    for (int i = 0; i < N; i++) \
+      src[i] = 2.0 * i - 8.4; \
+    /* src is only TYPE-aligned, so copy it instead of casting to VTYPE *.  */ \
+    __builtin_memcpy (&arg, src, sizeof (arg)); \
+    res = foo_##OP_NAME##_##VTYPE (arg); \
+    for (int i = 0; i < N; i++) \
+      exp[i] = src[i] OP CONSTANT; \
+    for (int j = 0; j < N; j++) \
+      if (res[j] != exp[j]) \
+        abort (); \
+  } while (0)
+
+/* Check each foo_<op>_<vtype> helper; all three vector widths run here.  */
+void test_256 (void)
+{
+  RTEST (v8hf, _Float16, 8, add, +);
+  RTEST (v16hf, _Float16, 16, add, +);
+  RTEST (v32hf, _Float16, 32, add, +);
+  RTEST (v8hf, _Float16, 8, sub, -);
+  RTEST (v16hf, _Float16, 16, sub, -);
+  RTEST (v32hf, _Float16, 32, sub, -);
+  RTEST (v8hf, _Float16, 8, mul, *);
+  RTEST (v16hf, _Float16, 16, mul, *);
+  RTEST (v32hf, _Float16, 32, mul, *);
+  RTEST (v8hf, _Float16, 8, div, /);
+  RTEST (v16hf, _Float16, 16, div, /);
+  RTEST (v32hf, _Float16, 32, div, /);
+}
+
+/* Nothing to do here: the v8hf cases already run from test_256.  */
+void test_128 (void)
+{
+}