[AVX512FP16] Support embedded broadcast for AVX512FP16 instructions.

Message ID 20210916081204.3571665-1-hongtao.liu@intel.com
State New
Headers
Series [AVX512FP16] Support embedded broadcast for AVX512FP16 instructions. |

Commit Message

liuhongt Sept. 16, 2021, 8:12 a.m. UTC
  Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
  Runtime tests passed under sde{-m32,}.

gcc/ChangeLog:

	PR target/87767
	* config/i386/i386.c (ix86_print_operand): Handle
	V8HF/V16HF/V32HFmode.
	* config/i386/i386.h (VALID_BCST_MODE_P): Add HFmode.
	* config/i386/sse.md (avx512bcst): Remove.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-broadcast-1.c: New test.
	* gcc.target/i386/avx512fp16-broadcast-2.c: New test.
---
 gcc/config/i386/i386.c                        |  6 +++
 gcc/config/i386/i386.h                        |  3 +-
 gcc/config/i386/sse.md                        |  8 ---
 .../gcc.target/i386/avx512fp16-broadcast-1.c  | 33 ++++++++++++
 .../gcc.target/i386/avx512fp16-broadcast-2.c  | 53 +++++++++++++++++++
 5 files changed, 94 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-broadcast-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-broadcast-2.c
  

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d7abff0f396..4dec27845fe 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13610,12 +13610,18 @@  ix86_print_operand (FILE *file, rtx x, int code)
 	case E_V8SFmode:
 	case E_V8DFmode:
 	case E_V8DImode:
+	case E_V8HFmode:
 	  fputs ("{1to8}", file);
 	  break;
 	case E_V16SFmode:
 	case E_V16SImode:
+	case E_V16HFmode:
 	  fputs ("{1to16}", file);
 	  break;
+	case E_V32HFmode:
+	  fputs ("{1to32}", file);
+	  break;
+
 	default:
 	  gcc_unreachable ();
 	}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index e76bb55c080..285aef9ce5e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1101,7 +1101,8 @@  extern const char *host_detect_local_cpu (int argc, const char **argv);
 
 #define VALID_BCST_MODE_P(MODE)			\
   ((MODE) == SFmode || (MODE) == DFmode		\
-   || (MODE) == SImode || (MODE) == DImode)
+   || (MODE) == SImode || (MODE) == DImode	\
+   || (MODE) == HFmode)
 
 /* It is possible to write patterns to move flags; but until someone
    does it,  */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a1d419292d1..ba3e5009852 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -821,14 +821,6 @@  (define_mode_iterator V8_128 [V8HI V8HF])
 (define_mode_iterator V16_256 [V16HI V16HF])
 (define_mode_iterator V32_512 [V32HI V32HF])
 
-(define_mode_attr avx512bcst
-  [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
-   (V8SI "%{1to8%}") (V4DI "%{1to4%}")
-   (V16SI "%{1to16%}") (V8DI "%{1to8%}")
-   (V4SF "%{1to4%}") (V2DF "%{1to2%}")
-   (V8SF "%{1to8%}") (V4DF "%{1to4%}")
-   (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
-
 ;; Mapping from float mode to required SSE level
 (define_mode_attr sse
   [(SF "sse") (DF "sse2") (HF "avx512fp16")
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-broadcast-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-broadcast-1.c
new file mode 100644
index 00000000000..1da73493f3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-broadcast-1.c
@@ -0,0 +1,33 @@ 
+/* PR target/87767 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -mavx512dq" } */
+/* { dg-additional-options "-mdynamic-no-pic" { target { *-*-darwin* && ia32 } } } */
+/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to8\\\}" 4 } }  */
+/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to16\\\}" 4 } }  */
+/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to32\\\}" 4 } }  */
+
+typedef _Float16 v8hf  __attribute__ ((vector_size (16)));
+typedef _Float16 v16hf  __attribute__ ((vector_size (32)));
+typedef _Float16 v32hf  __attribute__ ((vector_size (64)));
+
+#define CONSTANT 101 /* Scalar operand used by FOO; no trailing ';' so it expands to an expression.  */
+#define FOO(VTYPE, OP_NAME, OP)			\
+VTYPE						\
+ __attribute__ ((noipa))			\
+foo_##OP_NAME##_##VTYPE (VTYPE a)		\
+{						\
+  return a OP CONSTANT;				\
+}						\
+
+FOO (v8hf, add, +);
+FOO (v16hf, add, +);
+FOO (v32hf, add, +);
+FOO (v8hf, sub, -);
+FOO (v16hf, sub, -);
+FOO (v32hf, sub, -);
+FOO (v8hf, mul, *);
+FOO (v16hf, mul, *);
+FOO (v32hf, mul, *);
+FOO (v8hf, div, /);
+FOO (v16hf, div, /);
+FOO (v32hf, div, /);
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-broadcast-2.c b/gcc/testsuite/gcc.target/i386/avx512fp16-broadcast-2.c
new file mode 100644
index 00000000000..839bb562d3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-broadcast-2.c
@@ -0,0 +1,53 @@ 
+/* PR target/87767 */
+/* { dg-do run } */
+/* { dg-options "-O1 -mavx512fp16 -mavx512dq -mavx512vl" } */
+/* { dg-require-effective-target avx512dq } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512fp16 } */
+
+#define AVX512DQ
+#define AVX512VL
+#define AVX512FP16
+#include "avx512f-helper.h"
+
+#include "avx512fp16-broadcast-1.c"
+
+#define RTEST(VTYPE, TYPE, N, OP_NAME, OP)		\
+  do							\
+    {							\
+      TYPE exp[N], src[N];				\
+      VTYPE res;					\
+      for (int i = 0; i < N; i++)			\
+	src[i] = 2.0 * i - 8.4;				\
+      res = foo_##OP_NAME##_##VTYPE (*(VTYPE*)&src[0]);	\
+      for (int i = 0; i < N; i ++)			\
+	exp[i] = src[i] OP CONSTANT;			\
+      for (int j = 0; j < N; j++)			\
+	{						\
+	  if (res[j] != exp[j])				\
+	    abort();					\
+	}						\
+    }							\
+  while (0)
+
+void
+test_256 (void)
+{
+  RTEST (v8hf, _Float16, 8, add, +);
+  RTEST (v16hf, _Float16, 16, add, +);
+  RTEST (v32hf, _Float16, 32, add, +);
+  RTEST (v8hf, _Float16, 8, sub, -);
+  RTEST (v16hf, _Float16, 16, sub, -);
+  RTEST (v32hf, _Float16, 32, sub, -);
+  RTEST (v8hf, _Float16, 8, mul, *);
+  RTEST (v16hf, _Float16, 16, mul, *);
+  RTEST (v32hf, _Float16, 32, mul, *);
+  RTEST (v8hf, _Float16, 8, div, /);
+  RTEST (v16hf, _Float16, 16, div, /);
+  RTEST (v32hf, _Float16, 32, div, /);
+}
+
+void
+test_128 (void)
+{
+}