AVX512FP16: Add *_set1_pch intrinsics.

Message ID 20211015083807.21741-1-dianhong.xu@intel.com
State Committed
Commit 38f6ee6bfc4633175ca6f6d29e597d379ccae820
Headers
Series AVX512FP16: Add *_set1_pch intrinsics. |

Commit Message

Li, Pan2 via Gcc-patches Oct. 15, 2021, 8:38 a.m. UTC
  From: dianhong xu <dianhong.xu@intel.com>

Add *_set1_pch (_Float16 _Complex A) intrinsics.

gcc/ChangeLog:

	* config/i386/avx512fp16intrin.h:
	(_mm512_set1_pch): New intrinsic.
	* config/i386/avx512fp16vlintrin.h:
	(_mm256_set1_pch): New intrinsic.
	(_mm_set1_pch): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-set1-pch-1a.c: New test.
	* gcc.target/i386/avx512fp16-set1-pch-1b.c: New test.
	* gcc.target/i386/avx512fp16vl-set1-pch-1a.c: New test.
	* gcc.target/i386/avx512fp16vl-set1-pch-1b.c: New test.
---
 gcc/config/i386/avx512fp16intrin.h            | 13 +++++
 gcc/config/i386/avx512fp16vlintrin.h          | 26 +++++++++
 .../gcc.target/i386/avx512fp16-set1-pch-1a.c  | 13 +++++
 .../gcc.target/i386/avx512fp16-set1-pch-1b.c  | 42 ++++++++++++++
 .../i386/avx512fp16vl-set1-pch-1a.c           | 20 +++++++
 .../i386/avx512fp16vl-set1-pch-1b.c           | 57 +++++++++++++++++++
 6 files changed, 171 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c
  

Comments

Hongtao Liu Oct. 18, 2021, 2:07 a.m. UTC | #1
On Fri, Oct 15, 2021 at 4:38 PM dianhong.xu--- via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> From: dianhong xu <dianhong.xu@intel.com>
>
> Add *_set1_pch (_Float16 _Complex A) intrinsics.
>
> gcc/ChangeLog:
>
>         * config/i386/avx512fp16intrin.h:
>         (_mm512_set1_pch): New intrinsic.
>         * config/i386/avx512fp16vlintrin.h:
>         (_mm256_set1_pch): New intrinsic.
>         (_mm_set1_pch): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/avx512fp16-set1-pch-1a.c: New test.
>         * gcc.target/i386/avx512fp16-set1-pch-1b.c: New test.
>         * gcc.target/i386/avx512fp16vl-set1-pch-1a.c: New test.
>         * gcc.target/i386/avx512fp16vl-set1-pch-1b.c: New test.
LGTM.
> ---
>  gcc/config/i386/avx512fp16intrin.h            | 13 +++++
>  gcc/config/i386/avx512fp16vlintrin.h          | 26 +++++++++
>  .../gcc.target/i386/avx512fp16-set1-pch-1a.c  | 13 +++++
>  .../gcc.target/i386/avx512fp16-set1-pch-1b.c  | 42 ++++++++++++++
>  .../i386/avx512fp16vl-set1-pch-1a.c           | 20 +++++++
>  .../i386/avx512fp16vl-set1-pch-1b.c           | 57 +++++++++++++++++++
>  6 files changed, 171 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c
>
> diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
> index 079ce321c01..17025d68b8e 100644
> --- a/gcc/config/i386/avx512fp16intrin.h
> +++ b/gcc/config/i386/avx512fp16intrin.h
> @@ -7237,6 +7237,19 @@ _mm512_permutexvar_ph (__m512i __A, __m512h __B)
>                                                      (__mmask32)-1);
>  }
>
> +extern __inline __m512h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_set1_pch (_Float16 _Complex __A)
> +{
> +  union
> +  {
> +    _Float16 _Complex a;
> +    float b;
> +  } u = { .a = __A};
> +
> +  return (__m512h) _mm512_set1_ps (u.b);
> +}
> +
>  #ifdef __DISABLE_AVX512FP16__
>  #undef __DISABLE_AVX512FP16__
>  #pragma GCC pop_options
> diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
> index f83a429ba43..1de4513d7f1 100644
> --- a/gcc/config/i386/avx512fp16vlintrin.h
> +++ b/gcc/config/i386/avx512fp16vlintrin.h
> @@ -3315,6 +3315,32 @@ _mm_permutexvar_ph (__m128i __A, __m128h __B)
>                                                      (__mmask8)-1);
>  }
>
> +extern __inline __m256h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_set1_pch (_Float16 _Complex __A)
> +{
> +  union
> +  {
> +    _Float16 _Complex a;
> +    float b;
> +  } u = { .a = __A };
> +
> +  return (__m256h) _mm256_set1_ps (u.b);
> +}
> +
> +extern __inline __m128h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_set1_pch (_Float16 _Complex __A)
> +{
> +  union
> +  {
> +    _Float16 _Complex a;
> +    float b;
> +  } u = { .a = __A };
> +
> +  return (__m128h) _mm_set1_ps (u.b);
> +}
> +
>  #ifdef __DISABLE_AVX512FP16VL__
>  #undef __DISABLE_AVX512FP16VL__
>  #pragma GCC pop_options
> diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c
> new file mode 100644
> index 00000000000..0055193f243
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile} */
> +/* { dg-options "-O2 -mavx512fp16" } */
> +
> +#include <immintrin.h>
> +
> +__m512h
> +__attribute__ ((noinline, noclone))
> +test_mm512_set1_pch (_Float16 _Complex A)
> +{
> +  return _mm512_set1_pch(A);
> +}
> +
> +/* { dg-final { scan-assembler "vbroadcastss\[ \\t\]+\[^\n\r\]*%zmm\[01\]" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c
> new file mode 100644
> index 00000000000..450d7e37237
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run { target avx512fp16 } } */
> +/* { dg-options "-O2 -mavx512fp16" } */
> +
> +#include<stdio.h>
> +#include <math.h>
> +#include <complex.h>
> +
> +static void do_test (void);
> +
> +#define DO_TEST do_test
> +#define AVX512FP16
> +
> +#include <immintrin.h>
> +#include "avx512-check.h"
> +
> +static void
> +do_test (void)
> +{
> + _Float16 _Complex fc = 1.0 + 1.0*I;
> +  union
> +  {
> +    _Float16 _Complex a;
> +    float b;
> +  } u = { .a = fc };
> +  float ff= u.b;
> +
> +  typedef union
> +  {
> +    float fp[16];
> +    __m512h m512h;
> +  } u1;
> +
> +  __m512h test512 = _mm512_set1_pch(fc);
> +
> +  u1 test;
> +  test.m512h = test512;
> +  for (int i = 0; i<16; i++)
> +  {
> +    if (test.fp[i] != ff) abort();
> +  }
> +
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c
> new file mode 100644
> index 00000000000..4c5624f9935
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile} */
> +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> +
> +#include <immintrin.h>
> +
> +__m256h
> +__attribute__ ((noinline, noclone))
> +test_mm256_set1_pch (_Float16 _Complex A)
> +{
> +  return _mm256_set1_pch(A);
> +}
> +
> +__m128h
> +__attribute__ ((noinline, noclone))
> +test_mm_set1_pch (_Float16 _Complex A)
> +{
> +  return _mm_set1_pch(A);
> +}
> +
> +/* { dg-final { scan-assembler-times "vbroadcastss" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c
> new file mode 100644
> index 00000000000..aebff141821
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c
> @@ -0,0 +1,57 @@
> +/* { dg-do run { target avx512fp16 } } */
> +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> +
> +#include<stdio.h>
> +#include <math.h>
> +#include <complex.h>
> +
> +static void do_test (void);
> +
> +#define DO_TEST do_test
> +#define AVX512FP16
> +
> +#include <immintrin.h>
> +#include "avx512-check.h"
> +
> +static void
> +do_test (void)
> +{
> +  _Float16 _Complex fc = 1.0 + 1.0*I;
> +  union
> +  {
> +    _Float16 _Complex a;
> +    float b;
> +  } u = { .a = fc };
> +  float ff= u.b;
> +
> +  typedef union
> +  {
> +    float fp[8];
> +    __m256h m256h;
> +  } u1;
> +
> +  __m256h test256 = _mm256_set1_pch(fc);
> +
> +  u1 test1;
> +  test1.m256h = test256;
> +  for (int i = 0; i<8; i++)
> +  {
> +    if (test1.fp[i] != ff) abort();
> +  }
> +
> +  typedef union
> +  {
> +    float fp[4];
> +    __m128h m128h;
> +  } u2;
> +
> +  __m128h test128 = _mm_set1_pch(fc);
> +
> +  u2 test2;
> +  test2.m128h = test128;
> +  for (int i = 0; i<4; i++)
> +  {
> +    if (test2.fp[i] != ff) abort();
> +  }
> +
> +}
> --
> 2.18.1
>
  

Patch

diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 079ce321c01..17025d68b8e 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -7237,6 +7237,19 @@  _mm512_permutexvar_ph (__m512i __A, __m512h __B)
 						     (__mmask32)-1);
 }
 
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_pch (_Float16 _Complex __A)
+{
+  /* Pun the complex half pair __A to one float and broadcast it.  */
+  union
+  {
+    _Float16 _Complex __a;
+    float __b;
+  } __u = { .__a = __A };
+  return (__m512h) _mm512_set1_ps (__u.__b);
+}
+
 #ifdef __DISABLE_AVX512FP16__
 #undef __DISABLE_AVX512FP16__
 #pragma GCC pop_options
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
index f83a429ba43..1de4513d7f1 100644
--- a/gcc/config/i386/avx512fp16vlintrin.h
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -3315,6 +3315,32 @@  _mm_permutexvar_ph (__m128i __A, __m128h __B)
 						     (__mmask8)-1);
 }
 
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_pch (_Float16 _Complex __A)
+{
+  /* Pun the complex half pair __A to one float and broadcast it.  */
+  union
+  {
+    _Float16 _Complex __a;
+    float __b;
+  } __u = { .__a = __A };
+  return (__m256h) _mm256_set1_ps (__u.__b);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pch (_Float16 _Complex __A)
+{
+  /* Pun the complex half pair __A to one float and broadcast it.  */
+  union
+  {
+    _Float16 _Complex __a;
+    float __b;
+  } __u = { .__a = __A };
+  return (__m128h) _mm_set1_ps (__u.__b);
+}
+
 #ifdef __DISABLE_AVX512FP16VL__
 #undef __DISABLE_AVX512FP16VL__
 #pragma GCC pop_options
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c
new file mode 100644
index 00000000000..0055193f243
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16" } */
+/* Expect _mm512_set1_pch to expand to a vbroadcastss on a zmm register.  */
+#include <immintrin.h>
+
+__m512h
+__attribute__ ((noinline, noclone))
+test_mm512_set1_pch (_Float16 _Complex A)
+{
+  return _mm512_set1_pch (A);
+}
+
+/* { dg-final { scan-assembler "vbroadcastss\[ \\t\]+\[^\n\r\]*%zmm\[01\]" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c
new file mode 100644
index 00000000000..450d7e37237
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c
@@ -0,0 +1,42 @@ 
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+#include<stdio.h>
+#include <math.h>
+#include <complex.h>
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+
+#include <immintrin.h>
+#include "avx512-check.h"
+
+static void
+do_test (void)
+{
+  /* Every 32-bit lane of the result must equal the input's bit pattern
+     viewed as a float.  */
+  typedef union
+  {
+    float f32[16];
+    __m512h vec;
+  } lanes512;
+  _Float16 _Complex input = 1.0 + 1.0*I;
+  union
+  {
+    _Float16 _Complex cplx;
+    float punned;
+  } ref = { .cplx = input };
+  float expected = ref.punned;
+
+  lanes512 out;
+  out.vec = _mm512_set1_pch (input);
+
+  for (int lane = 0; lane < 16; ++lane)
+    {
+      if (out.f32[lane] != expected)
+        abort ();
+    }
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c
new file mode 100644
index 00000000000..4c5624f9935
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+/* Expect exactly two vbroadcastss in total for the two wrappers below.  */
+#include <immintrin.h>
+
+__m256h
+__attribute__ ((noinline, noclone))
+test_mm256_set1_pch (_Float16 _Complex A)
+{
+  return _mm256_set1_pch (A);
+}
+
+__m128h
+__attribute__ ((noinline, noclone))
+test_mm_set1_pch (_Float16 _Complex A)
+{
+  return _mm_set1_pch (A);
+}
+
+/* { dg-final { scan-assembler-times "vbroadcastss" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c
new file mode 100644
index 00000000000..aebff141821
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c
@@ -0,0 +1,57 @@ 
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+#include<stdio.h>
+#include <math.h>
+#include <complex.h>
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+
+#include <immintrin.h>
+#include "avx512-check.h"
+
+static void
+do_test (void)
+{
+  /* Every 32-bit lane of both results must equal the input's bit
+     pattern viewed as a float.  */
+  typedef union
+  {
+    float f32[8];
+    __m256h vec;
+  } lanes256;
+  typedef union
+  {
+    float f32[4];
+    __m128h vec;
+  } lanes128;
+
+  _Float16 _Complex input = 1.0 + 1.0*I;
+  union
+  {
+    _Float16 _Complex cplx;
+    float punned;
+  } ref = { .cplx = input };
+  float expected = ref.punned;
+
+  /* 256-bit variant: eight float-sized lanes.  */
+  lanes256 out256;
+  out256.vec = _mm256_set1_pch (input);
+  for (int lane = 0; lane < 8; ++lane)
+    {
+      if (out256.f32[lane] != expected)
+        abort ();
+    }
+
+  /* 128-bit variant: four float-sized lanes.  */
+  lanes128 out128;
+  out128.vec = _mm_set1_pch (input);
+  for (int lane = 0; lane < 4; ++lane)
+    {
+      if (out128.f32[lane] != expected)
+        abort ();
+    }
+}