AVX512FP16: Add *_set1_pch intrinsics.
Commit Message
From: dianhong xu <dianhong.xu@intel.com>
Add *_set1_pch (_Float16 _Complex A) intrinsics.
gcc/ChangeLog:
* config/i386/avx512fp16intrin.h:
(_mm512_set1_pch): New intrinsic.
* config/i386/avx512fp16vlintrin.h:
(_mm256_set1_pch): New intrinsic.
(_mm_set1_pch): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512fp16-set1-pch-1a.c: New test.
* gcc.target/i386/avx512fp16-set1-pch-1b.c: New test.
* gcc.target/i386/avx512fp16vl-set1-pch-1a.c: New test.
* gcc.target/i386/avx512fp16vl-set1-pch-1b.c: New test.
---
gcc/config/i386/avx512fp16intrin.h | 13 +++++
gcc/config/i386/avx512fp16vlintrin.h | 26 +++++++++
.../gcc.target/i386/avx512fp16-set1-pch-1a.c | 13 +++++
.../gcc.target/i386/avx512fp16-set1-pch-1b.c | 42 ++++++++++++++
.../i386/avx512fp16vl-set1-pch-1a.c | 20 +++++++
.../i386/avx512fp16vl-set1-pch-1b.c | 57 +++++++++++++++++++
6 files changed, 171 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c
Comments
On Fri, Oct 15, 2021 at 4:38 PM dianhong.xu--- via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> From: dianhong xu <dianhong.xu@intel.com>
>
> Add *_set1_pch (_Float16 _Complex A) intrinsics.
>
> gcc/ChangeLog:
>
> * config/i386/avx512fp16intrin.h:
> (_mm512_set1_pch): New intrinsic.
> * config/i386/avx512fp16vlintrin.h:
> (_mm256_set1_pch): New intrinsic.
> (_mm_set1_pch): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/avx512fp16-set1-pch-1a.c: New test.
> * gcc.target/i386/avx512fp16-set1-pch-1b.c: New test.
> * gcc.target/i386/avx512fp16vl-set1-pch-1a.c: New test.
> * gcc.target/i386/avx512fp16vl-set1-pch-1b.c: New test.
LGTM.
> ---
> gcc/config/i386/avx512fp16intrin.h | 13 +++++
> gcc/config/i386/avx512fp16vlintrin.h | 26 +++++++++
> .../gcc.target/i386/avx512fp16-set1-pch-1a.c | 13 +++++
> .../gcc.target/i386/avx512fp16-set1-pch-1b.c | 42 ++++++++++++++
> .../i386/avx512fp16vl-set1-pch-1a.c | 20 +++++++
> .../i386/avx512fp16vl-set1-pch-1b.c | 57 +++++++++++++++++++
> 6 files changed, 171 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c
> create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c
>
> diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
> index 079ce321c01..17025d68b8e 100644
> --- a/gcc/config/i386/avx512fp16intrin.h
> +++ b/gcc/config/i386/avx512fp16intrin.h
> @@ -7237,6 +7237,19 @@ _mm512_permutexvar_ph (__m512i __A, __m512h __B)
> (__mmask32)-1);
> }
>
> +extern __inline __m512h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_set1_pch (_Float16 _Complex __A)
> +{
> + union
> + {
> + _Float16 _Complex a;
> + float b;
> + } u = { .a = __A};
> +
> + return (__m512h) _mm512_set1_ps (u.b);
> +}
> +
> #ifdef __DISABLE_AVX512FP16__
> #undef __DISABLE_AVX512FP16__
> #pragma GCC pop_options
> diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
> index f83a429ba43..1de4513d7f1 100644
> --- a/gcc/config/i386/avx512fp16vlintrin.h
> +++ b/gcc/config/i386/avx512fp16vlintrin.h
> @@ -3315,6 +3315,32 @@ _mm_permutexvar_ph (__m128i __A, __m128h __B)
> (__mmask8)-1);
> }
>
> +extern __inline __m256h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm256_set1_pch (_Float16 _Complex __A)
> +{
> + union
> + {
> + _Float16 _Complex a;
> + float b;
> + } u = { .a = __A };
> +
> + return (__m256h) _mm256_set1_ps (u.b);
> +}
> +
> +extern __inline __m128h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_set1_pch (_Float16 _Complex __A)
> +{
> + union
> + {
> + _Float16 _Complex a;
> + float b;
> + } u = { .a = __A };
> +
> + return (__m128h) _mm_set1_ps (u.b);
> +}
> +
> #ifdef __DISABLE_AVX512FP16VL__
> #undef __DISABLE_AVX512FP16VL__
> #pragma GCC pop_options
> diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c
> new file mode 100644
> index 00000000000..0055193f243
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1a.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile} */
> +/* { dg-options "-O2 -mavx512fp16" } */
> +
> +#include <immintrin.h>
> +
> +__m512h
> +__attribute__ ((noinline, noclone))
> +test_mm512_set1_pch (_Float16 _Complex A)
> +{
> + return _mm512_set1_pch(A);
> +}
> +
> +/* { dg-final { scan-assembler "vbroadcastss\[ \\t\]+\[^\n\r\]*%zmm\[01\]" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c
> new file mode 100644
> index 00000000000..450d7e37237
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-set1-pch-1b.c
> @@ -0,0 +1,42 @@
> +/* { dg-do run { target avx512fp16 } } */
> +/* { dg-options "-O2 -mavx512fp16" } */
> +
> +#include<stdio.h>
> +#include <math.h>
> +#include <complex.h>
> +
> +static void do_test (void);
> +
> +#define DO_TEST do_test
> +#define AVX512FP16
> +
> +#include <immintrin.h>
> +#include "avx512-check.h"
> +
> +static void
> +do_test (void)
> +{
> + _Float16 _Complex fc = 1.0 + 1.0*I;
> + union
> + {
> + _Float16 _Complex a;
> + float b;
> + } u = { .a = fc };
> + float ff= u.b;
> +
> + typedef union
> + {
> + float fp[16];
> + __m512h m512h;
> + } u1;
> +
> + __m512h test512 = _mm512_set1_pch(fc);
> +
> + u1 test;
> + test.m512h = test512;
> + for (int i = 0; i<16; i++)
> + {
> + if (test.fp[i] != ff) abort();
> + }
> +
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c
> new file mode 100644
> index 00000000000..4c5624f9935
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1a.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile} */
> +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> +
> +#include <immintrin.h>
> +
> +__m256h
> +__attribute__ ((noinline, noclone))
> +test_mm256_set1_pch (_Float16 _Complex A)
> +{
> + return _mm256_set1_pch(A);
> +}
> +
> +__m128h
> +__attribute__ ((noinline, noclone))
> +test_mm_set1_pch (_Float16 _Complex A)
> +{
> + return _mm_set1_pch(A);
> +}
> +
> +/* { dg-final { scan-assembler-times "vbroadcastss" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c
> new file mode 100644
> index 00000000000..aebff141821
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-set1-pch-1b.c
> @@ -0,0 +1,57 @@
> +/* { dg-do run { target avx512fp16 } } */
> +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> +
> +#include<stdio.h>
> +#include <math.h>
> +#include <complex.h>
> +
> +static void do_test (void);
> +
> +#define DO_TEST do_test
> +#define AVX512FP16
> +
> +#include <immintrin.h>
> +#include "avx512-check.h"
> +
> +static void
> +do_test (void)
> +{
> + _Float16 _Complex fc = 1.0 + 1.0*I;
> + union
> + {
> + _Float16 _Complex a;
> + float b;
> + } u = { .a = fc };
> + float ff= u.b;
> +
> + typedef union
> + {
> + float fp[8];
> + __m256h m256h;
> + } u1;
> +
> + __m256h test256 = _mm256_set1_pch(fc);
> +
> + u1 test1;
> + test1.m256h = test256;
> + for (int i = 0; i<8; i++)
> + {
> + if (test1.fp[i] != ff) abort();
> + }
> +
> + typedef union
> + {
> + float fp[4];
> + __m128h m128h;
> + } u2;
> +
> + __m128h test128 = _mm_set1_pch(fc);
> +
> + u2 test2;
> + test2.m128h = test128;
> + for (int i = 0; i<4; i++)
> + {
> + if (test2.fp[i] != ff) abort();
> + }
> +
> +}
> --
> 2.18.1
>
@@ -7237,6 +7237,19 @@ _mm512_permutexvar_ph (__m512i __A, __m512h __B)
(__mmask32)-1);
}
+/* Broadcast the _Float16 _Complex value __A into every 32-bit element.  */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_set1_pch (_Float16 _Complex __A)
+{
+  /* View the 32-bit complex pair as a float.  Locals use reserved names
+     because a user macro named a, b or u would otherwise break this header.  */
+  union { _Float16 _Complex __a; float __b; } __u = { .__a = __A };
+
+  /* Broadcasting that float replicates the complex pair in each element.  */
+  return (__m512h) _mm512_set1_ps (__u.__b);
+}
+
#ifdef __DISABLE_AVX512FP16__
#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options
@@ -3315,6 +3315,32 @@ _mm_permutexvar_ph (__m128i __A, __m128h __B)
(__mmask8)-1);
}
+/* Broadcast the _Float16 _Complex value __A into every 32-bit element.  */
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_set1_pch (_Float16 _Complex __A)
+{
+  /* View the 32-bit complex pair as a float.  Locals use reserved names
+     because a user macro named a, b or u would otherwise break this header.  */
+  union { _Float16 _Complex __a; float __b; } __u = { .__a = __A };
+
+  /* Broadcasting that float replicates the complex pair in each element.  */
+  return (__m256h) _mm256_set1_ps (__u.__b);
+}
+
+/* Broadcast the _Float16 _Complex value __A into every 32-bit element.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set1_pch (_Float16 _Complex __A)
+{
+  /* View the 32-bit complex pair as a float.  Locals use reserved names
+     because a user macro named a, b or u would otherwise break this header.  */
+  union { _Float16 _Complex __a; float __b; } __u = { .__a = __A };
+
+  /* Broadcasting that float replicates the complex pair in each element.  */
+  return (__m128h) _mm_set1_ps (__u.__b);
+}
+
#ifdef __DISABLE_AVX512FP16VL__
#undef __DISABLE_AVX512FP16VL__
#pragma GCC pop_options
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile} */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+#include <immintrin.h>
+
+__m512h __attribute__ ((noinline, noclone))  /* Out of line so its assembly can be scanned.  */
+test_mm512_set1_pch (_Float16 _Complex A)
+{
+  __m512h broadcast = _mm512_set1_pch (A);  /* Replicate A across the vector.  */
+  return broadcast;
+}
+
+/* { dg-final { scan-assembler "vbroadcastss\[ \\t\]+\[^\n\r\]*%zmm\[01\]" } } */
new file mode 100644
@@ -0,0 +1,42 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+#include<stdio.h>
+#include <math.h>
+#include <complex.h>
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+
+#include <immintrin.h>
+#include "avx512-check.h"
+
+/* Check that _mm512_set1_pch replicates one complex value in all 16 lanes.  */
+static void
+do_test (void)
+{
+  /* Distinct real and imaginary parts, so that broadcasting only one
+     16-bit half or swapping the halves cannot pass unnoticed.  */
+  _Float16 _Complex fc = 1.0 + 2.0*I;
+  union
+  {
+    _Float16 _Complex a;
+    float b;
+  } u = { .a = fc };
+  float ff = u.b;  /* Expected 32-bit lane pattern.  */
+
+  typedef union
+  {
+    float fp[16];
+    __m512h m512h;
+  } u1;
+
+  u1 test;
+  test.m512h = _mm512_set1_pch (fc);
+
+  for (int i = 0; i < 16; i++)
+    if (test.fp[i] != ff)
+      abort ();
+}
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do compile} */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+#include <immintrin.h>
+
+__m256h __attribute__ ((noinline, noclone))  /* Out of line so its assembly can be scanned.  */
+test_mm256_set1_pch (_Float16 _Complex A)
+{
+  __m256h broadcast = _mm256_set1_pch (A);  /* Replicate A across the vector.  */
+  return broadcast;
+}
+
+__m128h __attribute__ ((noinline, noclone))  /* Out of line so its assembly can be scanned.  */
+test_mm_set1_pch (_Float16 _Complex A)
+{
+  __m128h broadcast = _mm_set1_pch (A);  /* Replicate A across the vector.  */
+  return broadcast;
+}
+
+/* { dg-final { scan-assembler-times "vbroadcastss" 2 } } */
new file mode 100644
@@ -0,0 +1,57 @@
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+
+#include<stdio.h>
+#include <math.h>
+#include <complex.h>
+
+static void do_test (void);
+
+#define DO_TEST do_test
+#define AVX512FP16
+
+#include <immintrin.h>
+#include "avx512-check.h"
+
+/* Check that _mm256_set1_pch and _mm_set1_pch replicate one complex value
+   in every 32-bit lane of their result.  */
+static void
+do_test (void)
+{
+  /* Distinct real and imaginary parts, so that broadcasting only one
+     16-bit half or swapping the halves cannot pass unnoticed.  */
+  _Float16 _Complex fc = 1.0 + 2.0*I;
+  union
+  {
+    _Float16 _Complex a;
+    float b;
+  } u = { .a = fc };
+  float ff = u.b;  /* Expected 32-bit lane pattern.  */
+
+  typedef union
+  {
+    float fp[8];
+    __m256h m256h;
+  } u1;
+
+  u1 test1;
+  test1.m256h = _mm256_set1_pch (fc);
+
+  for (int i = 0; i < 8; i++)
+    if (test1.fp[i] != ff)
+      abort ();
+
+  /* Same check for the 128-bit variant.  */
+  typedef union
+  {
+    float fp[4];
+    __m128h m128h;
+  } u2;
+
+  u2 test2;
+  test2.m128h = _mm_set1_pch (fc);
+
+  for (int i = 0; i < 4; i++)
+    if (test2.fp[i] != ff)
+      abort ();
+}