i386: Support complex fma/conj_fma for _Float16.
Commit Message
Hi,
This patch adds support for cmla_optab, cmul_optab, cmla_conj_optab and cmul_conj_optab for vector _Float16.
Ok for master?
gcc/ChangeLog:
* config/i386/sse.md (cmul<conj_op><mode>3): Add new define_expand.
(cmla<conj_op><mode>4): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512fp16-vector-complex-float.c: New test.
---
gcc/config/i386/sse.md | 23 +++++++++++
.../i386/avx512fp16-vector-complex-float.c | 40 +++++++++++++++++++
2 files changed, 63 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c
--
2.18.1
Comments
On Fri, Nov 5, 2021 at 3:09 PM Kong, Lingling via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> This patch is to support cmla_optab, cmul_optab, cmla_conj_optab, cmul_conj_optab for vector _Float16.
> Ok for master?
LGTM.
> gcc/ChangeLog:
>
> * config/i386/sse.md (cmul<conj_op><mode>3): add new define_expand.
> (cmla<conj_op><mode>4): Likewise
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/avx512fp16-vector-complex-float.c: New test.
> ---
> gcc/config/i386/sse.md | 23 +++++++++++
> .../i386/avx512fp16-vector-complex-float.c | 40 +++++++++++++++++++
> 2 files changed, 63 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 0a7f5b178f9..8d3fef0a31a 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -5922,6 +5922,12 @@
> (UNSPEC_COMPLEX_FMUL "fmulc")
> (UNSPEC_COMPLEX_FCMUL "fcmulc")])
>
> +(define_int_attr conj_op
> + [(UNSPEC_COMPLEX_FMA "")
> + (UNSPEC_COMPLEX_FCMA "_conj")
> + (UNSPEC_COMPLEX_FMUL "")
> + (UNSPEC_COMPLEX_FCMUL "_conj")])
> +
> (define_mode_attr complexmove
> [(V32HF "avx512f_loadv16sf")
> (V16HF "avx512vl_loadv8sf")
> @@ -6003,6 +6009,15 @@
> DONE;
> })
>
> +(define_expand "cmla<conj_op><mode>4"
> + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
> + (unspec:VF_AVX512FP16VL
> + [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
> + (match_operand:VF_AVX512FP16VL 2 "vector_operand")
> + (match_operand:VF_AVX512FP16VL 3 "vector_operand")]
> + UNSPEC_COMPLEX_F_C_MA))]
> + "TARGET_AVX512FP16")
> +
> (define_insn "fma_<complexopname>_<mode><sdc_maskz_name><round_name>"
> [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
> (unspec:VF_AVX512FP16VL
> @@ -6084,6 +6099,14 @@
> [(set_attr "type" "ssemuladd")
> (set_attr "mode" "<MODE>")])
>
> +(define_expand "cmul<conj_op><mode>3"
> + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
> + (unspec:VF_AVX512FP16VL
> + [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
> + (match_operand:VF_AVX512FP16VL 2 "vector_operand")]
> + UNSPEC_COMPLEX_F_C_MUL))]
> + "TARGET_AVX512FP16")
> +
> (define_insn "<avx512>_<complexopname>_<mode><maskc_name><round_name>"
> [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
> (unspec:VF_AVX512FP16VL
> diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c
> new file mode 100644
> index 00000000000..bcb957f0de0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vector-complex-float.c
> @@ -0,0 +1,40 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
> +/* { dg-final { scan-assembler-times "vfmaddcph\[ \\t\]" 1 } } */
> +/* { dg-final { scan-assembler-not "vfmadd\[123]*ph\[ \\t\]"} } */
> +/* { dg-final { scan-assembler-not "vfmadd\[123]*sh\[ \\t\]"} } */
> +/* { dg-final { scan-assembler-times "vfcmaddcph\[ \\t\]" 1 } } */
> +/* { dg-final { scan-assembler-times "vfmulcph\[ \\t\]" 1 } } */
> +/* { dg-final { scan-assembler-times "vfcmulcph\[ \\t\]" 1 } } */
> +
> +#include<complex.h>
> +#define TYPE _Float16
> +#define N 16
> +
> +void fma0 (_Complex TYPE *a, _Complex TYPE *b,
> + _Complex TYPE *c)
> +{
> + for (int i = 0; i < N; i++)
> + c[i] += a[i] * b[i];
> +}
> +
> +void fmaconj (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
> + _Complex TYPE c[restrict N])
> +{
> + for (int i = 0; i < N; i++)
> + c[i] += a[i] * ~b[i];
> +}
> +
> +void fmul (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
> + _Complex TYPE c[restrict N])
> +{
> + for (int i = 0; i < N; i++)
> + c[i] = a[i] * b[i];
> +}
> +
> +void fmulconj (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
> + _Complex TYPE c[restrict N])
> +{
> + for (int i = 0; i < N; i++)
> + c[i] = a[i] * ~b[i];
> +}
> --
> 2.18.1
>
@@ -5922,6 +5922,12 @@
(UNSPEC_COMPLEX_FMUL "fmulc")
(UNSPEC_COMPLEX_FCMUL "fcmulc")])
+(define_int_attr conj_op
+ [(UNSPEC_COMPLEX_FMA "")
+ (UNSPEC_COMPLEX_FCMA "_conj")
+ (UNSPEC_COMPLEX_FMUL "")
+ (UNSPEC_COMPLEX_FCMUL "_conj")])
+
(define_mode_attr complexmove
[(V32HF "avx512f_loadv16sf")
(V16HF "avx512vl_loadv8sf")
@@ -6003,6 +6009,15 @@
DONE;
})
+(define_expand "cmla<conj_op><mode>4"
+ [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+ (unspec:VF_AVX512FP16VL
+ [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+ (match_operand:VF_AVX512FP16VL 2 "vector_operand")
+ (match_operand:VF_AVX512FP16VL 3 "vector_operand")]
+ UNSPEC_COMPLEX_F_C_MA))]
+ "TARGET_AVX512FP16")
+
(define_insn "fma_<complexopname>_<mode><sdc_maskz_name><round_name>"
[(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
(unspec:VF_AVX512FP16VL
@@ -6084,6 +6099,14 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_expand "cmul<conj_op><mode>3"
+ [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
+ (unspec:VF_AVX512FP16VL
+ [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
+ (match_operand:VF_AVX512FP16VL 2 "vector_operand")]
+ UNSPEC_COMPLEX_F_C_MUL))]
+ "TARGET_AVX512FP16")
+
(define_insn "<avx512>_<complexopname>_<mode><maskc_name><round_name>"
[(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
(unspec:VF_AVX512FP16VL
new file mode 100644
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vfmaddcph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfmadd\[123]*ph\[ \\t\]"} } */
+/* { dg-final { scan-assembler-not "vfmadd\[123]*sh\[ \\t\]"} } */
+/* { dg-final { scan-assembler-times "vfcmaddcph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfmulcph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfcmulcph\[ \\t\]" 1 } } */
+
+#include<complex.h>
+#define TYPE _Float16
+#define N 16
+
+void fma0 (_Complex TYPE *a, _Complex TYPE *b,
+ _Complex TYPE *c)
+{
+ for (int i = 0; i < N; i++)
+ c[i] += a[i] * b[i];
+}
+
+void fmaconj (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+ _Complex TYPE c[restrict N])
+{
+ for (int i = 0; i < N; i++)
+ c[i] += a[i] * ~b[i];
+}
+
+void fmul (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+ _Complex TYPE c[restrict N])
+{
+ for (int i = 0; i < N; i++)
+ c[i] = a[i] * b[i];
+}
+
+void fmulconj (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
+ _Complex TYPE c[restrict N])
+{
+ for (int i = 0; i < N; i++)
+ c[i] = a[i] * ~b[i];
+}