[4/7] AVX512FP16: Add fix(uns)?_truncmn2 for HF scalar and vector modes

Message ID 20210923054640.1406227-5-hongtao.liu@intel.com
State Committed
Commit 60698a19c77aef8c96d14238975e12fad653e7fb
Headers
Series AVX512FP16: Support bunch of expanders for HFmode and vector HFmodes |

Commit Message

liuhongt Sept. 23, 2021, 5:46 a.m. UTC
  From: Hongyu Wang <hongyu.wang@intel.com>

NB: 64bit/32bit vectorize for HFmode is not supported for now, will
adjust this patch when V2HF/V4HF operations supported.

gcc/ChangeLog:

	* config/i386/i386.md (fix<fixunssuffix>_trunchf<mode>2): New expander.
	(fixuns_trunchfhi2): Likewise.
	(*fixuns_trunchfsi2zext): New define_insn.
	* config/i386/sse.md (ssePHmodelower): New mode_attr.
	(fix<fixunssuffix>_trunc<ssePHmodelower><mode>2):
	New expander for same element vector fix_truncate.
	(fix<fixunssuffix>_trunc<ssePHmodelower><mode>2):
	Likewise for V4HF to V4SI/V4DI fix_truncate.
	(fix<fixunssuffix>_truncv2hfv2di2):
	Likeise for V2HF to V2DI fix_truncate.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-trunchf.c: New test.
	* gcc.target/i386/avx512fp16-truncvnhf.c: Ditto.
---
 gcc/config/i386/i386.md                       | 29 +++++++++
 gcc/config/i386/sse.md                        | 43 +++++++++++++
 .../gcc.target/i386/avx512fp16-trunchf.c      | 59 ++++++++++++++++++
 .../gcc.target/i386/avx512fp16-truncvnhf.c    | 61 +++++++++++++++++++
 4 files changed, 192 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c
  

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a087e557d7f..c6279e620c9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4810,6 +4810,16 @@  (define_expand "fix_trunc<mode>di2"
    }
 })
 
+(define_insn "fix<fixunssuffix>_trunchf<mode>2"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(any_fix:SWI48
+	  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512FP16"
+  "vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 ;; Signed conversion to SImode.
 
 (define_expand "fix_truncxfsi2"
@@ -4917,6 +4927,17 @@  (define_insn "fixuns_trunc<mode>si2_avx512f"
    (set_attr "prefix" "evex")
    (set_attr "mode" "SI")])
 
+(define_insn "*fixuns_trunchfsi2zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (unsigned_fix:SI
+	    (match_operand:HF 1 "nonimmediate_operand" "vm"))))]
+  "TARGET_64BIT && TARGET_AVX512FP16"
+  "vcvttsh2usi\t{%1, %k0|%k0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "SI")])
+
 (define_insn "*fixuns_trunc<mode>si2_avx512f_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
@@ -4949,6 +4970,14 @@  (define_insn_and_split "*fixuns_trunc<mode>_1"
 ;; Without these patterns, we'll try the unsigned SI conversion which
 ;; is complex for SSE, rather than the signed SI conversion, which isn't.
 
+(define_expand "fixuns_trunchfhi2"
+  [(set (match_dup 2)
+	(fix:SI (match_operand:HF 1 "nonimmediate_operand")))
+   (set (match_operand:HI 0 "nonimmediate_operand")
+	(subreg:HI (match_dup 2) 0))]
+  "TARGET_AVX512FP16"
+  "operands[2] = gen_reg_rtx (SImode);")
+
 (define_expand "fixuns_trunc<mode>hi2"
   [(set (match_dup 2)
 	(fix:SI (match_operand:MODEF 1 "nonimmediate_operand")))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1ca95984afc..f8a5f197f3c 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1034,6 +1034,13 @@  (define_mode_attr ssePHmode
    (V8DI "V8HF") (V4DI "V8HF") (V2DI "V8HF")
    (V8DF "V8HF") (V16SF "V16HF") (V8SF "V8HF")])
 
+;; Mapping of vector modes to vector hf modes of same element.
+(define_mode_attr ssePHmodelower
+  [(V32HI "v32hf") (V16HI "v16hf") (V8HI "v8hf")
+   (V16SI "v16hf") (V8SI "v8hf") (V4SI "v4hf")
+   (V8DI "v8hf") (V4DI "v4hf") (V2DI "v2hf")
+   (V8DF "v8hf") (V16SF "v16hf") (V8SF "v8hf")])
+
 ;; Mapping of vector modes to packed single mode of the same size
 (define_mode_attr ssePSmode
   [(V16SI "V16SF") (V8DF "V16SF")
@@ -6175,6 +6182,12 @@  (define_insn "avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "HF")])
 
+(define_expand "fix<fixunssuffix>_trunc<ssePHmodelower><mode>2"
+  [(set (match_operand:VI2H_AVX512VL 0 "register_operand")
+	(any_fix:VI2H_AVX512VL
+	  (match_operand:<ssePHmode> 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
 (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name><round_saeonly_name>"
   [(set (match_operand:VI2H_AVX512VL 0 "register_operand" "=v")
 	(any_fix:VI2H_AVX512VL
@@ -6185,6 +6198,21 @@  (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name><round_saeonly
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_expand "fix<fixunssuffix>_truncv4hf<mode>2"
+  [(set (match_operand:VI4_128_8_256 0 "register_operand")
+	(any_fix:VI4_128_8_256
+	  (match_operand:V4HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  if (!MEM_P (operands[1]))
+    {
+      operands[1] = lowpart_subreg (V8HFmode, operands[1], V4HFmode);
+      emit_insn (gen_avx512fp16_fix<fixunssuffix>_trunc<mode>2 (operands[0],
+								operands[1]));
+      DONE;
+    }
+})
+
 (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name>"
   [(set (match_operand:VI4_128_8_256 0 "register_operand" "=v")
 	(any_fix:VI4_128_8_256
@@ -6207,6 +6235,21 @@  (define_insn "*avx512fp16_fix<fixunssuffix>_trunc<mode>2_load<mask_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_expand "fix<fixunssuffix>_truncv2hfv2di2"
+  [(set (match_operand:V2DI 0 "register_operand")
+	(any_fix:V2DI
+	  (match_operand:V2HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  if (!MEM_P (operands[1]))
+    {
+      operands[1] = lowpart_subreg (V8HFmode, operands[1], V2HFmode);
+      emit_insn (gen_avx512fp16_fix<fixunssuffix>_truncv2di2 (operands[0],
+							      operands[1]));
+      DONE;
+  }
+})
+
 (define_insn "avx512fp16_fix<fixunssuffix>_truncv2di2<mask_name>"
   [(set (match_operand:V2DI 0 "register_operand" "=v")
 	(any_fix:V2DI
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c
new file mode 100644
index 00000000000..2c025b7803c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c
@@ -0,0 +1,59 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16" } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*(?:%xmm\[0-9\]|\\(%esp\\))+, %eax(?:\n|\[ \\t\]+#)" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*(?:%xmm\[0-9\]|\\(%esp\\))+, %eax(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+, %rax(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+, %rax(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "xorl\[ \\t\]+%edx, %edx" { target ia32 } } } */
+
+#include <immintrin.h>
+
+short
+__attribute__ ((noinline, noclone))
+trunc_f16_to_si16 (_Float16 f)
+{
+  return f;
+}
+
+unsigned short
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su16 (_Float16 f)
+{
+  return f;
+}
+
+int
+__attribute__ ((noinline, noclone))
+trunc_f16_to_si32 (_Float16 f)
+{
+  return f;
+}
+
+unsigned int
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su32 (_Float16 f)
+{
+  return f;
+}
+
+long long
+__attribute__ ((noinline, noclone))
+trunc_f16_to_si64 (_Float16 f)
+{
+  return f;
+}
+
+unsigned long long
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su64 (_Float16 f)
+{
+  return f;
+}
+
+unsigned long long
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su64_zext (_Float16 f)
+{
+  return (unsigned int) f;
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c
new file mode 100644
index 00000000000..ee55cd12300
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c
@@ -0,0 +1,61 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-slp-vectorize -mprefer-vector-width=512" } */
+
+extern long long di[8];
+extern unsigned long long udi[8];
+extern int si[16];
+extern unsigned int usi[16];
+extern short hi[32];
+extern unsigned short uhi[32];
+extern _Float16 hf[32];
+
+#define DO_PRAGMA(X) _Pragma(#X)
+
+#define FIX_TRUNCHFVV(size, mode)	    \
+  void __attribute__ ((noinline, noclone))  \
+fix_trunc##size##hf##v##size##mode ()   \
+{\
+  int i;  \
+  DO_PRAGMA (GCC unroll size)	\
+  for (i = 0; i < size; i++)  \
+    mode[i] = hf[i];  \
+}
+
+FIX_TRUNCHFVV(32, hi)
+FIX_TRUNCHFVV(16, hi)
+FIX_TRUNCHFVV(8, hi)
+FIX_TRUNCHFVV(16, si)
+FIX_TRUNCHFVV(8, si)
+FIX_TRUNCHFVV(4, si)
+FIX_TRUNCHFVV(8, di)
+FIX_TRUNCHFVV(4, di)
+FIX_TRUNCHFVV(2, di)
+
+FIX_TRUNCHFVV(32, uhi)
+FIX_TRUNCHFVV(16, uhi)
+FIX_TRUNCHFVV(8, uhi)
+FIX_TRUNCHFVV(16, usi)
+FIX_TRUNCHFVV(8, usi)
+FIX_TRUNCHFVV(4, usi)
+FIX_TRUNCHFVV(8, udi)
+FIX_TRUNCHFVV(4, udi)
+FIX_TRUNCHFVV(2, udi)
+
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */