@@ -4810,6 +4810,16 @@ (define_expand "fix_trunc<mode>di2"
}
})
+(define_insn "fix<fixunssuffix>_trunchf<mode>2"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (any_fix:SWI48
+ (match_operand:HF 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512FP16"
+ "vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
;; Signed conversion to SImode.
(define_expand "fix_truncxfsi2"
@@ -4917,6 +4927,17 @@ (define_insn "fixuns_trunc<mode>si2_avx512f"
(set_attr "prefix" "evex")
(set_attr "mode" "SI")])
+(define_insn "*fixuns_trunchfsi2zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (unsigned_fix:SI
+ (match_operand:HF 1 "nonimmediate_operand" "vm"))))]
+ "TARGET_64BIT && TARGET_AVX512FP16"
+ "vcvttsh2usi\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "SI")])
+
(define_insn "*fixuns_trunc<mode>si2_avx512f_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
@@ -4949,6 +4970,14 @@ (define_insn_and_split "*fixuns_trunc<mode>_1"
;; Without these patterns, we'll try the unsigned SI conversion which
;; is complex for SSE, rather than the signed SI conversion, which isn't.
+(define_expand "fixuns_trunchfhi2"
+ [(set (match_dup 2)
+ (fix:SI (match_operand:HF 1 "nonimmediate_operand")))
+ (set (match_operand:HI 0 "nonimmediate_operand")
+ (subreg:HI (match_dup 2) 0))]
+ "TARGET_AVX512FP16"
+ "operands[2] = gen_reg_rtx (SImode);")
+
(define_expand "fixuns_trunc<mode>hi2"
[(set (match_dup 2)
(fix:SI (match_operand:MODEF 1 "nonimmediate_operand")))
@@ -1034,6 +1034,13 @@ (define_mode_attr ssePHmode
(V8DI "V8HF") (V4DI "V8HF") (V2DI "V8HF")
(V8DF "V8HF") (V16SF "V16HF") (V8SF "V8HF")])
+;; Mapping of vector modes to vector hf modes of same element.
+(define_mode_attr ssePHmodelower
+ [(V32HI "v32hf") (V16HI "v16hf") (V8HI "v8hf")
+ (V16SI "v16hf") (V8SI "v8hf") (V4SI "v4hf")
+ (V8DI "v8hf") (V4DI "v4hf") (V2DI "v2hf")
+ (V8DF "v8hf") (V16SF "v16hf") (V8SF "v8hf")])
+
;; Mapping of vector modes to packed single mode of the same size
(define_mode_attr ssePSmode
[(V16SI "V16SF") (V8DF "V16SF")
@@ -6175,6 +6182,12 @@ (define_insn "avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>"
(set_attr "prefix" "evex")
(set_attr "mode" "HF")])
+(define_expand "fix<fixunssuffix>_trunc<ssePHmodelower><mode>2"
+ [(set (match_operand:VI2H_AVX512VL 0 "register_operand")
+ (any_fix:VI2H_AVX512VL
+ (match_operand:<ssePHmode> 1 "nonimmediate_operand")))]
+ "TARGET_AVX512FP16")
+
(define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name><round_saeonly_name>"
[(set (match_operand:VI2H_AVX512VL 0 "register_operand" "=v")
(any_fix:VI2H_AVX512VL
@@ -6185,6 +6198,21 @@ (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name><round_saeonly
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_expand "fix<fixunssuffix>_truncv4hf<mode>2"
+ [(set (match_operand:VI4_128_8_256 0 "register_operand")
+ (any_fix:VI4_128_8_256
+ (match_operand:V4HF 1 "nonimmediate_operand")))]
+ "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+ if (!MEM_P (operands[1]))
+ {
+ operands[1] = lowpart_subreg (V8HFmode, operands[1], V4HFmode);
+ emit_insn (gen_avx512fp16_fix<fixunssuffix>_trunc<mode>2 (operands[0],
+ operands[1]));
+ DONE;
+ }
+})
+
(define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name>"
[(set (match_operand:VI4_128_8_256 0 "register_operand" "=v")
(any_fix:VI4_128_8_256
@@ -6207,6 +6235,21 @@ (define_insn "*avx512fp16_fix<fixunssuffix>_trunc<mode>2_load<mask_name>"
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_expand "fix<fixunssuffix>_truncv2hfv2di2"
+ [(set (match_operand:V2DI 0 "register_operand")
+ (any_fix:V2DI
+ (match_operand:V2HF 1 "nonimmediate_operand")))]
+ "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+ if (!MEM_P (operands[1]))
+ {
+ operands[1] = lowpart_subreg (V8HFmode, operands[1], V2HFmode);
+ emit_insn (gen_avx512fp16_fix<fixunssuffix>_truncv2di2 (operands[0],
+ operands[1]));
+ DONE;
+ }
+})
+
(define_insn "avx512fp16_fix<fixunssuffix>_truncv2di2<mask_name>"
[(set (match_operand:V2DI 0 "register_operand" "=v")
(any_fix:V2DI
new file mode 100644
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16" } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*(?:%xmm\[0-9\]|\\(%esp\\))+, %eax(?:\n|\[ \\t\]+#)" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*(?:%xmm\[0-9\]|\\(%esp\\))+, %eax(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+, %rax(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+, %rax(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "xorl\[ \\t\]+%edx, %edx" { target ia32 } } } */
+
+#include <immintrin.h>
+
+short
+__attribute__ ((noinline, noclone))
+trunc_f16_to_si16 (_Float16 f)
+{
+ return f;
+}
+
+unsigned short
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su16 (_Float16 f)
+{
+ return f;
+}
+
+int
+__attribute__ ((noinline, noclone))
+trunc_f16_to_si32 (_Float16 f)
+{
+ return f;
+}
+
+unsigned int
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su32 (_Float16 f)
+{
+ return f;
+}
+
+long long
+__attribute__ ((noinline, noclone))
+trunc_f16_to_si64 (_Float16 f)
+{
+ return f;
+}
+
+unsigned long long
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su64 (_Float16 f)
+{
+ return f;
+}
+
+unsigned long long
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su64_zext (_Float16 f)
+{
+ return (unsigned int) f;
+}
+
new file mode 100644
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-slp-vectorize -mprefer-vector-width=512" } */
+
+extern long long di[8];
+extern unsigned long long udi[8];
+extern int si[16];
+extern unsigned int usi[16];
+extern short hi[32];
+extern unsigned short uhi[32];
+extern _Float16 hf[32];
+
+#define DO_PRAGMA(X) _Pragma(#X)
+
+#define FIX_TRUNCHFVV(size, mode) \
+ void __attribute__ ((noinline, noclone)) \
+fix_trunc##size##hf##v##size##mode () \
+{\
+ int i; \
+ DO_PRAGMA (GCC unroll size) \
+ for (i = 0; i < size; i++) \
+ mode[i] = hf[i]; \
+}
+
+FIX_TRUNCHFVV(32, hi)
+FIX_TRUNCHFVV(16, hi)
+FIX_TRUNCHFVV(8, hi)
+FIX_TRUNCHFVV(16, si)
+FIX_TRUNCHFVV(8, si)
+FIX_TRUNCHFVV(4, si)
+FIX_TRUNCHFVV(8, di)
+FIX_TRUNCHFVV(4, di)
+FIX_TRUNCHFVV(2, di)
+
+FIX_TRUNCHFVV(32, uhi)
+FIX_TRUNCHFVV(16, uhi)
+FIX_TRUNCHFVV(8, uhi)
+FIX_TRUNCHFVV(16, usi)
+FIX_TRUNCHFVV(8, usi)
+FIX_TRUNCHFVV(4, usi)
+FIX_TRUNCHFVV(8, udi)
+FIX_TRUNCHFVV(4, udi)
+FIX_TRUNCHFVV(2, udi)
+
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */