@@ -14054,7 +14054,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
tmp1 = gen_reg_rtx (SImode);
emit_move_insn (tmp1, gen_lowpart (SImode, val));
- /* Insert the SImode value as low element of a V4SImode vector. */
+ /* Insert the SImode value as low element of a V4SImode vector. */
tmp2 = gen_reg_rtx (V4SImode);
emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
@@ -14638,7 +14638,7 @@ ix86_expand_vector_init_interleave (machine_mode mode,
switch (mode)
{
case E_V8HFmode:
- gen_load_even = gen_vec_setv8hf;
+ gen_load_even = gen_vec_interleave_lowv8hf;
gen_interleave_first_low = gen_vec_interleave_lowv4si;
gen_interleave_second_low = gen_vec_interleave_lowv2di;
inner_mode = HFmode;
@@ -14673,35 +14673,40 @@ ix86_expand_vector_init_interleave (machine_mode mode,
op = ops [i + i];
if (inner_mode == HFmode)
{
- /* Convert HFmode to HImode. */
- op1 = gen_reg_rtx (HImode);
- op1 = gen_rtx_SUBREG (HImode, force_reg (HFmode, op), 0);
- op = gen_reg_rtx (HImode);
- emit_move_insn (op, op1);
+ rtx even, odd;
+ /* Use vpunpcklwd to pack two HFmode values. */
+ op0 = gen_reg_rtx (V8HFmode);
+ even = lowpart_subreg (V8HFmode, force_reg (HFmode, op), HFmode);
+ odd = lowpart_subreg (V8HFmode,
+ force_reg (HFmode, ops[i + i + 1]),
+ HFmode);
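+ /* Each scalar is viewed as the low element of a V8HFmode register
+ via a paradoxical lowpart subreg; the interleave below packs the
+ two scalars into the low two elements of op0. */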
+ emit_insn (gen_load_even (op0, even, odd));
}
+ else
+ {
+ /* Extend the odd element to SImode using a paradoxical SUBREG. */
+ op0 = gen_reg_rtx (SImode);
+ emit_move_insn (op0, gen_lowpart (SImode, op));
- /* Extend the odd elment to SImode using a paradoxical SUBREG. */
- op0 = gen_reg_rtx (SImode);
- emit_move_insn (op0, gen_lowpart (SImode, op));
-
- /* Insert the SImode value as low element of V4SImode vector. */
- op1 = gen_reg_rtx (V4SImode);
- op0 = gen_rtx_VEC_MERGE (V4SImode,
- gen_rtx_VEC_DUPLICATE (V4SImode,
- op0),
- CONST0_RTX (V4SImode),
- const1_rtx);
- emit_insn (gen_rtx_SET (op1, op0));
+ /* Insert the SImode value as low element of V4SImode vector. */
+ op1 = gen_reg_rtx (V4SImode);
+ op0 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode,
+ op0),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (op1, op0));
- /* Cast the V4SImode vector back to a vector in orignal mode. */
- op0 = gen_reg_rtx (mode);
- emit_move_insn (op0, gen_lowpart (mode, op1));
+ /* Cast the V4SImode vector back to a vector in original mode. */
+ op0 = gen_reg_rtx (mode);
+ emit_move_insn (op0, gen_lowpart (mode, op1));
- /* Load even elements into the second position. */
- emit_insn (gen_load_even (op0,
- force_reg (inner_mode,
- ops [i + i + 1]),
- const1_rtx));
+ /* Load even elements into the second position. */
+ emit_insn (gen_load_even (op0,
+ force_reg (inner_mode,
+ ops[i + i + 1]),
+ const1_rtx));
+ }
/* Cast vector to FIRST_IMODE vector. */
ops[i] = gen_reg_rtx (first_imode);
@@ -15182,6 +15187,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
machine_mode inner_mode = GET_MODE_INNER (mode);
machine_mode half_mode;
bool use_vec_merge = false;
+ bool blendm_const = false;
rtx tmp;
static rtx (*gen_extract[7][2]) (rtx, rtx)
= {
@@ -15369,7 +15375,14 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
return;
case E_V8HFmode:
- use_vec_merge = true;
+ if (TARGET_AVX2)
+ {
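+ /* With AVX2, vpbroadcastw can splat the scalar, so a broadcast
+ followed by an immediate-masked blend is preferred over
+ vpinsrw (see the pinsr pattern's condition in sse.md). */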
+ mmode = SImode;
+ gen_blendm = gen_sse4_1_pblendph;
+ blendm_const = true;
+ }
+ else
+ use_vec_merge = true;
break;
case E_V8HImode:
@@ -15396,10 +15409,20 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
goto half;
case E_V16HFmode:
- half_mode = V8HFmode;
- j = 6;
- n = 8;
- goto half;
+ if (TARGET_AVX2)
+ {
+ mmode = SImode;
+ gen_blendm = gen_avx2_pblendph;
+ blendm_const = true;
+ break;
+ }
+ else
+ {
+ half_mode = V8HFmode;
+ j = 6;
+ n = 8;
+ goto half;
+ }
case E_V16HImode:
half_mode = V8HImode;
@@ -15560,15 +15583,15 @@ quarter:
{
tmp = gen_reg_rtx (mode);
emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
+ rtx merge_mask = gen_int_mode (HOST_WIDE_INT_1U << elt, mmode);
/* The avx512*_blendm<mode> expanders have different operand order
from VEC_MERGE. In VEC_MERGE, the first input operand is used for
elements where the mask is set and second input operand otherwise,
in {sse,avx}*_*blend* the first input operand is used for elements
where the mask is clear and second input operand otherwise. */
- emit_insn (gen_blendm (target, target, tmp,
- force_reg (mmode,
- gen_int_mode (HOST_WIDE_INT_1U << elt,
- mmode))));
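+ /* The pblendph patterns take their mask as an immediate, so it is
+ only forced into a mask register for the avx512 blendm insns. */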
+ if (!blendm_const)
+ merge_mask = force_reg (mmode, merge_mask);
+ emit_insn (gen_blendm (target, target, tmp, merge_mask));
}
else if (use_vec_merge)
{
@@ -19443,8 +19443,11 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
/* Vector registers do not support QI or HImode loads. If we don't
disallow a change to these modes, reload will assume it's ok to
drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
- the vec_dupv4hi pattern. */
- if (GET_MODE_SIZE (from) < 4)
+ the vec_dupv4hi pattern.
+ NB: AVX512FP16 supports vmovw, which can load 16-bit data into an
+ SSE register. */
+ int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
+ if (GET_MODE_SIZE (from) < mov_size)
return false;
}
@@ -806,6 +806,7 @@ (define_mode_iterator VF_AVX512
(V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
V16SF V8DF])
+(define_mode_iterator V8_128 [V8HI V8HF])
(define_mode_iterator V16_256 [V16HI V16HF])
(define_mode_iterator V32_512 [V32HI V32HF])
@@ -9891,16 +9892,33 @@ (define_insn_and_split "*vec_extract<mode>_0"
"operands[1] = gen_lowpart (HFmode, operands[1]);")
(define_insn "*vec_extracthf"
- [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=r,m")
+ [(set (match_operand:HF 0 "register_sse4nonimm_operand" "=*r,m,x,v")
(vec_select:HF
- (match_operand:V8HF 1 "register_operand" "v,v")
+ (match_operand:V8HF 1 "register_operand" "v,v,0,v")
(parallel
[(match_operand:SI 2 "const_0_to_7_operand")])))]
"TARGET_SSE2"
- "@
- vpextrw\t{%2, %1, %k0|%k0, %1, %2}
- vpextrw\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sselog1")
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
+ case 1:
+ return "vpextrw\t{%2, %1, %0|%0, %1, %2}";
+
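+ /* HFmode elements are 2 bytes wide, so shifting right by 2*N
+ bytes moves element N into the low position. */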
+ case 2:
+ operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
+ return "psrldq\t{%2, %0|%0, %2}";
+ case 3:
+ operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
+ return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "*,*,noavx,avx")
+ (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1")
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "TI")])
@@ -15359,12 +15377,12 @@ (define_insn "vec_interleave_lowv16qi<mask_name>"
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_insn "avx512bw_interleave_highv32hi<mask_name>"
- [(set (match_operand:V32HI 0 "register_operand" "=v")
- (vec_select:V32HI
- (vec_concat:V64HI
- (match_operand:V32HI 1 "register_operand" "v")
- (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
+(define_insn "avx512bw_interleave_high<mode><mask_name>"
+ [(set (match_operand:V32_512 0 "register_operand" "=v")
+ (vec_select:V32_512
+ (vec_concat:<ssedoublevecmode>
+ (match_operand:V32_512 1 "register_operand" "v")
+ (match_operand:V32_512 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 4) (const_int 36)
(const_int 5) (const_int 37)
(const_int 6) (const_int 38)
@@ -15387,12 +15405,12 @@ (define_insn "avx512bw_interleave_highv32hi<mask_name>"
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx2_interleave_highv16hi<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=Yw")
- (vec_select:V16HI
- (vec_concat:V32HI
- (match_operand:V16HI 1 "register_operand" "Yw")
- (match_operand:V16HI 2 "nonimmediate_operand" "Ywm"))
+(define_insn "avx2_interleave_high<mode><mask_name>"
+ [(set (match_operand:V16_256 0 "register_operand" "=Yw")
+ (vec_select:V16_256
+ (vec_concat:<ssedoublevecmode>
+ (match_operand:V16_256 1 "register_operand" "Yw")
+ (match_operand:V16_256 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 4) (const_int 20)
(const_int 5) (const_int 21)
(const_int 6) (const_int 22)
@@ -15407,12 +15425,12 @@ (define_insn "avx2_interleave_highv16hi<mask_name>"
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "vec_interleave_highv8hi<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
- (vec_select:V8HI
- (vec_concat:V16HI
- (match_operand:V8HI 1 "register_operand" "0,Yw")
- (match_operand:V8HI 2 "vector_operand" "xBm,Ywm"))
+(define_insn "vec_interleave_high<mode><mask_name>"
+ [(set (match_operand:V8_128 0 "register_operand" "=x,Yw")
+ (vec_select:V8_128
+ (vec_concat:<ssedoublevecmode>
+ (match_operand:V8_128 1 "register_operand" "0,Yw")
+ (match_operand:V8_128 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
@@ -15427,12 +15445,12 @@ (define_insn "vec_interleave_highv8hi<mask_name>"
(set_attr "prefix" "orig,maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
- [(set (match_operand:V32HI 0 "register_operand" "=v")
- (vec_select:V32HI
- (vec_concat:V64HI
- (match_operand:V32HI 1 "register_operand" "v")
- (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
+(define_insn "<mask_codefor>avx512bw_interleave_low<mode><mask_name>"
+ [(set (match_operand:V32_512 0 "register_operand" "=v")
+ (vec_select:V32_512
+ (vec_concat:<ssedoublevecmode>
+ (match_operand:V32_512 1 "register_operand" "v")
+ (match_operand:V32_512 2 "nonimmediate_operand" "vm"))
(parallel [(const_int 0) (const_int 32)
(const_int 1) (const_int 33)
(const_int 2) (const_int 34)
@@ -15455,12 +15473,12 @@ (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx2_interleave_lowv16hi<mask_name>"
- [(set (match_operand:V16HI 0 "register_operand" "=Yw")
- (vec_select:V16HI
- (vec_concat:V32HI
- (match_operand:V16HI 1 "register_operand" "Yw")
- (match_operand:V16HI 2 "nonimmediate_operand" "Ywm"))
+(define_insn "avx2_interleave_low<mode><mask_name>"
+ [(set (match_operand:V16_256 0 "register_operand" "=Yw")
+ (vec_select:V16_256
+ (vec_concat:<ssedoublevecmode>
+ (match_operand:V16_256 1 "register_operand" "Yw")
+ (match_operand:V16_256 2 "nonimmediate_operand" "Ywm"))
(parallel [(const_int 0) (const_int 16)
(const_int 1) (const_int 17)
(const_int 2) (const_int 18)
@@ -15475,12 +15493,12 @@ (define_insn "avx2_interleave_lowv16hi<mask_name>"
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
-(define_insn "vec_interleave_lowv8hi<mask_name>"
- [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
- (vec_select:V8HI
- (vec_concat:V16HI
- (match_operand:V8HI 1 "register_operand" "0,Yw")
- (match_operand:V8HI 2 "vector_operand" "xBm,Ywm"))
+(define_insn "vec_interleave_low<mode><mask_name>"
+ [(set (match_operand:V8_128 0 "register_operand" "=x,Yw")
+ (vec_select:V8_128
+ (vec_concat:<ssedoublevecmode>
+ (match_operand:V8_128 1 "register_operand" "0,Yw")
+ (match_operand:V8_128 2 "vector_operand" "xBm,Ywm"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
@@ -15655,6 +15673,7 @@ (define_mode_attr pinsr_evex_isa
(V4SI "avx512dq") (V2DI "avx512dq")])
;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
+;; For V8HFmode and TARGET_AVX2, broadcastw + pblendw should be better.
(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
[(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
(vec_merge:PINSR_MODE
@@ -15664,7 +15683,8 @@ (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
(match_operand:SI 3 "const_int_operand")))]
"TARGET_SSE2
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
- < GET_MODE_NUNITS (<MODE>mode))"
+ < GET_MODE_NUNITS (<MODE>mode))
+ && !(<MODE>mode == V8HFmode && TARGET_AVX2)"
{
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
@@ -15672,26 +15692,18 @@ (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
{
case 0:
if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
- return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
+ return "pinsr<sseintmodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
/* FALLTHRU */
case 1:
- return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
+ return "pinsr<sseintmodesuffix>\t{%3, %2, %0|%0, %2, %3}";
case 2:
case 4:
if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
- {
- if (<MODE>mode == V8HFmode)
- return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
- else
- return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
- }
+ return "vpinsr<sseintmodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
/* FALLTHRU */
case 3:
case 5:
- if (<MODE>mode == V8HFmode)
- return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
- else
- return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ return "vpinsr<sseintmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
default:
gcc_unreachable ();
}
@@ -19179,11 +19191,14 @@ (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt_subreg_not"
(lt:VI1_AVX2 (match_dup 3) (match_dup 4))] UNSPEC_BLENDV))]
"operands[3] = gen_lowpart (<MODE>mode, operands[3]);")
-(define_insn "sse4_1_pblendw"
- [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
- (vec_merge:V8HI
- (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
- (match_operand:V8HI 1 "register_operand" "0,0,x")
+(define_mode_attr blendsuf
+ [(V8HI "w") (V8HF "ph")])
+
+(define_insn "sse4_1_pblend<blendsuf>"
+ [(set (match_operand:V8_128 0 "register_operand" "=Yr,*x,x")
+ (vec_merge:V8_128
+ (match_operand:V8_128 2 "vector_operand" "YrBm,*xBm,xm")
+ (match_operand:V8_128 1 "register_operand" "0,0,x")
(match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
"TARGET_SSE4_1"
"@
@@ -19210,6 +19225,47 @@ (define_expand "avx2_pblendw"
operands[3] = GEN_INT (val << 8 | val);
})
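+
+;; The mask must select elements from only one 128-bit lane: vpblendw
+;; applies the same 8-bit immediate to both lanes, so the expansion
+;; blends words per lane and then picks the target lane with vpblendd.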
+(define_expand "avx2_pblendph"
+ [(set (match_operand:V16HF 0 "register_operand")
+ (vec_merge:V16HF
+ (match_operand:V16HF 2 "register_operand")
+ (match_operand:V16HF 1 "register_operand")
+ (match_operand:SI 3 "const_int_operand")))]
+ "TARGET_AVX2
+ && !((INTVAL (operands[3]) & 0xff) && (INTVAL (operands[3]) & 0xff00))"
+{
+ int mask = INTVAL (operands[3]);
+ if (mask == 0)
+ emit_move_insn (operands[0], operands[1]);
+ else
+ {
+ rtx tmp = gen_reg_rtx (V16HImode);
+ rtx blendw_idx, blendd_idx;
+
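+ /* Blend the words in both lanes first, then use vpblendd to keep
+ the result only in the lane the mask targets (low lane for mask
+ bits 0-7, high lane for bits 8-15). */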
+ if (mask & 0xff)
+ {
+ blendw_idx = GEN_INT (mask & 0xff);
+ blendd_idx = GEN_INT (15);
+ }
+ else
+ {
+ blendw_idx = GEN_INT (mask >> 8 & 0xff);
+ blendd_idx = GEN_INT (240);
+ }
+ operands[1] = lowpart_subreg (V16HImode, operands[1], V16HFmode);
+ operands[2] = lowpart_subreg (V16HImode, operands[2], V16HFmode);
+ emit_insn (gen_avx2_pblendw (tmp, operands[1], operands[2], blendw_idx));
+
+ operands[0] = lowpart_subreg (V8SImode, operands[0], V16HFmode);
+ tmp = lowpart_subreg (V8SImode, tmp, V16HImode);
+ operands[1] = lowpart_subreg (V8SImode, operands[1], V16HImode);
+ emit_insn (gen_avx2_pblenddv8si (operands[0], operands[1],
+ tmp, blendd_idx));
+ }
+
+ DONE;
+})
+
(define_insn "*avx2_pblendw"
[(set (match_operand:V16HI 0 "register_operand" "=x")
(vec_merge:V16HI
@@ -1,8 +1,8 @@
/* { dg-do compile } */
/* { dg-options "-mavx512fp16 -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovsh|vmovw)" 2 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpinsrw" 1 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpinsrw" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vmovsh" 1 } } */
+/* { dg-final { scan-assembler-times "vpblendw" 1 } } */
+/* { dg-final { scan-assembler "vpbroadcastw" } } */
typedef _Float16 __v8hf __attribute__ ((__vector_size__ (16)));
typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
new file mode 100644
@@ -0,0 +1,65 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -mavx512vl -O2" } */
+
+typedef _Float16 v8hf __attribute__((vector_size (16)));
+typedef _Float16 v16hf __attribute__((vector_size (32)));
+typedef _Float16 v32hf __attribute__((vector_size (64)));
+
+#define VEC_EXTRACT(V,S,IDX) \
+ S \
+ __attribute__((noipa)) \
+ vec_extract_##V##_##IDX (V v) \
+ { \
+ return v[IDX]; \
+ }
+
+#define VEC_SET(V,S,IDX) \
+ V \
+ __attribute__((noipa)) \
+ vec_set_##V##_##IDX (V v, S s) \
+ { \
+ v[IDX] = s; \
+ return v; \
+ }
+
+v8hf
+vec_init_v8hf (_Float16 a1, _Float16 a2, _Float16 a3, _Float16 a4, _Float16 a5,
+ _Float16 a6, _Float16 a7, _Float16 a8)
+{
+ return __extension__ (v8hf) {a1, a2, a3, a4, a5, a6, a7, a8};
+}
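+
+/* The eight scalars are combined pairwise: 4 vpunpcklwd, then 2
+   vpunpckldq, then 1 vpunpcklqdq.  */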
+
+/* { dg-final { scan-assembler-times "vpunpcklwd" 4 } } */
+/* { dg-final { scan-assembler-times "vpunpckldq" 2 } } */
+/* { dg-final { scan-assembler-times "vpunpcklqdq" 1 } } */
+
+VEC_EXTRACT (v8hf, _Float16, 4);
+VEC_EXTRACT (v16hf, _Float16, 3);
+VEC_EXTRACT (v16hf, _Float16, 8);
+VEC_EXTRACT (v16hf, _Float16, 15);
+VEC_EXTRACT (v32hf, _Float16, 5);
+VEC_EXTRACT (v32hf, _Float16, 8);
+VEC_EXTRACT (v32hf, _Float16, 14);
+VEC_EXTRACT (v32hf, _Float16, 16);
+VEC_EXTRACT (v32hf, _Float16, 24);
+VEC_EXTRACT (v32hf, _Float16, 28);
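+
+/* In-lane extracts shift the element to the bottom with a byte shift of
+   2 * (IDX % 8); elements in an upper lane need a vextract first.  */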
+/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$8" 2 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$6" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$14" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$10" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[\t ]*\\\$12" 1 } } */
+/* { dg-final { scan-assembler-times "vextract" 9 } } */
+
+VEC_SET (v8hf, _Float16, 4);
+VEC_SET (v16hf, _Float16, 3);
+VEC_SET (v16hf, _Float16, 8);
+VEC_SET (v16hf, _Float16, 15);
+VEC_SET (v32hf, _Float16, 5);
+VEC_SET (v32hf, _Float16, 8);
+VEC_SET (v32hf, _Float16, 14);
+VEC_SET (v32hf, _Float16, 16);
+VEC_SET (v32hf, _Float16, 24);
+VEC_SET (v32hf, _Float16, 28);
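+
+/* V8HF inserts blend with vpblendw; V16HF inserts expand to vpblendw
+   plus vpblendd; V32HF inserts go through an AVX512 mask blend.  */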
+/* { dg-final { scan-assembler-times "vpbroadcastw" 10 } } */
+/* { dg-final { scan-assembler-times "vpblendw" 4 } } */
+/* { dg-final { scan-assembler-times "vpblendd" 3 } } */
new file mode 100644
@@ -0,0 +1,95 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512fp16" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512fp16 } */
+
+#define AVX512VL
+#define AVX512FP16
+
+#ifndef CHECK
+#define CHECK "avx512f-helper.h"
+#endif
+
+#include CHECK
+#include "pr102327-1.c"
+
+#define RUNCHECK_VEC_EXTRACT(U,V,S,IDX) \
+ do \
+ { \
+ S tmp = vec_extract_##V##_##IDX ((V)U.x); \
+ if (tmp != U.a[IDX]) \
+ abort(); \
+ } \
+ while (0)
+
+#define RUNCHECK_VEC_SET(UTYPE,U,V,S,IDX,NUM) \
+ do \
+ { \
+ S tmp = 3.0f; \
+ UTYPE res; \
+ res.x = vec_set_##V##_##IDX ((V)U.x, tmp); \
+ for (int i = 0; i != NUM; i++) \
+ if (i == IDX) \
+ { \
+ if (res.a[i] != tmp) \
+ abort (); \
+ } \
+ else if (res.a[i] != U.a[i]) \
+ abort(); \
+ } \
+ while (0)
+
+void
+test_256 (void)
+{
+ union512h g1;
+ union256h t1;
+ union128h x1;
+ int sign = 1;
+
+ int i = 0;
+ for (i = 0; i < 32; i++)
+ {
+ g1.a[i] = 56.78 * (i - 30) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i != 16; i++)
+ {
+ t1.a[i] = 90.12 * (i + 40) * sign;
+ sign = -sign;
+ }
+
+ for (i = 0; i != 8; i++)
+ {
+ x1.a[i] = 90.12 * (i + 40) * sign;
+ sign = -sign;
+ }
+
+ RUNCHECK_VEC_EXTRACT (x1, v8hf, _Float16, 4);
+ RUNCHECK_VEC_EXTRACT (t1, v16hf, _Float16, 3);
+ RUNCHECK_VEC_EXTRACT (t1, v16hf, _Float16, 8);
+ RUNCHECK_VEC_EXTRACT (t1, v16hf, _Float16, 15);
+ RUNCHECK_VEC_EXTRACT (g1, v32hf, _Float16, 5);
+ RUNCHECK_VEC_EXTRACT (g1, v32hf, _Float16, 8);
+ RUNCHECK_VEC_EXTRACT (g1, v32hf, _Float16, 14);
+ RUNCHECK_VEC_EXTRACT (g1, v32hf, _Float16, 16);
+ RUNCHECK_VEC_EXTRACT (g1, v32hf, _Float16, 24);
+ RUNCHECK_VEC_EXTRACT (g1, v32hf, _Float16, 28);
+
+ RUNCHECK_VEC_SET (union128h, x1, v8hf, _Float16, 4, 8);
+ RUNCHECK_VEC_SET (union256h, t1, v16hf, _Float16, 3, 16);
+ RUNCHECK_VEC_SET (union256h, t1, v16hf, _Float16, 8, 16);
+ RUNCHECK_VEC_SET (union256h, t1, v16hf, _Float16, 15, 16);
+ RUNCHECK_VEC_SET (union512h, g1, v32hf, _Float16, 5, 32);
+ RUNCHECK_VEC_SET (union512h, g1, v32hf, _Float16, 8, 32);
+ RUNCHECK_VEC_SET (union512h, g1, v32hf, _Float16, 14, 32);
+ RUNCHECK_VEC_SET (union512h, g1, v32hf, _Float16, 16, 32);
+ RUNCHECK_VEC_SET (union512h, g1, v32hf, _Float16, 24, 32);
+ RUNCHECK_VEC_SET (union512h, g1, v32hf, _Float16, 28, 32);
+}
+
+void
+test_128 (void)
+{
+}