@@ -14855,6 +14855,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
goto widen;
case E_V8HImode:
+ case E_V8HFmode:
if (TARGET_AVX2)
return ix86_vector_duplicate_value (mode, target, val);
@@ -14871,15 +14872,22 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
dperm.one_operand_p = true;
- /* Extend to SImode using a paradoxical SUBREG. */
- tmp1 = gen_reg_rtx (SImode);
- emit_move_insn (tmp1, gen_lowpart (SImode, val));
-
- /* Insert the SImode value as low element of a V4SImode vector. */
- tmp2 = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
- emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
+ if (mode == V8HFmode)
+ tmp1 = lowpart_subreg (V8HFmode, force_reg (HFmode, val), HFmode);
+ else
+ {
+ /* Extend to SImode using a paradoxical SUBREG. */
+ tmp1 = gen_reg_rtx (SImode);
+ emit_move_insn (tmp1, gen_lowpart (SImode, val));
+
+ /* Insert the SImode value as the
+ low element of a V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
+ tmp1 = gen_lowpart (mode, tmp2);
+ }
+ emit_move_insn (dperm.op0, tmp1);
ok = (expand_vec_perm_1 (&dperm)
|| expand_vec_perm_broadcast_1 (&dperm));
gcc_assert (ok);
@@ -14926,12 +14934,15 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
}
case E_V16HImode:
+ case E_V16HFmode:
case E_V32QImode:
if (TARGET_AVX2)
return ix86_vector_duplicate_value (mode, target, val);
else
{
- machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
+ machine_mode hvmode = (mode == V16HImode ? V8HImode
+ : mode == V16HFmode ? V8HFmode
+ : V16QImode);
rtx x = gen_reg_rtx (hvmode);
ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
@@ -14942,13 +14953,16 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
}
return true;
- case E_V64QImode:
case E_V32HImode:
+ case E_V32HFmode:
+ case E_V64QImode:
if (TARGET_AVX512BW)
return ix86_vector_duplicate_value (mode, target, val);
else
{
- machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
+ machine_mode hvmode = (mode == V32HImode ? V16HImode
+ : mode == V32HFmode ? V16HFmode
+ : V32QImode);
rtx x = gen_reg_rtx (hvmode);
ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
@@ -14959,11 +14973,6 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
}
return true;
- case E_V8HFmode:
- case E_V16HFmode:
- case E_V32HFmode:
- return ix86_vector_duplicate_value (mode, target, val);
-
default:
return false;
}
@@ -15912,7 +15921,8 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
/* 512-bits vector byte/word broadcast and comparison only available
under TARGET_AVX512BW, break 512-bits vector into two 256-bits vector
when without TARGET_AVX512BW. */
- if ((mode == V32HImode || mode == V64QImode) && !TARGET_AVX512BW)
+ if ((mode == V32HImode || mode == V32HFmode || mode == V64QImode)
+ && !TARGET_AVX512BW)
{
gcc_assert (TARGET_AVX512F);
rtx vhi, vlo, idx_hi;
@@ -15926,6 +15936,12 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
extract_hi = gen_vec_extract_hi_v32hi;
extract_lo = gen_vec_extract_lo_v32hi;
}
+ else if (mode == V32HFmode)
+ {
+ half_mode = V16HFmode;
+ extract_hi = gen_vec_extract_hi_v32hf;
+ extract_lo = gen_vec_extract_lo_v32hf;
+ }
else
{
half_mode = V32QImode;
@@ -15973,7 +15989,6 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
case E_V16SFmode:
cmp_mode = V16SImode;
break;
- /* TARGET_AVX512FP16 implies TARGET_AVX512BW. */
case E_V8HFmode:
cmp_mode = V8HImode;
break;
@@ -16538,6 +16553,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
break;
case E_V8HImode:
+ case E_V8HFmode:
case E_V2HImode:
use_vec_extr = TARGET_SSE2;
break;
@@ -16704,25 +16720,29 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
return;
case E_V32HFmode:
- tmp = gen_reg_rtx (V16HFmode);
- if (elt < 16)
- emit_insn (gen_vec_extract_lo_v32hf (tmp, vec));
- else
- emit_insn (gen_vec_extract_hi_v32hf (tmp, vec));
- ix86_expand_vector_extract (false, target, tmp, elt & 15);
- return;
+ if (TARGET_AVX512BW)
+ {
+ tmp = gen_reg_rtx (V16HFmode);
+ if (elt < 16)
+ emit_insn (gen_vec_extract_lo_v32hf (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v32hf (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 15);
+ return;
+ }
+ break;
case E_V16HFmode:
- tmp = gen_reg_rtx (V8HFmode);
- if (elt < 8)
- emit_insn (gen_vec_extract_lo_v16hf (tmp, vec));
- else
- emit_insn (gen_vec_extract_hi_v16hf (tmp, vec));
- ix86_expand_vector_extract (false, target, tmp, elt & 7);
- return;
-
- case E_V8HFmode:
- use_vec_extr = true;
+ if (TARGET_AVX)
+ {
+ tmp = gen_reg_rtx (V8HFmode);
+ if (elt < 8)
+ emit_insn (gen_vec_extract_lo_v16hf (tmp, vec));
+ else
+ emit_insn (gen_vec_extract_hi_v16hf (tmp, vec));
+ ix86_expand_vector_extract (false, target, tmp, elt & 7);
+ return;
+ }
break;
case E_V8QImode:
@@ -21443,6 +21463,34 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
return true;
+ case E_V8HFmode:
+ /* This can be implemented via interleave and pshufd. */
+ if (d->testing_p)
+ return true;
+
+ if (elt >= nelt2)
+ {
+ gen = gen_vec_interleave_highv8hf;
+ elt -= nelt2;
+ }
+ else
+ gen = gen_vec_interleave_lowv8hf;
+ nelt2 /= 2;
+
+ dest = gen_reg_rtx (vmode);
+ emit_insn (gen (dest, op0, op0));
+
+ vmode = V4SImode;
+ op0 = gen_lowpart (vmode, dest);
+
+ memset (perm2, elt, 4);
+ dest = gen_reg_rtx (vmode);
+ ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
+ gcc_assert (ok);
+
+ emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
+ return true;
+
case E_V32QImode:
case E_V16HImode:
case E_V8SImode:
@@ -266,9 +266,7 @@
(define_mode_iterator VI12HF_AVX512VL
[V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
- (V32HF "TARGET_AVX512FP16")
- (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")])
+ V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
;; Same iterator, but without supposed TARGET_AVX512BW
(define_mode_iterator VI12_AVX512VLBW
@@ -285,8 +283,7 @@
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
- (V8HF "TARGET_AVX512FP16")
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
@@ -311,10 +308,10 @@
;; All 256bit and 512bit vector modes
(define_mode_iterator V_256_512
- [V32QI V16HI V8SI V4DI V8SF V4DF
- (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
- (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
- (V16HF "TARGET_AVX512FP16") (V32HF "TARGET_AVX512FP16")])
+ [V32QI V16HI V16HF V8SI V4DI V8SF V4DF
+ (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V32HF "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DF "TARGET_AVX512F")])
;; All vector float modes
(define_mode_iterator VF
@@ -24892,8 +24889,8 @@
"operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
(define_insn "avx_vbroadcastf128_<mode>"
- [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
- (vec_concat:V_256
+ [(set (match_operand:V_256H 0 "register_operand" "=x,x,x,v,v,v,v")
+ (vec_concat:V_256H
(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
(match_dup 1)))]
"TARGET_AVX"