Commit Message
For AVX512-FP16, HFmode only supports vcmpsh, whose destination is a mask
register, so movhfcc is expanded to:
vcmpsh op2, op1, %k1
vmovsh op1, op2{%k1}
mov op2, dest
gcc/ChangeLog:
PR target/102639
* config/i386/i386-expand.c (ix86_valid_mask_cmp_mode): Handle
HFmode.
(ix86_use_mask_cmp_p): Ditto.
(ix86_expand_sse_movcc): Ditto.
* config/i386/i386.md (setcc_hf_mask): New define_insn.
(movhf_mask): Ditto.
(UNSPEC_MOVCC_MASK): New unspec.
* config/i386/sse.md (UNSPEC_PCMP): Move to i386.md.
gcc/testsuite/ChangeLog:
* g++.target/i386/pr102639.C: New test.
---
gcc/config/i386/i386-expand.c | 19 ++++++++++---
gcc/config/i386/i386.md | 34 +++++++++++++++++++++++-
gcc/config/i386/sse.md | 1 -
gcc/testsuite/g++.target/i386/pr102639.C | 19 +++++++++++++
4 files changed, 67 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/pr102639.C
Comments
On Fri, Oct 8, 2021 at 5:31 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> For AVX512-FP16, HFmode only supports vcmpsh, whose destination is a mask
> register, so movhfcc is expanded to:
>
> vcmpsh op2, op1, %k1
> vmovsh op1, op2{%k1}
> mov op2, dest
>
> gcc/ChangeLog:
>
> PR target/102639
> * config/i386/i386-expand.c (ix86_valid_mask_cmp_mode): Handle
> HFmode.
> (ix86_use_mask_cmp_p): Ditto.
> (ix86_expand_sse_movcc): Ditto.
> * config/i386/i386.md (setcc_hf_mask): New define_insn.
> (movhf_mask): Ditto.
> (UNSPEC_MOVCC_MASK): New unspec.
> * config/i386/sse.md (UNSPEC_PCMP): Move to i386.md.
>
> gcc/testsuite/ChangeLog:
> * g++.target/i386/pr102639.C: New test.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Also, no new failures for the AVX512FP16 runtime tests under sde{-m32,}.
Committed to trunk.
> ---
> gcc/config/i386/i386-expand.c | 19 ++++++++++---
> gcc/config/i386/i386.md | 34 +++++++++++++++++++++++-
> gcc/config/i386/sse.md | 1 -
> gcc/testsuite/g++.target/i386/pr102639.C | 19 +++++++++++++
> 4 files changed, 67 insertions(+), 6 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/pr102639.C
>
> diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> index 4780b993917..3c4a07d4d7d 100644
> --- a/gcc/config/i386/i386-expand.c
> +++ b/gcc/config/i386/i386-expand.c
> @@ -3613,6 +3613,10 @@ ix86_valid_mask_cmp_mode (machine_mode mode)
> if (TARGET_XOP && !TARGET_AVX512F)
> return false;
>
> + /* HFmode only supports vcmpsh whose dest is mask register. */
> + if (TARGET_AVX512FP16 && mode == HFmode)
> + return true;
> +
> /* AVX512F is needed for mask operation. */
> if (!(TARGET_AVX512F && VECTOR_MODE_P (mode)))
> return false;
> @@ -3634,7 +3638,9 @@ ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode,
> {
> int vector_size = GET_MODE_SIZE (mode);
>
> - if (vector_size < 16)
> + if (cmp_mode == HFmode)
> + return true;
> + else if (vector_size < 16)
> return false;
> else if (vector_size == 64)
> return true;
> @@ -3750,7 +3756,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
> && GET_MODE_CLASS (cmpmode) == MODE_INT)
> {
> gcc_assert (ix86_valid_mask_cmp_mode (mode));
> - /* Using vector move with mask register. */
> + /* Using scalar/vector move with mask register. */
> cmp = force_reg (cmpmode, cmp);
> /* Optimize for mask zero. */
> op_true = (op_true != CONST0_RTX (mode)
> @@ -3769,8 +3775,13 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
> std::swap (op_true, op_false);
> }
>
> - rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
> - emit_insn (gen_rtx_SET (dest, vec_merge));
> + if (mode == HFmode)
> + emit_insn (gen_movhf_mask (dest, op_true, op_false, cmp));
> + else
> + {
> + rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
> + emit_insn (gen_rtx_SET (dest, vec_merge));
> + }
> return;
> }
> else if (vector_all_ones_operand (op_true, mode)
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 04cb3bf6a33..c7ae4ac5fbc 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -117,6 +117,7 @@ (define_c_enum "unspec" [
> ;; For SSE/MMX support:
> UNSPEC_FIX_NOTRUNC
> UNSPEC_MASKMOV
> + UNSPEC_MOVCC_MASK
> UNSPEC_MOVMSK
> UNSPEC_BLENDV
> UNSPEC_PSHUFB
> @@ -125,8 +126,9 @@ (define_c_enum "unspec" [
> UNSPEC_RSQRT
> UNSPEC_PSADBW
>
> - ;; For AVX512F support
> + ;; For AVX/AVX512F support
> UNSPEC_SCALEF
> + UNSPEC_PCMP
>
> ;; Generic math support
> UNSPEC_IEEE_MIN ; not commutative
> @@ -13608,6 +13610,20 @@ (define_insn "setcc_<mode>_sse"
> (set_attr "length_immediate" "1")
> (set_attr "prefix" "orig,vex")
> (set_attr "mode" "<MODE>")])
> +
> +(define_insn "setcc_hf_mask"
> + [(set (match_operand:QI 0 "register_operand" "=k")
> + (unspec:QI
> + [(match_operand:HF 1 "register_operand" "v")
> + (match_operand:HF 2 "nonimmediate_operand" "vm")
> + (match_operand:SI 3 "const_0_to_31_operand" "n")]
> + UNSPEC_PCMP))]
> + "TARGET_AVX512FP16"
> + "vcmpsh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
> + [(set_attr "type" "ssecmp")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "HF")])
> +
>
> ;; Basic conditional jump instructions.
>
> @@ -19841,6 +19857,22 @@ (define_peephole2
> operands[9] = replace_rtx (operands[6], operands[0], operands[1], true);
> })
>
> +(define_insn "movhf_mask"
> + [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v")
> + (unspec:HF
> + [(match_operand:HF 1 "nonimmediate_operand" "m,v,v")
> + (match_operand:HF 2 "nonimm_or_0_operand" "0C,0C,0C")
> + (match_operand:QI 3 "register_operand" "Yk,Yk,Yk")]
> + UNSPEC_MOVCC_MASK))]
> + "TARGET_AVX512FP16"
> + "@
> + vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
> + vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
> + vmovsh\t{%d1, %0%{%3%}%N2|%0%{%3%}%N2, %d1}"
> + [(set_attr "type" "ssemov")
> + (set_attr "prefix" "evex")
> + (set_attr "mode" "HF")])
> +
> (define_expand "movhfcc"
> [(set (match_operand:HF 0 "register_operand")
> (if_then_else:HF
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 4559b0ce9c9..a3c4a3f1e62 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -67,7 +67,6 @@ (define_c_enum "unspec" [
> UNSPEC_PCLMUL
>
> ;; For AVX support
> - UNSPEC_PCMP
> UNSPEC_VPERMIL
> UNSPEC_VPERMIL2
> UNSPEC_VPERMIL2F128
> diff --git a/gcc/testsuite/g++.target/i386/pr102639.C b/gcc/testsuite/g++.target/i386/pr102639.C
> new file mode 100644
> index 00000000000..f094e4d1b43
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102639.C
> @@ -0,0 +1,19 @@
> +/* PR target/102639 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -std=c++14 -mavx512fp16" } */
> +/* { dg-final { scan-assembler-times "vminsh" 1 } } */
> +
> +typedef _Float16 v16hf __attribute__((vector_size(2)));
> +v16hf vcond_v16hfv16hfge_b, vcond_v16hfv16hfge_c, vcond_v16hfv16hfge_d,
> + __attribute__vcond_v16hfv16hfge_a;
> +v16hf __attribute__vcond_v16hfv16hfge() {
> + return __attribute__vcond_v16hfv16hfge_a >= vcond_v16hfv16hfge_b
> + ? vcond_v16hfv16hfge_c
> + : vcond_v16hfv16hfge_d;
> +}
> +
> +v16hf __attribute__vcond_v16hfv16hfmax() {
> + return __attribute__vcond_v16hfv16hfge_a < vcond_v16hfv16hfge_b
> + ? __attribute__vcond_v16hfv16hfge_a
> + : vcond_v16hfv16hfge_b;
> +}
> --
> 2.18.1
>
@@ -3613,6 +3613,10 @@ ix86_valid_mask_cmp_mode (machine_mode mode)
if (TARGET_XOP && !TARGET_AVX512F)
return false;
+ /* HFmode only supports vcmpsh whose dest is mask register. */
+ if (TARGET_AVX512FP16 && mode == HFmode)
+ return true;
+
/* AVX512F is needed for mask operation. */
if (!(TARGET_AVX512F && VECTOR_MODE_P (mode)))
return false;
@@ -3634,7 +3638,9 @@ ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode,
{
int vector_size = GET_MODE_SIZE (mode);
- if (vector_size < 16)
+ if (cmp_mode == HFmode)
+ return true;
+ else if (vector_size < 16)
return false;
else if (vector_size == 64)
return true;
@@ -3750,7 +3756,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
&& GET_MODE_CLASS (cmpmode) == MODE_INT)
{
gcc_assert (ix86_valid_mask_cmp_mode (mode));
- /* Using vector move with mask register. */
+ /* Using scalar/vector move with mask register. */
cmp = force_reg (cmpmode, cmp);
/* Optimize for mask zero. */
op_true = (op_true != CONST0_RTX (mode)
@@ -3769,8 +3775,13 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
std::swap (op_true, op_false);
}
- rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
- emit_insn (gen_rtx_SET (dest, vec_merge));
+ if (mode == HFmode)
+ emit_insn (gen_movhf_mask (dest, op_true, op_false, cmp));
+ else
+ {
+ rtx vec_merge = gen_rtx_VEC_MERGE (mode, op_true, op_false, cmp);
+ emit_insn (gen_rtx_SET (dest, vec_merge));
+ }
return;
}
else if (vector_all_ones_operand (op_true, mode)
@@ -117,6 +117,7 @@ (define_c_enum "unspec" [
;; For SSE/MMX support:
UNSPEC_FIX_NOTRUNC
UNSPEC_MASKMOV
+ UNSPEC_MOVCC_MASK
UNSPEC_MOVMSK
UNSPEC_BLENDV
UNSPEC_PSHUFB
@@ -125,8 +126,9 @@ (define_c_enum "unspec" [
UNSPEC_RSQRT
UNSPEC_PSADBW
- ;; For AVX512F support
+ ;; For AVX/AVX512F support
UNSPEC_SCALEF
+ UNSPEC_PCMP
;; Generic math support
UNSPEC_IEEE_MIN ; not commutative
@@ -13608,6 +13610,20 @@ (define_insn "setcc_<mode>_sse"
(set_attr "length_immediate" "1")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<MODE>")])
+
+(define_insn "setcc_hf_mask"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (unspec:QI
+ [(match_operand:HF 1 "register_operand" "v")
+ (match_operand:HF 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_31_operand" "n")]
+ UNSPEC_PCMP))]
+ "TARGET_AVX512FP16"
+ "vcmpsh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "HF")])
+
;; Basic conditional jump instructions.
@@ -19841,6 +19857,22 @@ (define_peephole2
operands[9] = replace_rtx (operands[6], operands[0], operands[1], true);
})
+(define_insn "movhf_mask"
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=v,m,v")
+ (unspec:HF
+ [(match_operand:HF 1 "nonimmediate_operand" "m,v,v")
+ (match_operand:HF 2 "nonimm_or_0_operand" "0C,0C,0C")
+ (match_operand:QI 3 "register_operand" "Yk,Yk,Yk")]
+ UNSPEC_MOVCC_MASK))]
+ "TARGET_AVX512FP16"
+ "@
+ vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
+ vmovsh\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}
+ vmovsh\t{%d1, %0%{%3%}%N2|%0%{%3%}%N2, %d1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "HF")])
+
(define_expand "movhfcc"
[(set (match_operand:HF 0 "register_operand")
(if_then_else:HF
@@ -67,7 +67,6 @@ (define_c_enum "unspec" [
UNSPEC_PCLMUL
;; For AVX support
- UNSPEC_PCMP
UNSPEC_VPERMIL
UNSPEC_VPERMIL2
UNSPEC_VPERMIL2F128
new file mode 100644
@@ -0,0 +1,19 @@
+/* PR target/102639 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=c++14 -mavx512fp16" } */
+/* { dg-final { scan-assembler-times "vminsh" 1 } } */
+
+typedef _Float16 v16hf __attribute__((vector_size(2)));
+v16hf vcond_v16hfv16hfge_b, vcond_v16hfv16hfge_c, vcond_v16hfv16hfge_d,
+ __attribute__vcond_v16hfv16hfge_a;
+v16hf __attribute__vcond_v16hfv16hfge() {
+ return __attribute__vcond_v16hfv16hfge_a >= vcond_v16hfv16hfge_b
+ ? vcond_v16hfv16hfge_c
+ : vcond_v16hfv16hfge_d;
+}
+
+v16hf __attribute__vcond_v16hfv16hfmax() {
+ return __attribute__vcond_v16hfv16hfge_a < vcond_v16hfv16hfge_b
+ ? __attribute__vcond_v16hfv16hfge_a
+ : vcond_v16hfv16hfge_b;
+}