diff mbox series

i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]

Message ID DM4PR11MB54879E9AB6112FAA5BC1587EEC619@DM4PR11MB5487.namprd11.prod.outlook.com
State New
Headers show
Series i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811] | expand

Commit Message

Bill Schmidt via Gcc-patches Nov. 24, 2021, 8:43 a.m. UTC
Hi,

vcvtph2ps and vcvtps2ph should be used to convert between _Float16 and SFmode with -mf16c, so this patch adds define_expand patterns extendhfsf2 and truncsfhf2 for TARGET_F16C.
The temporary vector register is cleared before the conversion, and movhi_internal and ix86_can_change_mode_class are updated accordingly. The commit message has also been fixed.

OK for master?

gcc/ChangeLog:

	PR target/102811
	* config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data in XMM register
	for TARGET_SSE2.
	* config/i386/i386.md (extendhfsf2): Add extendhfsf2 for TARGET_F16C.
	(extendhfdf2): Restrict extendhfdf2 to TARGET_AVX512FP16 only.
	(*extendhf<mode>2): Rename from extendhf<mode>2.
	(truncsfhf2): Likewise.
	(truncdfhf2): Likewise.
	(*trunc<mode>hf2): Likewise.

gcc/testsuite/ChangeLog:

	PR target/102811
	* gcc.target/i386/pr90773-21.c: Optimize movhi_internal,
	also allow pextrw replace vmovd + movw.
	* gcc.target/i386/pr90773-23.c: Ditto.
	* gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
---
 gcc/config/i386/i386.c                        |  5 +-
 gcc/config/i386/i386.md                       | 74 +++++++++++++++++--
 .../i386/avx512vl-vcvtps2ph-pr102811.c        | 11 +++
 gcc/testsuite/gcc.target/i386/pr90773-21.c    |  2 +-
 gcc/testsuite/gcc.target/i386/pr90773-23.c    |  2 +-
 5 files changed, 83 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c

+32\\(%\[\^,\]+\\)" 1 } } */
--
2.18.1

Comments

Uros Bizjak Nov. 24, 2021, 8:48 a.m. UTC | #1
On Wed, Nov 24, 2021 at 9:44 AM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> Hi,
>
> vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
> Cleared before conversion, updated  movhi_internal and ix86_can_change_mode_class. And fixed some commit message.
>
> OK for master?

OK, with a small adjustment to ChangeLog.

Thanks,
Uros.

> gcc/ChangeLog:
>
>         PR target/102811
>         * config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data in XMM register
>         for TARGET_SSE2.
>         * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for TARGET_F16C.
>         (extendhfdf2): Restrict extendhfdf for TARGET_AVX512FP16 only.
>         (*extendhf<mode>2): Rename from extendhf<mode>2.
>         (truncsfhf2): Likewise.
>         (truncdfhf2): Likewise.
>         (*trunc<mode>2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/102811
>         * gcc.target/i386/pr90773-21.c: Optimize movhi_internal,
>         also allow pextrw replace vmovd + movw.

Just write:

* gcc.target/i386/pr90773-21.c: Allow pextrw instead of movw.

>         * gcc.target/i386/pr90773-23.c: Ditto.
>         * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
> ---
>  gcc/config/i386/i386.c                        |  5 +-
>  gcc/config/i386/i386.md                       | 74 +++++++++++++++++--
>  .../i386/avx512vl-vcvtps2ph-pr102811.c        | 11 +++
>  gcc/testsuite/gcc.target/i386/pr90773-21.c    |  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-23.c    |  2 +-
>  5 files changed, 83 insertions(+), 11 deletions(-)  create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e94efdf39fb..4b813533961 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
>          disallow a change to these modes, reload will assume it's ok to
>          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
>          the vec_dupv4hi pattern.
> -        NB: AVX512FP16 supports vmovw which can load 16bit data to sse
> -        register.  */
> -      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
> +        NB: SSE2 can load 16bit data to sse register via pinsrw.  */
> +      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 :
> +4;
>        if (GET_MODE_SIZE (from) < mov_size)
>         return false;
>      }
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6eb9de81921..6ee264f1151 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2525,6 +2525,16 @@
>      case TYPE_SSEMOV:
>        return ix86_output_ssemov (insn, operands);
>
> +    case TYPE_SSELOG:
> +      if (SSE_REG_P (operands[0]))
> +       return MEM_P (operands[1])
> +         ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> +      else
> +       return MEM_P (operands[1])
> +         ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +
>      case TYPE_MSKLOG:
>        if (operands[1] == const0_rtx)
>         return "kxorw\t%0, %0, %0";
> @@ -2540,13 +2550,17 @@
>      }
>  }
>    [(set (attr "isa")
> -       (cond [(eq_attr "alternative" "9,10,11,12,13")
> -                 (const_string "avx512fp16")
> +       (cond [(eq_attr "alternative" "9,10,11,12")
> +                 (const_string "sse2")
> +              (eq_attr "alternative" "13")
> +                 (const_string "sse4")
>                ]
>                (const_string "*")))
>     (set (attr "type")
>       (cond [(eq_attr "alternative" "9,10,11,12,13")
> -             (const_string "ssemov")
> +             (if_then_else (match_test "TARGET_AVX512FP16")
> +               (const_string "ssemov")
> +               (const_string "sselog"))
>             (eq_attr "alternative" "4,5,6,7")
>               (const_string "mskmov")
>             (eq_attr "alternative" "8")
> @@ -4574,8 +4588,32 @@
>    emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
>  })
>
> -(define_insn "extendhf<mode>2"
> -  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
> +(define_expand "extendhfsf2"
> +  [(set (match_operand:SF 0 "register_operand")
> +       (float_extend:SF
> +         (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +{
> +  if (!TARGET_AVX512FP16)
> +    {
> +      rtx res = gen_reg_rtx (V4SFmode);
> +      rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
> +
> +      ix86_expand_vector_set (false, tmp, operands[1], 0);
> +      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
> +      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
> +      DONE;
> +    }
> +})
> +
> +(define_expand "extendhfdf2"
> +  [(set (match_operand:DF 0 "register_operand")
> +       (float_extend:DF
> +         (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16")
> +
> +(define_insn "*extendhf<mode>2"
> +  [(set (match_operand:MODEF 0 "register_operand" "=v")
>          (float_extend:MODEF
>           (match_operand:HF 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512FP16"
> @@ -4766,7 +4804,31 @@
>
>  ;; Conversion from {SF,DF}mode to HFmode.
>
> -(define_insn "trunc<mode>hf2"
> +(define_expand "truncsfhf2"
> +  [(set (match_operand:HF 0 "register_operand")
> +       (float_truncate:HF
> +         (match_operand:SF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +  {
> +    if (!TARGET_AVX512FP16)
> +    {
> +      rtx res = gen_reg_rtx (V8HFmode);
> +      rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
> +
> +      ix86_expand_vector_set (false, tmp, operands[1], 0);
> +      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
> +      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
> +      DONE;
> +    }
> +  })
> +
> +(define_expand "truncdfhf2"
> +  [(set (match_operand:HF 0 "register_operand")
> +       (float_truncate:HF
> +         (match_operand:DF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16")
> +
> +(define_insn "*trunc<mode>hf2"
>    [(set (match_operand:HF 0 "register_operand" "=v")
>         (float_truncate:HF
>           (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> new file mode 100644
> index 00000000000..dfbfb167953
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
> +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
> +/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
> +_Float16 test (_Float16 a, _Float16 b)
> +{
> +  return a + b;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> index 5bbb387a3ea..0d620fff83c 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> @@ -10,4 +10,4 @@ foo (int c)
>  }
>
>  /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
> -/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } } */
> +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*,
> +32\\(%\[\^,\]+\\)" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> index ca4a86f30b8..b7369e802e1 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> @@ -10,4 +10,4 @@ foo (void)
>  }
>
>  /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
> -/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
> +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+,
> +32\\(%\[\^,\]+\\)" 1 } } */
> --
> 2.18.1
>
Bill Schmidt via Gcc-patches Nov. 24, 2021, 9:04 a.m. UTC | #2
OK, This is the patch I prepare to check in.

-----Original Message-----
From: Uros Bizjak <ubizjak@gmail.com> 
Sent: Wednesday, November 24, 2021 4:49 PM
To: Kong, Lingling <lingling.kong@intel.com>
Cc: Liu, Hongtao <hongtao.liu@intel.com>; gcc-patches@gcc.gnu.org
Subject: Re: [PATCH] i386: vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c [PR 102811]

On Wed, Nov 24, 2021 at 9:44 AM Kong, Lingling <lingling.kong@intel.com> wrote:
>
> Hi,
>
> vcvtph2ps and vcvtps2ph should be used to convert _Float16 to SFmode with -mf16c. So added define_insn extendhfsf2 and truncsfhf2 for target_f16c.
> Cleared before conversion, updated  movhi_internal and ix86_can_change_mode_class. And fixed some commit message.
>
> OK for master?

OK, with a small adjustment to ChangeLog.

Thanks,
Uros.

> gcc/ChangeLog:
>
>         PR target/102811
>         * config/i386/i386.c (ix86_can_change_mode_class): Allow 16 bit data in XMM register
>         for TARGET_SSE2.
>         * config/i386/i386.md (extendhfsf2): Add extenndhfsf2 for TARGET_F16C.
>         (extendhfdf2): Restrict extendhfdf for TARGET_AVX512FP16 only.
>         (*extendhf<mode>2): Rename from extendhf<mode>2.
>         (truncsfhf2): Likewise.
>         (truncdfhf2): Likewise.
>         (*trunc<mode>2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         PR target/102811
>         * gcc.target/i386/pr90773-21.c: Optimize movhi_internal,
>         also allow pextrw replace vmovd + movw.

Just write:

* gcc.target/i386/pr90773-21.c: Allow pextrw instead of movw.

>         * gcc.target/i386/pr90773-23.c: Ditto.
>         * gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c: New test.
> ---
>  gcc/config/i386/i386.c                        |  5 +-
>  gcc/config/i386/i386.md                       | 74 +++++++++++++++++--
>  .../i386/avx512vl-vcvtps2ph-pr102811.c        | 11 +++
>  gcc/testsuite/gcc.target/i386/pr90773-21.c    |  2 +-
>  gcc/testsuite/gcc.target/i386/pr90773-23.c    |  2 +-
>  5 files changed, 83 insertions(+), 11 deletions(-)  create mode 
> 100644 gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 
> e94efdf39fb..4b813533961 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -19485,9 +19485,8 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
>          disallow a change to these modes, reload will assume it's ok to
>          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
>          the vec_dupv4hi pattern.
> -        NB: AVX512FP16 supports vmovw which can load 16bit data to sse
> -        register.  */
> -      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
> +        NB: SSE2 can load 16bit data to sse register via pinsrw.  */
> +      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 :
> +4;
>        if (GET_MODE_SIZE (from) < mov_size)
>         return false;
>      }
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 
> 6eb9de81921..6ee264f1151 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2525,6 +2525,16 @@
>      case TYPE_SSEMOV:
>        return ix86_output_ssemov (insn, operands);
>
> +    case TYPE_SSELOG:
> +      if (SSE_REG_P (operands[0]))
> +       return MEM_P (operands[1])
> +         ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
> +      else
> +       return MEM_P (operands[1])
> +         ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
> +         : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
> +
>      case TYPE_MSKLOG:
>        if (operands[1] == const0_rtx)
>         return "kxorw\t%0, %0, %0";
> @@ -2540,13 +2550,17 @@
>      }
>  }
>    [(set (attr "isa")
> -       (cond [(eq_attr "alternative" "9,10,11,12,13")
> -                 (const_string "avx512fp16")
> +       (cond [(eq_attr "alternative" "9,10,11,12")
> +                 (const_string "sse2")
> +              (eq_attr "alternative" "13")
> +                 (const_string "sse4")
>                ]
>                (const_string "*")))
>     (set (attr "type")
>       (cond [(eq_attr "alternative" "9,10,11,12,13")
> -             (const_string "ssemov")
> +             (if_then_else (match_test "TARGET_AVX512FP16")
> +               (const_string "ssemov")
> +               (const_string "sselog"))
>             (eq_attr "alternative" "4,5,6,7")
>               (const_string "mskmov")
>             (eq_attr "alternative" "8") @@ -4574,8 +4588,32 @@
>    emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
>  })
>
> -(define_insn "extendhf<mode>2"
> -  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
> +(define_expand "extendhfsf2"
> +  [(set (match_operand:SF 0 "register_operand")
> +       (float_extend:SF
> +         (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +{
> +  if (!TARGET_AVX512FP16)
> +    {
> +      rtx res = gen_reg_rtx (V4SFmode);
> +      rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
> +
> +      ix86_expand_vector_set (false, tmp, operands[1], 0);
> +      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
> +      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
> +      DONE;
> +    }
> +})
> +
> +(define_expand "extendhfdf2"
> +  [(set (match_operand:DF 0 "register_operand")
> +       (float_extend:DF
> +         (match_operand:HF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16")
> +
> +(define_insn "*extendhf<mode>2"
> +  [(set (match_operand:MODEF 0 "register_operand" "=v")
>          (float_extend:MODEF
>           (match_operand:HF 1 "nonimmediate_operand" "vm")))]
>    "TARGET_AVX512FP16"
> @@ -4766,7 +4804,31 @@
>
>  ;; Conversion from {SF,DF}mode to HFmode.
>
> -(define_insn "trunc<mode>hf2"
> +(define_expand "truncsfhf2"
> +  [(set (match_operand:HF 0 "register_operand")
> +       (float_truncate:HF
> +         (match_operand:SF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
> +  {
> +    if (!TARGET_AVX512FP16)
> +    {
> +      rtx res = gen_reg_rtx (V8HFmode);
> +      rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
> +
> +      ix86_expand_vector_set (false, tmp, operands[1], 0);
> +      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
> +      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
> +      DONE;
> +    }
> +  })
> +
> +(define_expand "truncdfhf2"
> +  [(set (match_operand:HF 0 "register_operand")
> +       (float_truncate:HF
> +         (match_operand:DF 1 "nonimmediate_operand")))]
> +  "TARGET_AVX512FP16")
> +
> +(define_insn "*trunc<mode>hf2"
>    [(set (match_operand:HF 0 "register_operand" "=v")
>         (float_truncate:HF
>           (match_operand:MODEF 1 "nonimmediate_operand" "vm")))] diff 
> --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c 
> b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> new file mode 100644
> index 00000000000..dfbfb167953
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
> +/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
> +/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
> +/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
> +_Float16 test (_Float16 a, _Float16 b) {
> +  return a + b;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c 
> b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> index 5bbb387a3ea..0d620fff83c 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
> @@ -10,4 +10,4 @@ foo (int c)
>  }
>
>  /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, 
> \\(%\[\^,\]+\\)" 1 } } */
> -/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 
> 32\\(%\[\^,\]+\\)" 1 } } */
> +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*, 
> +32\\(%\[\^,\]+\\)" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c 
> b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> index ca4a86f30b8..b7369e802e1 100644
> --- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
> +++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
> @@ -10,4 +10,4 @@ foo (void)
>  }
>
>  /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, 
> \\(%\[\^,\]+\\)" 1 } } */
> -/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 
> 32\\(%\[\^,\]+\\)" 1 } } */
> +/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+, 
> +32\\(%\[\^,\]+\\)" 1 } } */
> --
> 2.18.1
>
diff mbox series

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e94efdf39fb..4b813533961 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19485,9 +19485,8 @@  ix86_can_change_mode_class (machine_mode from, machine_mode to,
 	 disallow a change to these modes, reload will assume it's ok to
 	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
 	 the vec_dupv4hi pattern.
-	 NB: AVX512FP16 supports vmovw which can load 16bit data to sse
-	 register.  */
-      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_AVX512FP16 ? 2 : 4;
+	 NB: SSE2 can load 16bit data to sse register via pinsrw.  */
+      int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
       if (GET_MODE_SIZE (from) < mov_size)
 	return false;
     }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6eb9de81921..6ee264f1151 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2525,6 +2525,16 @@ 
     case TYPE_SSEMOV:
       return ix86_output_ssemov (insn, operands);
 
+    case TYPE_SSELOG:
+      if (SSE_REG_P (operands[0]))
+	return MEM_P (operands[1])
+	  ? "pinsrw\t{$0, %1, %0|%0, %1, 0}"
+	  : "pinsrw\t{$0, %k1, %0|%0, %k1, 0}";
+      else
+	return MEM_P (operands[1])
+	  ? "pextrw\t{$0, %1, %0|%0, %1, 0}"
+	  : "pextrw\t{$0, %1, %k0|%k0, %k1, 0}";
+
     case TYPE_MSKLOG:
       if (operands[1] == const0_rtx)
 	return "kxorw\t%0, %0, %0";
@@ -2540,13 +2550,17 @@ 
     }
 }
   [(set (attr "isa")
-	(cond [(eq_attr "alternative" "9,10,11,12,13")
-		  (const_string "avx512fp16")
+	(cond [(eq_attr "alternative" "9,10,11,12")
+		  (const_string "sse2")
+	       (eq_attr "alternative" "13")
+		  (const_string "sse4")
 	       ]
 	       (const_string "*")))
    (set (attr "type")
      (cond [(eq_attr "alternative" "9,10,11,12,13")
-	      (const_string "ssemov")
+	      (if_then_else (match_test "TARGET_AVX512FP16")
+		(const_string "ssemov")
+		(const_string "sselog"))
 	    (eq_attr "alternative" "4,5,6,7")
 	      (const_string "mskmov")
 	    (eq_attr "alternative" "8")
@@ -4574,8 +4588,32 @@ 
   emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
 })
 
-(define_insn "extendhf<mode>2"
-  [(set (match_operand:MODEF 0 "nonimm_ssenomem_operand" "=v")
+(define_expand "extendhfsf2"
+  [(set (match_operand:SF 0 "register_operand")
+	(float_extend:SF
+	  (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+{
+  if (!TARGET_AVX512FP16)
+    {
+      rtx res = gen_reg_rtx (V4SFmode);
+      rtx tmp = force_reg (V8HFmode, CONST0_RTX (V8HFmode));
+
+      ix86_expand_vector_set (false, tmp, operands[1], 0);
+      emit_insn (gen_vcvtph2ps (res, gen_lowpart (V8HImode, tmp)));
+      emit_move_insn (operands[0], gen_lowpart (SFmode, res));
+      DONE;
+    }
+})
+
+(define_expand "extendhfdf2"
+  [(set (match_operand:DF 0 "register_operand")
+	(float_extend:DF
+	  (match_operand:HF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*extendhf<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "=v")
         (float_extend:MODEF
 	  (match_operand:HF 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512FP16"
@@ -4766,7 +4804,31 @@ 
 
 ;; Conversion from {SF,DF}mode to HFmode.
 
-(define_insn "trunc<mode>hf2"
+(define_expand "truncsfhf2"
+  [(set (match_operand:HF 0 "register_operand")
+	(float_truncate:HF
+	  (match_operand:SF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 || TARGET_F16C || TARGET_AVX512VL"
+  {
+    if (!TARGET_AVX512FP16)
+    {
+      rtx res = gen_reg_rtx (V8HFmode);
+      rtx tmp = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
+
+      ix86_expand_vector_set (false, tmp, operands[1], 0);
+      emit_insn (gen_vcvtps2ph (gen_lowpart (V8HImode, res), tmp, GEN_INT (4)));
+      emit_move_insn (operands[0], gen_lowpart (HFmode, res));
+      DONE;
+    }
+  })
+
+(define_expand "truncdfhf2"
+  [(set (match_operand:HF 0 "register_operand")
+	(float_truncate:HF
+	  (match_operand:DF 1 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16")
+
+(define_insn "*trunc<mode>hf2"
   [(set (match_operand:HF 0 "register_operand" "=v")
        (float_truncate:HF
          (match_operand:MODEF 1 "nonimmediate_operand" "vm")))]
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
new file mode 100644
index 00000000000..dfbfb167953
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr102811.c
@@ -0,0 +1,11 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mf16c -mno-avx512fp16" } */
+/* { dg-final { scan-assembler-times "vpxor\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtph2ps\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtps2ph\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-not "__truncsfhf2\[ \\t\]"} } */
+/* { dg-final { scan-assembler-not "__extendhfsf2\[ \\t\]"} } */
+_Float16 test (_Float16 a, _Float16 b)
+{
+  return a + b;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-21.c b/gcc/testsuite/gcc.target/i386/pr90773-21.c
index 5bbb387a3ea..0d620fff83c 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-21.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-21.c
@@ -10,4 +10,4 @@  foo (int c)
 }
 
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movw\[\\t \]%.*, 32\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \].*, 32\\(%\[\^,\]+\\)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr90773-23.c b/gcc/testsuite/gcc.target/i386/pr90773-23.c
index ca4a86f30b8..b7369e802e1 100644
--- a/gcc/testsuite/gcc.target/i386/pr90773-23.c
+++ b/gcc/testsuite/gcc.target/i386/pr90773-23.c
@@ -10,4 +10,4 @@  foo (void)
 }
 
 /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-times "movw\[\\t \]+.+, 32\\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:movw|pextrw)\[\\t \]+.+,