[GIMPLE] Simplify (_Float16) ceil ((double) x) to .CEIL (x) when available.

Message ID 20210924112552.2524168-1-hongtao.liu@intel.com
State New
Headers
Series [GIMPLE] Simplify (_Float16) ceil ((double) x) to .CEIL (x) when available. |

Commit Message

liuhongt Sept. 24, 2021, 11:25 a.m. UTC
  Hi:
  Related discussion in [1] and PR.

  Bootstrapped and regtest on x86_64-linux-gnu{-m32,}.
  Ok for trunk?

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574330.html

gcc/ChangeLog:

	PR target/102464
	* config/i386/i386.c (ix86_optab_supported_p):
	Return true for HFmode.
	* match.pd: Simplify (_Float16) ceil ((double) x) to
	__builtin_ceilf16 (a) when a is _Float16 type and
	direct_internal_fn_supported_p.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr102464.c: New test.
---
 gcc/config/i386/i386.c                   | 20 +++++++-----
 gcc/match.pd                             | 28 +++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr102464.c | 39 ++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102464.c
  

Comments

Uros Bizjak Sept. 24, 2021, 4:30 p.m. UTC | #1
On Fri, Sep 24, 2021 at 1:26 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> Hi:
>   Related discussion in [1] and PR.
>
>   Bootstrapped and regtest on x86_64-linux-gnu{-m32,}.
>   Ok for trunk?
>
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574330.html
>
> gcc/ChangeLog:
>
>         PR target/102464
>         * config/i386/i386.c (ix86_optab_supported_p):
>         Return true for HFmode.
>         * match.pd: Simplify (_Float16) ceil ((double) x) to
>         __builtin_ceilf16 (a) when a is _Float16 type and
>         direct_internal_fn_supported_p.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr102464.c: New test.

OK for x86 part.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.c                   | 20 +++++++-----
>  gcc/match.pd                             | 28 +++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr102464.c | 39 ++++++++++++++++++++++++
>  3 files changed, 79 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102464.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index ba89e111d28..3767fe9806d 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -23582,20 +23582,24 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
>        return opt_type == OPTIMIZE_FOR_SPEED;
>
>      case rint_optab:
> -      if (SSE_FLOAT_MODE_P (mode1)
> -         && TARGET_SSE_MATH
> -         && !flag_trapping_math
> -         && !TARGET_SSE4_1)
> +      if (mode1 == HFmode)
> +       return true;
> +      else if (SSE_FLOAT_MODE_P (mode1)
> +              && TARGET_SSE_MATH
> +              && !flag_trapping_math
> +              && !TARGET_SSE4_1)
>         return opt_type == OPTIMIZE_FOR_SPEED;
>        return true;
>
>      case floor_optab:
>      case ceil_optab:
>      case btrunc_optab:
> -      if (SSE_FLOAT_MODE_P (mode1)
> -         && TARGET_SSE_MATH
> -         && !flag_trapping_math
> -         && TARGET_SSE4_1)
> +      if (mode1 == HFmode)
> +       return true;
> +      else if (SSE_FLOAT_MODE_P (mode1)
> +              && TARGET_SSE_MATH
> +              && !flag_trapping_math
> +              && TARGET_SSE4_1)
>         return true;
>        return opt_type == OPTIMIZE_FOR_SPEED;
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index a9791ceb74a..9ccec8b6ce3 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -6191,6 +6191,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     (froms (convert float_value_p@0))
>     (convert (tos @0)))))
>
> +#if GIMPLE
> +(match float16_value_p
> + @0
> + (if (TYPE_MAIN_VARIANT (TREE_TYPE (@0)) == float16_type_node)))
> +(for froms (BUILT_IN_TRUNCL BUILT_IN_TRUNC BUILT_IN_TRUNCF
> +           BUILT_IN_FLOORL BUILT_IN_FLOOR BUILT_IN_FLOORF
> +           BUILT_IN_CEILL BUILT_IN_CEIL BUILT_IN_CEILF
> +           BUILT_IN_ROUNDEVENL BUILT_IN_ROUNDEVEN BUILT_IN_ROUNDEVENF
> +           BUILT_IN_ROUNDL BUILT_IN_ROUND BUILT_IN_ROUNDF
> +           BUILT_IN_NEARBYINTL BUILT_IN_NEARBYINT BUILT_IN_NEARBYINTF
> +           BUILT_IN_RINTL BUILT_IN_RINT BUILT_IN_RINTF)
> +     tos (IFN_TRUNC IFN_TRUNC IFN_TRUNC
> +         IFN_FLOOR IFN_FLOOR IFN_FLOOR
> +         IFN_CEIL IFN_CEIL IFN_CEIL
> +         IFN_ROUNDEVEN IFN_ROUNDEVEN IFN_ROUNDEVEN
> +         IFN_ROUND IFN_ROUND IFN_ROUND
> +         IFN_NEARBYINT IFN_NEARBYINT IFN_NEARBYINT
> +         IFN_RINT IFN_RINT IFN_RINT)
> + /* (_Float16) round ((doube) x) -> __built_in_roundf16 (x), etc.,
> +    if x is a _Float16.  */
> + (simplify
> +   (convert (froms (convert float16_value_p@0)))
> +     (if (types_match (type, TREE_TYPE (@0))
> +         && direct_internal_fn_supported_p (as_internal_fn (tos),
> +                                            type, OPTIMIZE_FOR_BOTH))
> +       (tos @0))))
> +#endif
> +
>  (for froms (XFLOORL XCEILL XROUNDL XRINTL)
>       tos (XFLOOR XCEIL XROUND XRINT)
>   /* llfloorl(extend(x)) -> llfloor(x), etc., if x is a double.  */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102464.c b/gcc/testsuite/gcc.target/i386/pr102464.c
> new file mode 100644
> index 00000000000..e3e060ee80b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102464.c
> @@ -0,0 +1,39 @@
> +/* PR target/102464.  */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512fp16" } */
> +
> +#define FOO(FUNC,SUFFIX)                       \
> +  _Float16                                     \
> +  foo_##FUNC##_##SUFFIX (_Float16 a)           \
> +  {                                            \
> +    return __builtin_##FUNC##SUFFIX (a);       \
> +  }
> +
> +FOO (roundeven, f16);
> +FOO (roundeven, f);
> +FOO (roundeven, );
> +FOO (roundeven, l);
> +FOO (trunc, f16);
> +FOO (trunc, f);
> +FOO (trunc, );
> +FOO (trunc, l);
> +FOO (ceil, f16);
> +FOO (ceil, f);
> +FOO (ceil, );
> +FOO (ceil, l);
> +FOO (floor, f16);
> +FOO (floor, f);
> +FOO (floor, );
> +FOO (floor, l);
> +FOO (nearbyint, f16);
> +FOO (nearbyint, f);
> +FOO (nearbyint, );
> +FOO (nearbyint, l);
> +FOO (rint, f16);
> +FOO (rint, f);
> +FOO (rint, );
> +FOO (rint, l);
> +
> +/* { dg-final { scan-assembler-not "vcvtsh2s\[sd\]" } } */
> +/* { dg-final { scan-assembler-not "extendhfxf" } } */
> +/* { dg-final { scan-assembler-times "vrndscalesh\[^\n\r\]*xmm\[0-9\]" 24 } } */
> --
> 2.27.0
>
  
Richard Biener Sept. 27, 2021, 12:53 p.m. UTC | #2
On Fri, Sep 24, 2021 at 1:26 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> Hi:
>   Related discussion in [1] and PR.
>
>   Bootstrapped and regtest on x86_64-linux-gnu{-m32,}.
>   Ok for trunk?
>
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574330.html
>
> gcc/ChangeLog:
>
>         PR target/102464
>         * config/i386/i386.c (ix86_optab_supported_p):
>         Return true for HFmode.
>         * match.pd: Simplify (_Float16) ceil ((double) x) to
>         __builtin_ceilf16 (a) when a is _Float16 type and
>         direct_internal_fn_supported_p.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr102464.c: New test.
> ---
>  gcc/config/i386/i386.c                   | 20 +++++++-----
>  gcc/match.pd                             | 28 +++++++++++++++++
>  gcc/testsuite/gcc.target/i386/pr102464.c | 39 ++++++++++++++++++++++++
>  3 files changed, 79 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102464.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index ba89e111d28..3767fe9806d 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -23582,20 +23582,24 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
>        return opt_type == OPTIMIZE_FOR_SPEED;
>
>      case rint_optab:
> -      if (SSE_FLOAT_MODE_P (mode1)
> -         && TARGET_SSE_MATH
> -         && !flag_trapping_math
> -         && !TARGET_SSE4_1)
> +      if (mode1 == HFmode)
> +       return true;
> +      else if (SSE_FLOAT_MODE_P (mode1)
> +              && TARGET_SSE_MATH
> +              && !flag_trapping_math
> +              && !TARGET_SSE4_1)
>         return opt_type == OPTIMIZE_FOR_SPEED;
>        return true;
>
>      case floor_optab:
>      case ceil_optab:
>      case btrunc_optab:
> -      if (SSE_FLOAT_MODE_P (mode1)
> -         && TARGET_SSE_MATH
> -         && !flag_trapping_math
> -         && TARGET_SSE4_1)
> +      if (mode1 == HFmode)
> +       return true;
> +      else if (SSE_FLOAT_MODE_P (mode1)
> +              && TARGET_SSE_MATH
> +              && !flag_trapping_math
> +              && TARGET_SSE4_1)
>         return true;
>        return opt_type == OPTIMIZE_FOR_SPEED;
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index a9791ceb74a..9ccec8b6ce3 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -6191,6 +6191,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     (froms (convert float_value_p@0))
>     (convert (tos @0)))))
>
> +#if GIMPLE
> +(match float16_value_p
> + @0
> + (if (TYPE_MAIN_VARIANT (TREE_TYPE (@0)) == float16_type_node)))
> +(for froms (BUILT_IN_TRUNCL BUILT_IN_TRUNC BUILT_IN_TRUNCF
> +           BUILT_IN_FLOORL BUILT_IN_FLOOR BUILT_IN_FLOORF
> +           BUILT_IN_CEILL BUILT_IN_CEIL BUILT_IN_CEILF
> +           BUILT_IN_ROUNDEVENL BUILT_IN_ROUNDEVEN BUILT_IN_ROUNDEVENF
> +           BUILT_IN_ROUNDL BUILT_IN_ROUND BUILT_IN_ROUNDF
> +           BUILT_IN_NEARBYINTL BUILT_IN_NEARBYINT BUILT_IN_NEARBYINTF
> +           BUILT_IN_RINTL BUILT_IN_RINT BUILT_IN_RINTF)

we do have patterns that convert (truncl (convert floatval)) to
(float)truncf (val),
your's does (_Float16)trunc ((double) float16) -> truncF16 (float16), doesn't it
make sense to have trunc ((double) float16) -> (double)trunfF16
(float16) as well?

Why do you conditionalize on GIMPLE here?

That said, I wonder whether we can somehow address pattern explosion here,
eliding the outer (convert ...) from the match would help a bit already.

The related patterns use optimize && canonicalize_math_p as well btw., not
sure whether either is appropriate here since there are no _Float16 math
functions available.

> +     tos (IFN_TRUNC IFN_TRUNC IFN_TRUNC
> +         IFN_FLOOR IFN_FLOOR IFN_FLOOR
> +         IFN_CEIL IFN_CEIL IFN_CEIL
> +         IFN_ROUNDEVEN IFN_ROUNDEVEN IFN_ROUNDEVEN
> +         IFN_ROUND IFN_ROUND IFN_ROUND
> +         IFN_NEARBYINT IFN_NEARBYINT IFN_NEARBYINT
> +         IFN_RINT IFN_RINT IFN_RINT)
> + /* (_Float16) round ((doube) x) -> __built_in_roundf16 (x), etc.,
> +    if x is a _Float16.  */
> + (simplify
> +   (convert (froms (convert float16_value_p@0)))
> +     (if (types_match (type, TREE_TYPE (@0))
> +         && direct_internal_fn_supported_p (as_internal_fn (tos),
> +                                            type, OPTIMIZE_FOR_BOTH))
> +       (tos @0))))
> +#endif
> +
>  (for froms (XFLOORL XCEILL XROUNDL XRINTL)
>       tos (XFLOOR XCEIL XROUND XRINT)
>   /* llfloorl(extend(x)) -> llfloor(x), etc., if x is a double.  */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102464.c b/gcc/testsuite/gcc.target/i386/pr102464.c
> new file mode 100644
> index 00000000000..e3e060ee80b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102464.c
> @@ -0,0 +1,39 @@
> +/* PR target/102464.  */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512fp16" } */
> +
> +#define FOO(FUNC,SUFFIX)                       \
> +  _Float16                                     \
> +  foo_##FUNC##_##SUFFIX (_Float16 a)           \
> +  {                                            \
> +    return __builtin_##FUNC##SUFFIX (a);       \
> +  }
> +
> +FOO (roundeven, f16);
> +FOO (roundeven, f);
> +FOO (roundeven, );
> +FOO (roundeven, l);
> +FOO (trunc, f16);
> +FOO (trunc, f);
> +FOO (trunc, );
> +FOO (trunc, l);
> +FOO (ceil, f16);
> +FOO (ceil, f);
> +FOO (ceil, );
> +FOO (ceil, l);
> +FOO (floor, f16);
> +FOO (floor, f);
> +FOO (floor, );
> +FOO (floor, l);
> +FOO (nearbyint, f16);
> +FOO (nearbyint, f);
> +FOO (nearbyint, );
> +FOO (nearbyint, l);
> +FOO (rint, f16);
> +FOO (rint, f);
> +FOO (rint, );
> +FOO (rint, l);
> +
> +/* { dg-final { scan-assembler-not "vcvtsh2s\[sd\]" } } */
> +/* { dg-final { scan-assembler-not "extendhfxf" } } */
> +/* { dg-final { scan-assembler-times "vrndscalesh\[^\n\r\]*xmm\[0-9\]" 24 } } */
> --
> 2.27.0
>
  
Hongtao Liu Sept. 28, 2021, 2:07 a.m. UTC | #3
On Mon, Sep 27, 2021 at 8:53 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Fri, Sep 24, 2021 at 1:26 PM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > Hi:
> >   Related discussion in [1] and PR.
> >
> >   Bootstrapped and regtest on x86_64-linux-gnu{-m32,}.
> >   Ok for trunk?
> >
> > [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574330.html
> >
> > gcc/ChangeLog:
> >
> >         PR target/102464
> >         * config/i386/i386.c (ix86_optab_supported_p):
> >         Return true for HFmode.
> >         * match.pd: Simplify (_Float16) ceil ((double) x) to
> >         __builtin_ceilf16 (a) when a is _Float16 type and
> >         direct_internal_fn_supported_p.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/i386/pr102464.c: New test.
> > ---
> >  gcc/config/i386/i386.c                   | 20 +++++++-----
> >  gcc/match.pd                             | 28 +++++++++++++++++
> >  gcc/testsuite/gcc.target/i386/pr102464.c | 39 ++++++++++++++++++++++++
> >  3 files changed, 79 insertions(+), 8 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102464.c
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index ba89e111d28..3767fe9806d 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -23582,20 +23582,24 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
> >        return opt_type == OPTIMIZE_FOR_SPEED;
> >
> >      case rint_optab:
> > -      if (SSE_FLOAT_MODE_P (mode1)
> > -         && TARGET_SSE_MATH
> > -         && !flag_trapping_math
> > -         && !TARGET_SSE4_1)
> > +      if (mode1 == HFmode)
> > +       return true;
> > +      else if (SSE_FLOAT_MODE_P (mode1)
> > +              && TARGET_SSE_MATH
> > +              && !flag_trapping_math
> > +              && !TARGET_SSE4_1)
> >         return opt_type == OPTIMIZE_FOR_SPEED;
> >        return true;
> >
> >      case floor_optab:
> >      case ceil_optab:
> >      case btrunc_optab:
> > -      if (SSE_FLOAT_MODE_P (mode1)
> > -         && TARGET_SSE_MATH
> > -         && !flag_trapping_math
> > -         && TARGET_SSE4_1)
> > +      if (mode1 == HFmode)
> > +       return true;
> > +      else if (SSE_FLOAT_MODE_P (mode1)
> > +              && TARGET_SSE_MATH
> > +              && !flag_trapping_math
> > +              && TARGET_SSE4_1)
> >         return true;
> >        return opt_type == OPTIMIZE_FOR_SPEED;
> >
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index a9791ceb74a..9ccec8b6ce3 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -6191,6 +6191,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >     (froms (convert float_value_p@0))
> >     (convert (tos @0)))))
> >
> > +#if GIMPLE
> > +(match float16_value_p
> > + @0
> > + (if (TYPE_MAIN_VARIANT (TREE_TYPE (@0)) == float16_type_node)))
> > +(for froms (BUILT_IN_TRUNCL BUILT_IN_TRUNC BUILT_IN_TRUNCF
> > +           BUILT_IN_FLOORL BUILT_IN_FLOOR BUILT_IN_FLOORF
> > +           BUILT_IN_CEILL BUILT_IN_CEIL BUILT_IN_CEILF
> > +           BUILT_IN_ROUNDEVENL BUILT_IN_ROUNDEVEN BUILT_IN_ROUNDEVENF
> > +           BUILT_IN_ROUNDL BUILT_IN_ROUND BUILT_IN_ROUNDF
> > +           BUILT_IN_NEARBYINTL BUILT_IN_NEARBYINT BUILT_IN_NEARBYINTF
> > +           BUILT_IN_RINTL BUILT_IN_RINT BUILT_IN_RINTF)
>
> we do have patterns that convert (truncl (convert floatval)) to
> (float)truncf (val),
> your's does (_Float16)trunc ((double) float16) -> truncF16 (float16), doesn't it
> make sense to have trunc ((double) float16) -> (double)trunfF16
> (float16) as well?
>
> Why do you conditionalize on GIMPLE here?
To avoid
error: ‘direct_internal_fn_supported_p’ was not declared in this scope

>
> That said, I wonder whether we can somehow address pattern explosion here,
> eliding the outer (convert ...) from the match would help a bit already.
>
> The related patterns use optimize && canonicalize_math_p as well btw., not
> sure whether either is appropriate here since there are no _Float16 math
> functions available.
Yes, that's why I didn't follow the existing pattern, i think we can
add optimize back to the condition, but not canonicalize_math_p ()
since there's no math function for _Float16.
Also w/o the outer (convert ..), it looks like a canonicalization to
transform ceil ((double) a) to (double) __builtin_ceilf16 (a) but not
an optimization.
>
> > +     tos (IFN_TRUNC IFN_TRUNC IFN_TRUNC
> > +         IFN_FLOOR IFN_FLOOR IFN_FLOOR
> > +         IFN_CEIL IFN_CEIL IFN_CEIL
> > +         IFN_ROUNDEVEN IFN_ROUNDEVEN IFN_ROUNDEVEN
> > +         IFN_ROUND IFN_ROUND IFN_ROUND
> > +         IFN_NEARBYINT IFN_NEARBYINT IFN_NEARBYINT
> > +         IFN_RINT IFN_RINT IFN_RINT)
> > + /* (_Float16) round ((doube) x) -> __built_in_roundf16 (x), etc.,
> > +    if x is a _Float16.  */
> > + (simplify
> > +   (convert (froms (convert float16_value_p@0)))
> > +     (if (types_match (type, TREE_TYPE (@0))
> > +         && direct_internal_fn_supported_p (as_internal_fn (tos),
> > +                                            type, OPTIMIZE_FOR_BOTH))
> > +       (tos @0))))
> > +#endif
> > +
> >  (for froms (XFLOORL XCEILL XROUNDL XRINTL)
> >       tos (XFLOOR XCEIL XROUND XRINT)
> >   /* llfloorl(extend(x)) -> llfloor(x), etc., if x is a double.  */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr102464.c b/gcc/testsuite/gcc.target/i386/pr102464.c
> > new file mode 100644
> > index 00000000000..e3e060ee80b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr102464.c
> > @@ -0,0 +1,39 @@
> > +/* PR target/102464.  */
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -mavx512fp16" } */
> > +
> > +#define FOO(FUNC,SUFFIX)                       \
> > +  _Float16                                     \
> > +  foo_##FUNC##_##SUFFIX (_Float16 a)           \
> > +  {                                            \
> > +    return __builtin_##FUNC##SUFFIX (a);       \
> > +  }
> > +
> > +FOO (roundeven, f16);
> > +FOO (roundeven, f);
> > +FOO (roundeven, );
> > +FOO (roundeven, l);
> > +FOO (trunc, f16);
> > +FOO (trunc, f);
> > +FOO (trunc, );
> > +FOO (trunc, l);
> > +FOO (ceil, f16);
> > +FOO (ceil, f);
> > +FOO (ceil, );
> > +FOO (ceil, l);
> > +FOO (floor, f16);
> > +FOO (floor, f);
> > +FOO (floor, );
> > +FOO (floor, l);
> > +FOO (nearbyint, f16);
> > +FOO (nearbyint, f);
> > +FOO (nearbyint, );
> > +FOO (nearbyint, l);
> > +FOO (rint, f16);
> > +FOO (rint, f);
> > +FOO (rint, );
> > +FOO (rint, l);
> > +
> > +/* { dg-final { scan-assembler-not "vcvtsh2s\[sd\]" } } */
> > +/* { dg-final { scan-assembler-not "extendhfxf" } } */
> > +/* { dg-final { scan-assembler-times "vrndscalesh\[^\n\r\]*xmm\[0-9\]" 24 } } */
> > --
> > 2.27.0
> >
  
Richard Biener Sept. 28, 2021, 7:45 a.m. UTC | #4
On Tue, Sep 28, 2021 at 4:01 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> On Mon, Sep 27, 2021 at 8:53 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Fri, Sep 24, 2021 at 1:26 PM liuhongt <hongtao.liu@intel.com> wrote:
> > >
> > > Hi:
> > >   Related discussion in [1] and PR.
> > >
> > >   Bootstrapped and regtest on x86_64-linux-gnu{-m32,}.
> > >   Ok for trunk?
> > >
> > > [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574330.html
> > >
> > > gcc/ChangeLog:
> > >
> > >         PR target/102464
> > >         * config/i386/i386.c (ix86_optab_supported_p):
> > >         Return true for HFmode.
> > >         * match.pd: Simplify (_Float16) ceil ((double) x) to
> > >         __builtin_ceilf16 (a) when a is _Float16 type and
> > >         direct_internal_fn_supported_p.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >         * gcc.target/i386/pr102464.c: New test.
> > > ---
> > >  gcc/config/i386/i386.c                   | 20 +++++++-----
> > >  gcc/match.pd                             | 28 +++++++++++++++++
> > >  gcc/testsuite/gcc.target/i386/pr102464.c | 39 ++++++++++++++++++++++++
> > >  3 files changed, 79 insertions(+), 8 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/i386/pr102464.c
> > >
> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > index ba89e111d28..3767fe9806d 100644
> > > --- a/gcc/config/i386/i386.c
> > > +++ b/gcc/config/i386/i386.c
> > > @@ -23582,20 +23582,24 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
> > >        return opt_type == OPTIMIZE_FOR_SPEED;
> > >
> > >      case rint_optab:
> > > -      if (SSE_FLOAT_MODE_P (mode1)
> > > -         && TARGET_SSE_MATH
> > > -         && !flag_trapping_math
> > > -         && !TARGET_SSE4_1)
> > > +      if (mode1 == HFmode)
> > > +       return true;
> > > +      else if (SSE_FLOAT_MODE_P (mode1)
> > > +              && TARGET_SSE_MATH
> > > +              && !flag_trapping_math
> > > +              && !TARGET_SSE4_1)
> > >         return opt_type == OPTIMIZE_FOR_SPEED;
> > >        return true;
> > >
> > >      case floor_optab:
> > >      case ceil_optab:
> > >      case btrunc_optab:
> > > -      if (SSE_FLOAT_MODE_P (mode1)
> > > -         && TARGET_SSE_MATH
> > > -         && !flag_trapping_math
> > > -         && TARGET_SSE4_1)
> > > +      if (mode1 == HFmode)
> > > +       return true;
> > > +      else if (SSE_FLOAT_MODE_P (mode1)
> > > +              && TARGET_SSE_MATH
> > > +              && !flag_trapping_math
> > > +              && TARGET_SSE4_1)
> > >         return true;
> > >        return opt_type == OPTIMIZE_FOR_SPEED;
> > >
> > > diff --git a/gcc/match.pd b/gcc/match.pd
> > > index a9791ceb74a..9ccec8b6ce3 100644
> > > --- a/gcc/match.pd
> > > +++ b/gcc/match.pd
> > > @@ -6191,6 +6191,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> > >     (froms (convert float_value_p@0))
> > >     (convert (tos @0)))))
> > >
> > > +#if GIMPLE
> > > +(match float16_value_p
> > > + @0
> > > + (if (TYPE_MAIN_VARIANT (TREE_TYPE (@0)) == float16_type_node)))
> > > +(for froms (BUILT_IN_TRUNCL BUILT_IN_TRUNC BUILT_IN_TRUNCF
> > > +           BUILT_IN_FLOORL BUILT_IN_FLOOR BUILT_IN_FLOORF
> > > +           BUILT_IN_CEILL BUILT_IN_CEIL BUILT_IN_CEILF
> > > +           BUILT_IN_ROUNDEVENL BUILT_IN_ROUNDEVEN BUILT_IN_ROUNDEVENF
> > > +           BUILT_IN_ROUNDL BUILT_IN_ROUND BUILT_IN_ROUNDF
> > > +           BUILT_IN_NEARBYINTL BUILT_IN_NEARBYINT BUILT_IN_NEARBYINTF
> > > +           BUILT_IN_RINTL BUILT_IN_RINT BUILT_IN_RINTF)
> >
> > we do have patterns that convert (truncl (convert floatval)) to
> > (float)truncf (val),
> > your's does (_Float16)trunc ((double) float16) -> truncF16 (float16), doesn't it
> > make sense to have trunc ((double) float16) -> (double)trunfF16
> > (float16) as well?
> >
> > Why do you conditionalize on GIMPLE here?
> To avoid
> error: ‘direct_internal_fn_supported_p’ was not declared in this scope

You probably have to amend generic-match-head.c by adding the relevant include.
But maybe we don't want to feed too many internal function calls into the
early GENERIC ... so OK to leave as-is then.

> >
> > That said, I wonder whether we can somehow address pattern explosion here,
> > eliding the outer (convert ...) from the match would help a bit already.
> >
> > The related patterns use optimize && canonicalize_math_p as well btw., not
> > sure whether either is appropriate here since there are no _Float16 math
> > functions available.
> Yes, that's why I didn't follow the existing pattern, i think we can
> add optimize back to the condition, but not canonicalize_math_p ()
> since there's no math function for _Float16.
> Also w/o the outer (convert ..), it looks like a canonicalization to
> transform ceil ((double) a) to (double) __builtin_ceilf16 (a) but not
> an optimization.

Yes, that's likely why we have canonicalize_math_p () on the others.

So I think the original patch is OK.

Thanks,
Richard.

> >
> > > +     tos (IFN_TRUNC IFN_TRUNC IFN_TRUNC
> > > +         IFN_FLOOR IFN_FLOOR IFN_FLOOR
> > > +         IFN_CEIL IFN_CEIL IFN_CEIL
> > > +         IFN_ROUNDEVEN IFN_ROUNDEVEN IFN_ROUNDEVEN
> > > +         IFN_ROUND IFN_ROUND IFN_ROUND
> > > +         IFN_NEARBYINT IFN_NEARBYINT IFN_NEARBYINT
> > > +         IFN_RINT IFN_RINT IFN_RINT)
> > > + /* (_Float16) round ((doube) x) -> __built_in_roundf16 (x), etc.,
> > > +    if x is a _Float16.  */
> > > + (simplify
> > > +   (convert (froms (convert float16_value_p@0)))
> > > +     (if (types_match (type, TREE_TYPE (@0))
> > > +         && direct_internal_fn_supported_p (as_internal_fn (tos),
> > > +                                            type, OPTIMIZE_FOR_BOTH))
> > > +       (tos @0))))
> > > +#endif
> > > +
> > >  (for froms (XFLOORL XCEILL XROUNDL XRINTL)
> > >       tos (XFLOOR XCEIL XROUND XRINT)
> > >   /* llfloorl(extend(x)) -> llfloor(x), etc., if x is a double.  */
> > > diff --git a/gcc/testsuite/gcc.target/i386/pr102464.c b/gcc/testsuite/gcc.target/i386/pr102464.c
> > > new file mode 100644
> > > index 00000000000..e3e060ee80b
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/i386/pr102464.c
> > > @@ -0,0 +1,39 @@
> > > +/* PR target/102464.  */
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -mavx512fp16" } */
> > > +
> > > +#define FOO(FUNC,SUFFIX)                       \
> > > +  _Float16                                     \
> > > +  foo_##FUNC##_##SUFFIX (_Float16 a)           \
> > > +  {                                            \
> > > +    return __builtin_##FUNC##SUFFIX (a);       \
> > > +  }
> > > +
> > > +FOO (roundeven, f16);
> > > +FOO (roundeven, f);
> > > +FOO (roundeven, );
> > > +FOO (roundeven, l);
> > > +FOO (trunc, f16);
> > > +FOO (trunc, f);
> > > +FOO (trunc, );
> > > +FOO (trunc, l);
> > > +FOO (ceil, f16);
> > > +FOO (ceil, f);
> > > +FOO (ceil, );
> > > +FOO (ceil, l);
> > > +FOO (floor, f16);
> > > +FOO (floor, f);
> > > +FOO (floor, );
> > > +FOO (floor, l);
> > > +FOO (nearbyint, f16);
> > > +FOO (nearbyint, f);
> > > +FOO (nearbyint, );
> > > +FOO (nearbyint, l);
> > > +FOO (rint, f16);
> > > +FOO (rint, f);
> > > +FOO (rint, );
> > > +FOO (rint, l);
> > > +
> > > +/* { dg-final { scan-assembler-not "vcvtsh2s\[sd\]" } } */
> > > +/* { dg-final { scan-assembler-not "extendhfxf" } } */
> > > +/* { dg-final { scan-assembler-times "vrndscalesh\[^\n\r\]*xmm\[0-9\]" 24 } } */
> > > --
> > > 2.27.0
> > >
>
>
>
> --
> BR,
> Hongtao
  
Joseph Myers Sept. 28, 2021, 8:50 p.m. UTC | #5
On Tue, 28 Sep 2021, Hongtao Liu via Gcc-patches wrote:

> Yes, that's why I didn't follow the existing pattern, i think we can
> add optimize back to the condition, but not canonicalize_math_p ()
> since there's no math function for _Float16.

At some point we should add _Float16 functions to glibc, but that's 
trickier than for the _FloatN / _FloatNx types that are already supported 
there because of the implications for minimum compiler versions supported 
(_Float16 functions could be supported in glibc for AArch64 and Arm by 
increasing the minimum compiler version for building glibc for those 
architectures to GCC 7, but not for x86_64 and x86 until we're ready to 
require GCC 12 or later to build glibc for those architectures), and we'd 
need to think carefully about design questions such as when functions are 
implemented using _Float16 or float arithmetic or just wrap float 
functions as a starting point.  And for generating calls to such functions 
GCC would need to know whether the libm implementation provides them or 
not.
  

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ba89e111d28..3767fe9806d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23582,20 +23582,24 @@  ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
       return opt_type == OPTIMIZE_FOR_SPEED;
 
     case rint_optab:
-      if (SSE_FLOAT_MODE_P (mode1)
-	  && TARGET_SSE_MATH
-	  && !flag_trapping_math
-	  && !TARGET_SSE4_1)
+      if (mode1 == HFmode)
+	return true;
+      else if (SSE_FLOAT_MODE_P (mode1)
+	       && TARGET_SSE_MATH
+	       && !flag_trapping_math
+	       && !TARGET_SSE4_1)
 	return opt_type == OPTIMIZE_FOR_SPEED;
       return true;
 
     case floor_optab:
     case ceil_optab:
     case btrunc_optab:
-      if (SSE_FLOAT_MODE_P (mode1)
-	  && TARGET_SSE_MATH
-	  && !flag_trapping_math
-	  && TARGET_SSE4_1)
+      if (mode1 == HFmode)
+	return true;
+      else if (SSE_FLOAT_MODE_P (mode1)
+	       && TARGET_SSE_MATH
+	       && !flag_trapping_math
+	       && TARGET_SSE4_1)
 	return true;
       return opt_type == OPTIMIZE_FOR_SPEED;
 
diff --git a/gcc/match.pd b/gcc/match.pd
index a9791ceb74a..9ccec8b6ce3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6191,6 +6191,34 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (froms (convert float_value_p@0))
    (convert (tos @0)))))
 
+#if GIMPLE
+(match float16_value_p
+ @0
+ (if (TYPE_MAIN_VARIANT (TREE_TYPE (@0)) == float16_type_node)))
+(for froms (BUILT_IN_TRUNCL BUILT_IN_TRUNC BUILT_IN_TRUNCF
+	    BUILT_IN_FLOORL BUILT_IN_FLOOR BUILT_IN_FLOORF
+	    BUILT_IN_CEILL BUILT_IN_CEIL BUILT_IN_CEILF
+	    BUILT_IN_ROUNDEVENL BUILT_IN_ROUNDEVEN BUILT_IN_ROUNDEVENF
+	    BUILT_IN_ROUNDL BUILT_IN_ROUND BUILT_IN_ROUNDF
+	    BUILT_IN_NEARBYINTL BUILT_IN_NEARBYINT BUILT_IN_NEARBYINTF
+	    BUILT_IN_RINTL BUILT_IN_RINT BUILT_IN_RINTF)
+     tos (IFN_TRUNC IFN_TRUNC IFN_TRUNC
+	  IFN_FLOOR IFN_FLOOR IFN_FLOOR
+	  IFN_CEIL IFN_CEIL IFN_CEIL
+	  IFN_ROUNDEVEN IFN_ROUNDEVEN IFN_ROUNDEVEN
+	  IFN_ROUND IFN_ROUND IFN_ROUND
+	  IFN_NEARBYINT IFN_NEARBYINT IFN_NEARBYINT
+	  IFN_RINT IFN_RINT IFN_RINT)
+ /* (_Float16) round ((doube) x) -> __built_in_roundf16 (x), etc.,
+    if x is a _Float16.  */
+ (simplify
+   (convert (froms (convert float16_value_p@0)))
+     (if (types_match (type, TREE_TYPE (@0))
+	  && direct_internal_fn_supported_p (as_internal_fn (tos),
+					     type, OPTIMIZE_FOR_BOTH))
+       (tos @0))))
+#endif
+
 (for froms (XFLOORL XCEILL XROUNDL XRINTL)
      tos (XFLOOR XCEIL XROUND XRINT)
  /* llfloorl(extend(x)) -> llfloor(x), etc., if x is a double.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr102464.c b/gcc/testsuite/gcc.target/i386/pr102464.c
new file mode 100644
index 00000000000..e3e060ee80b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102464.c
@@ -0,0 +1,39 @@ 
+/* PR target/102464.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16" } */
+
+#define FOO(FUNC,SUFFIX)                       \
+  _Float16                                     \
+  foo_##FUNC##_##SUFFIX (_Float16 a)           \
+  {                                            \
+    return __builtin_##FUNC##SUFFIX (a);       \
+  }
+
+FOO (roundeven, f16);
+FOO (roundeven, f);
+FOO (roundeven, );
+FOO (roundeven, l);
+FOO (trunc, f16);
+FOO (trunc, f);
+FOO (trunc, );
+FOO (trunc, l);
+FOO (ceil, f16);
+FOO (ceil, f);
+FOO (ceil, );
+FOO (ceil, l);
+FOO (floor, f16);
+FOO (floor, f);
+FOO (floor, );
+FOO (floor, l);
+FOO (nearbyint, f16);
+FOO (nearbyint, f);
+FOO (nearbyint, );
+FOO (nearbyint, l);
+FOO (rint, f16);
+FOO (rint, f);
+FOO (rint, );
+FOO (rint, l);
+
+/* { dg-final { scan-assembler-not "vcvtsh2s\[sd\]" } } */
+/* { dg-final { scan-assembler-not "extendhfxf" } } */
+/* { dg-final { scan-assembler-times "vrndscalesh\[^\n\r\]*xmm\[0-9\]" 24 } } */