[5/5] vect: Add support for fmax and fmin reductions
Commit Message
This patch adds support for reductions involving calls to fmax*()
and fmin*(), without the -ffast-math flags that allow them to be
converted to MAX_EXPR and MIN_EXPR.
Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
Richard
gcc/
* doc/md.texi (reduc_fmin_scal_@var{m}): Document.
(reduc_fmax_scal_@var{m}): Likewise.
* optabs.def (reduc_fmax_scal_optab): New optab.
(reduc_fmin_scal_optab): Likewise.
* internal-fn.def (REDUC_FMAX, REDUC_FMIN): New functions.
* tree-vect-loop.c (reduction_fn_for_scalar_code): Handle
CASE_CFN_FMAX and CASE_CFN_FMIN.
(neutral_op_for_reduction): Likewise.
(needs_fold_left_reduction_p): Likewise.
* config/aarch64/iterators.md (FMAXMINNMV): New iterator.
(fmaxmin): Handle UNSPEC_FMAXNMV and UNSPEC_FMINNMV.
* config/aarch64/aarch64-simd.md (reduc_<optab>_scal_<mode>): Fix
unspec mode.
(reduc_<fmaxmin>_scal_<mode>): New pattern.
* config/aarch64/aarch64-sve.md (reduc_<fmaxmin>_scal_<mode>):
Likewise.
gcc/testsuite/
* gcc.dg/vect/vect-fmax-1.c: New test.
* gcc.dg/vect/vect-fmax-2.c: Likewise.
* gcc.dg/vect/vect-fmax-3.c: Likewise.
* gcc.dg/vect/vect-fmin-1.c: New test.
* gcc.dg/vect/vect-fmin-2.c: Likewise.
* gcc.dg/vect/vect-fmin-3.c: Likewise.
* gcc.target/aarch64/fmaxnm_1.c: Likewise.
* gcc.target/aarch64/fmaxnm_2.c: Likewise.
* gcc.target/aarch64/fminnm_1.c: Likewise.
* gcc.target/aarch64/fminnm_2.c: Likewise.
* gcc.target/aarch64/sve/fmaxnm_2.c: Likewise.
* gcc.target/aarch64/sve/fmaxnm_3.c: Likewise.
* gcc.target/aarch64/sve/fminnm_2.c: Likewise.
* gcc.target/aarch64/sve/fminnm_3.c: Likewise.
---
gcc/config/aarch64/aarch64-simd.md | 15 +++-
gcc/config/aarch64/aarch64-sve.md | 11 +++
gcc/config/aarch64/iterators.md | 4 +
gcc/doc/md.texi | 8 ++
gcc/internal-fn.def | 4 +
gcc/optabs.def | 2 +
gcc/testsuite/gcc.dg/vect/vect-fmax-1.c | 83 ++++++++++++++++++
gcc/testsuite/gcc.dg/vect/vect-fmax-2.c | 7 ++
gcc/testsuite/gcc.dg/vect/vect-fmax-3.c | 83 ++++++++++++++++++
gcc/testsuite/gcc.dg/vect/vect-fmin-1.c | 86 +++++++++++++++++++
gcc/testsuite/gcc.dg/vect/vect-fmin-2.c | 9 ++
gcc/testsuite/gcc.dg/vect/vect-fmin-3.c | 83 ++++++++++++++++++
gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c | 24 ++++++
gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c | 20 +++++
gcc/testsuite/gcc.target/aarch64/fminnm_1.c | 24 ++++++
gcc/testsuite/gcc.target/aarch64/fminnm_2.c | 20 +++++
.../gcc.target/aarch64/sve/fmaxnm_2.c | 22 +++++
.../gcc.target/aarch64/sve/fmaxnm_3.c | 18 ++++
.../gcc.target/aarch64/sve/fminnm_2.c | 22 +++++
.../gcc.target/aarch64/sve/fminnm_3.c | 18 ++++
gcc/tree-vect-loop.c | 45 ++++++++--
21 files changed, 599 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmax-1.c
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmax-2.c
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmax-3.c
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmin-1.c
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmin-2.c
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmin-3.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/fminnm_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/fminnm_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c
Comments
On Wed, Nov 10, 2021 at 1:49 PM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This patch adds support for reductions involving calls to fmax*()
> and fmin*(), without the -ffast-math flags that allow them to be
> converted to MAX_EXPR and MIN_EXPR.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
OK.
Thanks,
Richard.
> Richard
>
>
> gcc/
> * doc/md.texi (reduc_fmin_scal_@var{m}): Document.
> (reduc_fmax_scal_@var{m}): Likewise.
> * optabs.def (reduc_fmax_scal_optab): New optab.
> (reduc_fmin_scal_optab): Likewise
> * internal-fn.def (REDUC_FMAX, REDUC_FMIN): New functions.
> * tree-vect-loop.c (reduction_fn_for_scalar_code): Handle
> CASE_CFN_FMAX and CASE_CFN_FMIN.
> (neutral_op_for_reduction): Likewise.
> (needs_fold_left_reduction_p): Likewise.
> * config/aarch64/iterators.md (FMAXMINV): New iterator.
> (fmaxmin): Handle UNSPEC_FMAXNMV and UNSPEC_FMINNMV.
> * config/aarch64/aarch64-simd.md (reduc_<optab>_scal_<mode>): Fix
> unspec mode.
> (reduc_<fmaxmin>_scal_<mode>): New pattern.
> * config/aarch64/aarch64-sve.md (reduc_<fmaxmin>_scal_<mode>):
> Likewise.
>
> gcc/testsuite/
> * gcc.dg/vect/vect-fmax-1.c: New test.
> * gcc.dg/vect/vect-fmax-2.c: Likewise.
> * gcc.dg/vect/vect-fmax-3.c: Likewise.
> * gcc.dg/vect/vect-fmin-1.c: New test.
> * gcc.dg/vect/vect-fmin-2.c: Likewise.
> * gcc.dg/vect/vect-fmin-3.c: Likewise.
> * gcc.target/aarch64/fmaxnm_1.c: Likewise.
> * gcc.target/aarch64/fmaxnm_2.c: Likewise.
> * gcc.target/aarch64/fminnm_1.c: Likewise.
> * gcc.target/aarch64/fminnm_2.c: Likewise.
> * gcc.target/aarch64/sve/fmaxnm_1.c: Likewise.
> * gcc.target/aarch64/sve/fmaxnm_2.c: Likewise.
> * gcc.target/aarch64/sve/fminnm_1.c: Likewise.
> * gcc.target/aarch64/sve/fminnm_2.c: Likewise.
> ---
> gcc/config/aarch64/aarch64-simd.md | 15 +++-
> gcc/config/aarch64/aarch64-sve.md | 11 +++
> gcc/config/aarch64/iterators.md | 4 +
> gcc/doc/md.texi | 8 ++
> gcc/internal-fn.def | 4 +
> gcc/optabs.def | 2 +
> gcc/testsuite/gcc.dg/vect/vect-fmax-1.c | 83 ++++++++++++++++++
> gcc/testsuite/gcc.dg/vect/vect-fmax-2.c | 7 ++
> gcc/testsuite/gcc.dg/vect/vect-fmax-3.c | 83 ++++++++++++++++++
> gcc/testsuite/gcc.dg/vect/vect-fmin-1.c | 86 +++++++++++++++++++
> gcc/testsuite/gcc.dg/vect/vect-fmin-2.c | 9 ++
> gcc/testsuite/gcc.dg/vect/vect-fmin-3.c | 83 ++++++++++++++++++
> gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c | 24 ++++++
> gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c | 20 +++++
> gcc/testsuite/gcc.target/aarch64/fminnm_1.c | 24 ++++++
> gcc/testsuite/gcc.target/aarch64/fminnm_2.c | 20 +++++
> .../gcc.target/aarch64/sve/fmaxnm_2.c | 22 +++++
> .../gcc.target/aarch64/sve/fmaxnm_3.c | 18 ++++
> .../gcc.target/aarch64/sve/fminnm_2.c | 22 +++++
> .../gcc.target/aarch64/sve/fminnm_3.c | 18 ++++
> gcc/tree-vect-loop.c | 45 ++++++++--
> 21 files changed, 599 insertions(+), 9 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmax-1.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmax-2.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmax-3.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmin-1.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmin-2.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-fmin-3.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/fminnm_1.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/fminnm_2.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 35d55a3e51e..8e7d783f7f3 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -3624,8 +3624,8 @@ (define_insn "popcount<mode>2"
> ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
> (define_expand "reduc_<optab>_scal_<mode>"
> [(match_operand:<VEL> 0 "register_operand")
> - (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
> - FMAXMINV)]
> + (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
> + FMAXMINV)]
> "TARGET_SIMD"
> {
> rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
> @@ -3637,6 +3637,17 @@ (define_expand "reduc_<optab>_scal_<mode>"
> }
> )
>
> +(define_expand "reduc_<fmaxmin>_scal_<mode>"
> + [(match_operand:<VEL> 0 "register_operand")
> + (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
> + FMAXMINNMV)]
> + "TARGET_SIMD"
> + {
> + emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
> + DONE;
> + }
> +)
> +
> ;; Likewise for integer cases, signed and unsigned.
> (define_expand "reduc_<optab>_scal_<mode>"
> [(match_operand:<VEL> 0 "register_operand")
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index 0f5bf5ea8cb..9ef968840c2 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -8566,6 +8566,17 @@ (define_expand "reduc_<optab>_scal_<mode>"
> }
> )
>
> +(define_expand "reduc_<fmaxmin>_scal_<mode>"
> + [(match_operand:<VEL> 0 "register_operand")
> + (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")]
> + FMAXMINNMV)]
> + "TARGET_SVE"
> + {
> + emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
> + DONE;
> + }
> +)
> +
> ;; Predicated floating-point tree reductions.
> (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
> [(set (match_operand:<VEL> 0 "register_operand" "=w")
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index e8eebd863a6..fb568ddc4a0 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -2510,6 +2510,8 @@ (define_int_iterator MAXMINV [UNSPEC_UMAXV UNSPEC_UMINV
> (define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
> UNSPEC_FMAXNMV UNSPEC_FMINNMV])
>
> +(define_int_iterator FMAXMINNMV [UNSPEC_FMAXNMV UNSPEC_FMINNMV])
> +
> (define_int_iterator SVE_INT_ADDV [UNSPEC_SADDV UNSPEC_UADDV])
>
> (define_int_iterator USADDLP [UNSPEC_SADDLP UNSPEC_UADDLP])
> @@ -3216,8 +3218,10 @@ (define_int_attr optab [(UNSPEC_ANDF "and")
>
> (define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan")
> (UNSPEC_FMAXNM "fmax")
> + (UNSPEC_FMAXNMV "fmax")
> (UNSPEC_FMIN "fmin_nan")
> (UNSPEC_FMINNM "fmin")
> + (UNSPEC_FMINNMV "fmin")
> (UNSPEC_COND_FMAXNM "fmax")
> (UNSPEC_COND_FMINNM "fmin")])
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 589f841ea74..8fd0f8d2fe1 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5400,6 +5400,14 @@ Find the unsigned minimum/maximum of the elements of a vector. The vector is
> operand 1, and operand 0 is the scalar result, with mode equal to the mode of
> the elements of the input vector.
>
> +@cindex @code{reduc_fmin_scal_@var{m}} instruction pattern
> +@cindex @code{reduc_fmax_scal_@var{m}} instruction pattern
> +@item @samp{reduc_fmin_scal_@var{m}}, @samp{reduc_fmax_scal_@var{m}}
> +Find the floating-point minimum/maximum of the elements of a vector,
> +using the same rules as @code{fmin@var{m}3} and @code{fmax@var{m}3}.
> +Operand 1 is a vector of mode @var{m} and operand 0 is the scalar
> +result, which has mode @code{GET_MODE_INNER (@var{m})}.
> +
> @cindex @code{reduc_plus_scal_@var{m}} instruction pattern
> @item @samp{reduc_plus_scal_@var{m}}
> Compute the sum of the elements of a vector. The vector is operand 1, and
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index bb4d8ab8096..acb0dbda556 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -216,6 +216,10 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MAX, ECF_CONST | ECF_NOTHROW, first,
> reduc_smax_scal, reduc_umax_scal, unary)
> DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MIN, ECF_CONST | ECF_NOTHROW, first,
> reduc_smin_scal, reduc_umin_scal, unary)
> +DEF_INTERNAL_OPTAB_FN (REDUC_FMAX, ECF_CONST | ECF_NOTHROW,
> + reduc_fmax_scal, unary)
> +DEF_INTERNAL_OPTAB_FN (REDUC_FMIN, ECF_CONST | ECF_NOTHROW,
> + reduc_fmin_scal, unary)
> DEF_INTERNAL_OPTAB_FN (REDUC_AND, ECF_CONST | ECF_NOTHROW,
> reduc_and_scal, unary)
> DEF_INTERNAL_OPTAB_FN (REDUC_IOR, ECF_CONST | ECF_NOTHROW,
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index e25f4c9a346..cef6054b378 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -335,6 +335,8 @@ OPTAB_D (fmax_optab, "fmax$a3")
> OPTAB_D (fmin_optab, "fmin$a3")
>
> /* Vector reduction to a scalar. */
> +OPTAB_D (reduc_fmax_scal_optab, "reduc_fmax_scal_$a")
> +OPTAB_D (reduc_fmin_scal_optab, "reduc_fmin_scal_$a")
> OPTAB_D (reduc_smax_scal_optab, "reduc_smax_scal_$a")
> OPTAB_D (reduc_smin_scal_optab, "reduc_smin_scal_$a")
> OPTAB_D (reduc_plus_scal_optab, "reduc_plus_scal_$a")
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmax-1.c b/gcc/testsuite/gcc.dg/vect/vect-fmax-1.c
> new file mode 100644
> index 00000000000..841ffab5666
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-fmax-1.c
> @@ -0,0 +1,83 @@
> +#include "tree-vect.h"
> +
> +#ifndef TYPE
> +#define TYPE float
> +#define FN __builtin_fmaxf
> +#endif
> +
> +TYPE __attribute__((noipa))
> +test (TYPE x, TYPE *ptr, int n)
> +{
> + for (int i = 0; i < n; ++i)
> + x = FN (x, ptr[i]);
> + return x;
> +}
> +
> +#define N 128
> +#define HALF (N / 2)
> +
> +int
> +main (void)
> +{
> + check_vect ();
> +
> + TYPE a[N];
> +
> + for (int i = 0; i < N; ++i)
> + a[i] = i;
> +
> + if (test (-1, a, 1) != 0)
> + __builtin_abort ();
> + if (test (-1, a, 64) != 63)
> + __builtin_abort ();
> + if (test (-1, a, 65) != 64)
> + __builtin_abort ();
> + if (test (-1, a, 66) != 65)
> + __builtin_abort ();
> + if (test (-1, a, 67) != 66)
> + __builtin_abort ();
> + if (test (-1, a, 128) != 127)
> + __builtin_abort ();
> + if (test (127, a, 128) != 127)
> + __builtin_abort ();
> + if (test (128, a, 128) != 128)
> + __builtin_abort ();
> +
> + for (int i = 0; i < N; ++i)
> + a[i] = -i;
> +
> + if (test (-60, a, 4) != 0)
> + __builtin_abort ();
> + if (test (0, a, 4) != 0)
> + __builtin_abort ();
> + if (test (1, a, 4) != 1)
> + __builtin_abort ();
> +
> + for (int i = 0; i < HALF; ++i)
> + {
> + a[i] = i;
> + a[HALF + i] = HALF - i;
> + }
> +
> + if (test (0, a, HALF - 16) != HALF - 17)
> + __builtin_abort ();
> + if (test (0, a, HALF - 2) != HALF - 3)
> + __builtin_abort ();
> + if (test (0, a, HALF - 1) != HALF - 2)
> + __builtin_abort ();
> + if (test (0, a, HALF) != HALF - 1)
> + __builtin_abort ();
> + if (test (0, a, HALF + 1) != HALF)
> + __builtin_abort ();
> + if (test (0, a, HALF + 2) != HALF)
> + __builtin_abort ();
> + if (test (0, a, HALF + 3) != HALF)
> + __builtin_abort ();
> + if (test (0, a, HALF + 16) != HALF)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmax-2.c b/gcc/testsuite/gcc.dg/vect/vect-fmax-2.c
> new file mode 100644
> index 00000000000..3d1f64416d5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-fmax-2.c
> @@ -0,0 +1,7 @@
> +#define TYPE double
> +#define FN __builtin_fmax
> +
> +#include "vect-fmax-1.c"
> +
> +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmax-3.c b/gcc/testsuite/gcc.dg/vect/vect-fmax-3.c
> new file mode 100644
> index 00000000000..f711ed0563e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-fmax-3.c
> @@ -0,0 +1,83 @@
> +#include "tree-vect.h"
> +
> +void __attribute__((noipa))
> +test (double x0, double x1, double *restrict res, double *restrict ptr, int n)
> +{
> + for (int i = 0; i < n; i += 2)
> + {
> + x0 = __builtin_fmax (x0, ptr[i + 0]);
> + x1 = __builtin_fmax (x1, ptr[i + 1]);
> + }
> + res[0] = x0;
> + res[1] = x1;
> +}
> +
> +#define N 128
> +#define HALF (N / 2)
> +
> +int
> +main (void)
> +{
> + check_vect ();
> +
> + double res[2], a[N];
> +
> + for (int i = 0; i < N; i += 2)
> + {
> + a[i] = i < HALF ? i : HALF;
> + a[i + 1] = i / 8;
> + }
> +
> + test (-1, -1, res, a, 2);
> + if (res[0] != 0 || res[1] != 0)
> + __builtin_abort ();
> +
> + test (-1, -1, res, a, 6);
> + if (res[0] != 4 || res[1] != 0)
> + __builtin_abort ();
> +
> + test (-1, -1, res, a, 8);
> + if (res[0] != 6 || res[1] != 0)
> + __builtin_abort ();
> +
> + test (-1, -1, res, a, 10);
> + if (res[0] != 8 || res[1] != 1)
> + __builtin_abort ();
> +
> + test (-1, -1, res, a, HALF - 2);
> + if (res[0] != HALF - 4 || res[1] != HALF / 8 - 1)
> + __builtin_abort ();
> +
> + test (-1, -1, res, a, HALF);
> + if (res[0] != HALF - 2 || res[1] != HALF / 8 - 1)
> + __builtin_abort ();
> +
> + test (-1, -1, res, a, HALF + 2);
> + if (res[0] != HALF || res[1] != HALF / 8)
> + __builtin_abort ();
> +
> + test (-1, -1, res, a, HALF + 8);
> + if (res[0] != HALF || res[1] != HALF / 8)
> + __builtin_abort ();
> +
> + test (-1, -1, res, a, HALF + 10);
> + if (res[0] != HALF || res[1] != HALF / 8 + 1)
> + __builtin_abort ();
> +
> + test (-1, -1, res, a, N);
> + if (res[0] != HALF || res[1] != N / 8 - 1)
> + __builtin_abort ();
> +
> + test (HALF + 1, -1, res, a, N);
> + if (res[0] != HALF + 1 || res[1] != N / 8 - 1)
> + __builtin_abort ();
> +
> + test (HALF + 1, N, res, a, N);
> + if (res[0] != HALF + 1 || res[1] != N)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmin-1.c b/gcc/testsuite/gcc.dg/vect/vect-fmin-1.c
> new file mode 100644
> index 00000000000..3d5f843a9db
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-fmin-1.c
> @@ -0,0 +1,86 @@
> +#include "tree-vect.h"
> +
> +#ifndef TYPE
> +#define TYPE float
> +#define FN __builtin_fminf
> +#endif
> +
> +TYPE __attribute__((noipa))
> +test (TYPE x, TYPE *ptr, int n)
> +{
> + for (int i = 0; i < n; ++i)
> + x = FN (x, ptr[i]);
> + return x;
> +}
> +
> +#define N 128
> +#define HALF (N / 2)
> +
> +int
> +main (void)
> +{
> + check_vect ();
> +
> + TYPE a[N];
> +
> + for (int i = 0; i < N; ++i)
> + a[i] = -i;
> +
> + if (test (1, a, 1) != 0)
> + __builtin_abort ();
> + if (test (1, a, 64) != -63)
> + __builtin_abort ();
> + if (test (1, a, 65) != -64)
> + __builtin_abort ();
> + if (test (1, a, 66) != -65)
> + __builtin_abort ();
> + if (test (1, a, 67) != -66)
> + __builtin_abort ();
> + if (test (1, a, 128) != -127)
> + __builtin_abort ();
> + if (test (-127, a, 128) != -127)
> + __builtin_abort ();
> + if (test (-128, a, 128) != -128)
> + __builtin_abort ();
> +
> + for (int i = 0; i < N; ++i)
> + a[i] = i;
> +
> + if (test (1, a, 4) != 0)
> + __builtin_abort ();
> + if (test (0, a, 4) != 0)
> + __builtin_abort ();
> + if (test (-1, a, 4) != -1)
> + __builtin_abort ();
> +
> + for (int i = 0; i < HALF; ++i)
> + {
> + a[i] = HALF - i;
> + a[HALF + i] = i;
> + }
> +
> + if (test (N, a, HALF - 16) != 17)
> + __builtin_abort ();
> + if (test (N, a, HALF - 2) != 3)
> + __builtin_abort ();
> + if (test (N, a, HALF - 1) != 2)
> + __builtin_abort ();
> + if (test (N, a, HALF) != 1)
> + __builtin_abort ();
> + if (test (N, a, HALF + 1) != 0)
> + __builtin_abort ();
> + if (test (N, a, HALF + 2) != 0)
> + __builtin_abort ();
> + if (test (N, a, HALF + 3) != 0)
> + __builtin_abort ();
> + if (test (N, a, HALF + 16) != 0)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
> +
> +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmin-2.c b/gcc/testsuite/gcc.dg/vect/vect-fmin-2.c
> new file mode 100644
> index 00000000000..21e45cca55a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-fmin-2.c
> @@ -0,0 +1,9 @@
> +#ifndef TYPE
> +#define TYPE double
> +#define FN __builtin_fmin
> +#endif
> +
> +#include "vect-fmin-1.c"
> +
> +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-fmin-3.c b/gcc/testsuite/gcc.dg/vect/vect-fmin-3.c
> new file mode 100644
> index 00000000000..cc38bf43909
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-fmin-3.c
> @@ -0,0 +1,83 @@
> +#include "tree-vect.h"
> +
> +void __attribute__((noipa))
> +test (double x0, double x1, double *restrict res, double *restrict ptr, int n)
> +{
> + for (int i = 0; i < n; i += 2)
> + {
> + x0 = __builtin_fmin (x0, ptr[i + 0]);
> + x1 = __builtin_fmin (x1, ptr[i + 1]);
> + }
> + res[0] = x0;
> + res[1] = x1;
> +}
> +
> +#define N 128
> +#define HALF (N / 2)
> +
> +int
> +main (void)
> +{
> + check_vect ();
> +
> + double res[2], a[N];
> +
> + for (int i = 0; i < N; i += 2)
> + {
> + a[i] = i < HALF ? HALF - i : 0;
> + a[i + 1] = -i / 8;
> + }
> +
> + test (N, N, res, a, 2);
> + if (res[0] != HALF || res[1] != 0)
> + __builtin_abort ();
> +
> + test (N, N, res, a, 6);
> + if (res[0] != HALF - 4 || res[1] != 0)
> + __builtin_abort ();
> +
> + test (N, N, res, a, 8);
> + if (res[0] != HALF - 6 || res[1] != 0)
> + __builtin_abort ();
> +
> + test (N, N, res, a, 10);
> + if (res[0] != HALF - 8 || res[1] != -1)
> + __builtin_abort ();
> +
> + test (N, N, res, a, HALF - 2);
> + if (res[0] != 4 || res[1] != -HALF / 8 + 1)
> + __builtin_abort ();
> +
> + test (N, N, res, a, HALF);
> + if (res[0] != 2 || res[1] != -HALF / 8 + 1)
> + __builtin_abort ();
> +
> + test (N, N, res, a, HALF + 2);
> + if (res[0] != 0 || res[1] != -HALF / 8)
> + __builtin_abort ();
> +
> + test (N, N, res, a, HALF + 8);
> + if (res[0] != 0 || res[1] != -HALF / 8)
> + __builtin_abort ();
> +
> + test (N, N, res, a, HALF + 10);
> + if (res[0] != 0 || res[1] != -HALF / 8 - 1)
> + __builtin_abort ();
> +
> + test (N, N, res, a, N);
> + if (res[0] != 0 || res[1] != -N / 8 + 1)
> + __builtin_abort ();
> +
> + test (-1, N, res, a, N);
> + if (res[0] != -1 || res[1] != -N / 8 + 1)
> + __builtin_abort ();
> +
> + test (-1, -N / 8, res, a, N);
> + if (res[0] != -1 || res[1] != -N / 8)
> + __builtin_abort ();
> +
> + return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c
> new file mode 100644
> index 00000000000..40c36c7a3dc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fmaxnm_1.c
> @@ -0,0 +1,24 @@
> +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
> +
> +#pragma GCC target "+nosve"
> +
> +float
> +f1 (float x, float *ptr)
> +{
> + for (int i = 0; i < 128; ++i)
> + x = __builtin_fmaxf (x, ptr[i]);
> + return x;
> +}
> +
> +double
> +f2 (double x, double *ptr)
> +{
> + for (int i = 0; i < 128; ++i)
> + x = __builtin_fmax (x, ptr[i]);
> + return x;
> +}
> +
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, v[0-9]+\.4s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnmp\td[0-9]+, v[0-9]+\.2d\n} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c
> new file mode 100644
> index 00000000000..6e48ac8eeee
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fmaxnm_2.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
> +
> +#pragma GCC target "+nosve"
> +
> +void
> +f (double *restrict res, double *restrict ptr)
> +{
> + double x0 = res[0];
> + double x1 = res[1];
> + for (int i = 0; i < 128; i += 2)
> + {
> + x0 = __builtin_fmax (x0, ptr[i + 0]);
> + x1 = __builtin_fmax (x1, ptr[i + 1]);
> + }
> + res[0] = x0;
> + res[1] = x1;
> +}
> +
> +/* { dg-final { scan-assembler-times {\tfmaxnm\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d\n} 1 } } */
> +/* { dg-final { scan-assembler {\tstr\tq[0-9]+, \[x0\]\n} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/fminnm_1.c b/gcc/testsuite/gcc.target/aarch64/fminnm_1.c
> new file mode 100644
> index 00000000000..1cf372b2a6b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fminnm_1.c
> @@ -0,0 +1,24 @@
> +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
> +
> +#pragma GCC target "+nosve"
> +
> +float
> +f1 (float x, float *ptr)
> +{
> + for (int i = 0; i < 128; ++i)
> + x = __builtin_fminf (x, ptr[i]);
> + return x;
> +}
> +
> +double
> +f2 (double x, double *ptr)
> +{
> + for (int i = 0; i < 128; ++i)
> + x = __builtin_fmin (x, ptr[i]);
> + return x;
> +}
> +
> +/* { dg-final { scan-assembler-times {\tfminnm\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, v[0-9]+\.4s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfminnm\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfminnmp\td[0-9]+, v[0-9]+\.2d\n} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/fminnm_2.c
> new file mode 100644
> index 00000000000..543e1884051
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fminnm_2.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
> +
> +#pragma GCC target "+nosve"
> +
> +void
> +f (double *restrict res, double *restrict ptr)
> +{
> + double x0 = res[0];
> + double x1 = res[1];
> + for (int i = 0; i < 128; i += 2)
> + {
> + x0 = __builtin_fmin (x0, ptr[i + 0]);
> + x1 = __builtin_fmin (x1, ptr[i + 1]);
> + }
> + res[0] = x0;
> + res[1] = x1;
> +}
> +
> +/* { dg-final { scan-assembler-times {\tfminnm\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d\n} 1 } } */
> +/* { dg-final { scan-assembler {\tstr\tq[0-9]+, \[x0\]\n} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c
> new file mode 100644
> index 00000000000..ee3cdc20f96
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_2.c
> @@ -0,0 +1,22 @@
> +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
> +
> +float
> +f1 (float x, float *ptr)
> +{
> + for (int i = 0; i < 128; ++i)
> + x = __builtin_fmaxf (x, ptr[i]);
> + return x;
> +}
> +
> +double
> +f2 (double x, double *ptr)
> +{
> + for (int i = 0; i < 128; ++i)
> + x = __builtin_fmax (x, ptr[i]);
> + return x;
> +}
> +
> +/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.s,.*\tfmaxnm\tz[0-9]+\.s, \1/m, z[0-9]+\.s, z[0-9]+\.s\n} } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.d,.*\tfmaxnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c
> new file mode 100644
> index 00000000000..a8eee0f4b26
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_3.c
> @@ -0,0 +1,18 @@
> +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
> +
> +void
> +f (double *restrict res, double *restrict ptr)
> +{
> + double x0 = res[0];
> + double x1 = res[1];
> + for (int i = 0; i < 128; i += 2)
> + {
> + x0 = __builtin_fmax (x0, ptr[i + 0]);
> + x1 = __builtin_fmax (x1, ptr[i + 1]);
> + }
> + res[0] = x0;
> + res[1] = x1;
> +}
> +
> +/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.d,.*\tfmaxnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
> +/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c
> new file mode 100644
> index 00000000000..10aced05f1a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_2.c
> @@ -0,0 +1,22 @@
> +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
> +
> +float
> +f1 (float x, float *ptr)
> +{
> + for (int i = 0; i < 128; ++i)
> + x = __builtin_fminf (x, ptr[i]);
> + return x;
> +}
> +
> +double
> +f2 (double x, double *ptr)
> +{
> + for (int i = 0; i < 128; ++i)
> + x = __builtin_fmin (x, ptr[i]);
> + return x;
> +}
> +
> +/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.s,.*\tfminnm\tz[0-9]+\.s, \1/m, z[0-9]+\.s, z[0-9]+\.s\n} } } */
> +/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.d,.*\tfminnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
> +/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c
> new file mode 100644
> index 00000000000..80ad0160249
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_3.c
> @@ -0,0 +1,18 @@
> +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
> +
> +void
> +f (double *restrict res, double *restrict ptr)
> +{
> + double x0 = res[0];
> + double x1 = res[1];
> + for (int i = 0; i < 128; i += 2)
> + {
> + x0 = __builtin_fmin (x0, ptr[i + 0]);
> + x1 = __builtin_fmin (x1, ptr[i + 1]);
> + }
> + res[0] = x0;
> + res[1] = x1;
> +}
> +
> +/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.d,.*\tfminnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
> +/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index cae895a88f2..726cda05e7a 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -3185,9 +3185,22 @@ reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn)
> return true;
>
> default:
> - break;
> - }
> - return false;
> + return false;
> + }
> + else
> + switch (combined_fn (code))
> + {
> + CASE_CFN_FMAX:
> + *reduc_fn = IFN_REDUC_FMAX;
> + return true;
> +
> + CASE_CFN_FMIN:
> + *reduc_fn = IFN_REDUC_FMIN;
> + return true;
> +
> + default:
> + return false;
> + }
> }
>
> /* If there is a neutral value X such that a reduction would not be affected
> @@ -3223,9 +3236,18 @@ neutral_op_for_reduction (tree scalar_type, code_helper code,
> return initial_value;
>
> default:
> - break;
> + return NULL_TREE;
> + }
> + else
> + switch (combined_fn (code))
> + {
> + CASE_CFN_FMIN:
> + CASE_CFN_FMAX:
> + return initial_value;
> +
> + default:
> + return NULL_TREE;
> }
> - return NULL_TREE;
> }
>
> /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
> @@ -3255,9 +3277,18 @@ needs_fold_left_reduction_p (tree type, code_helper code)
> return false;
>
> default:
> - break;
> + return !flag_associative_math;
> + }
> + else
> + switch (combined_fn (code))
> + {
> + CASE_CFN_FMIN:
> + CASE_CFN_FMAX:
> + return false;
> +
> + default:
> + return !flag_associative_math;
> }
> - return !flag_associative_math;
> }
>
> if (INTEGRAL_TYPE_P (type))
> --
> 2.25.1
>
@@ -3624,8 +3624,8 @@ (define_insn "popcount<mode>2"
;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
(define_expand "reduc_<optab>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
- (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
- FMAXMINV)]
+ (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
+ FMAXMINV)]
"TARGET_SIMD"
{
rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
@@ -3637,6 +3637,17 @@ (define_expand "reduc_<optab>_scal_<mode>"
}
)
+(define_expand "reduc_<fmaxmin>_scal_<mode>"
+ [(match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
+ FMAXMINNMV)]
+ "TARGET_SIMD"
+ {
+ emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
+ DONE;
+ }
+)
+
;; Likewise for integer cases, signed and unsigned.
(define_expand "reduc_<optab>_scal_<mode>"
[(match_operand:<VEL> 0 "register_operand")
@@ -8566,6 +8566,17 @@ (define_expand "reduc_<optab>_scal_<mode>"
}
)
+(define_expand "reduc_<fmaxmin>_scal_<mode>"
+ [(match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")]
+ FMAXMINNMV)]
+ "TARGET_SVE"
+ {
+ emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
+ DONE;
+ }
+)
+
;; Predicated floating-point tree reductions.
(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
[(set (match_operand:<VEL> 0 "register_operand" "=w")
@@ -2510,6 +2510,8 @@ (define_int_iterator MAXMINV [UNSPEC_UMAXV UNSPEC_UMINV
(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
UNSPEC_FMAXNMV UNSPEC_FMINNMV])
+(define_int_iterator FMAXMINNMV [UNSPEC_FMAXNMV UNSPEC_FMINNMV])
+
(define_int_iterator SVE_INT_ADDV [UNSPEC_SADDV UNSPEC_UADDV])
(define_int_iterator USADDLP [UNSPEC_SADDLP UNSPEC_UADDLP])
@@ -3216,8 +3218,10 @@ (define_int_attr optab [(UNSPEC_ANDF "and")
(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan")
(UNSPEC_FMAXNM "fmax")
+ (UNSPEC_FMAXNMV "fmax")
(UNSPEC_FMIN "fmin_nan")
(UNSPEC_FMINNM "fmin")
+ (UNSPEC_FMINNMV "fmin")
(UNSPEC_COND_FMAXNM "fmax")
(UNSPEC_COND_FMINNM "fmin")])
@@ -5400,6 +5400,14 @@ Find the unsigned minimum/maximum of the elements of a vector. The vector is
operand 1, and operand 0 is the scalar result, with mode equal to the mode of
the elements of the input vector.
+@cindex @code{reduc_fmin_scal_@var{m}} instruction pattern
+@cindex @code{reduc_fmax_scal_@var{m}} instruction pattern
+@item @samp{reduc_fmin_scal_@var{m}}, @samp{reduc_fmax_scal_@var{m}}
+Find the floating-point minimum/maximum of the elements of a vector,
+using the same rules as @code{fmin@var{m}3} and @code{fmax@var{m}3}.
+Operand 1 is a vector of mode @var{m} and operand 0 is the scalar
+result, which has mode @code{GET_MODE_INNER (@var{m})}.
+
@cindex @code{reduc_plus_scal_@var{m}} instruction pattern
@item @samp{reduc_plus_scal_@var{m}}
Compute the sum of the elements of a vector. The vector is operand 1, and
@@ -216,6 +216,10 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MAX, ECF_CONST | ECF_NOTHROW, first,
reduc_smax_scal, reduc_umax_scal, unary)
DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MIN, ECF_CONST | ECF_NOTHROW, first,
reduc_smin_scal, reduc_umin_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_FMAX, ECF_CONST | ECF_NOTHROW,
+ reduc_fmax_scal, unary)
+DEF_INTERNAL_OPTAB_FN (REDUC_FMIN, ECF_CONST | ECF_NOTHROW,
+ reduc_fmin_scal, unary)
DEF_INTERNAL_OPTAB_FN (REDUC_AND, ECF_CONST | ECF_NOTHROW,
reduc_and_scal, unary)
DEF_INTERNAL_OPTAB_FN (REDUC_IOR, ECF_CONST | ECF_NOTHROW,
@@ -335,6 +335,8 @@ OPTAB_D (fmax_optab, "fmax$a3")
OPTAB_D (fmin_optab, "fmin$a3")
/* Vector reduction to a scalar. */
+OPTAB_D (reduc_fmax_scal_optab, "reduc_fmax_scal_$a")
+OPTAB_D (reduc_fmin_scal_optab, "reduc_fmin_scal_$a")
OPTAB_D (reduc_smax_scal_optab, "reduc_smax_scal_$a")
OPTAB_D (reduc_smin_scal_optab, "reduc_smin_scal_$a")
OPTAB_D (reduc_plus_scal_optab, "reduc_plus_scal_$a")
new file mode 100644
@@ -0,0 +1,83 @@
+#include "tree-vect.h"
+
+#ifndef TYPE
+#define TYPE float
+#define FN __builtin_fmaxf
+#endif
+
+TYPE __attribute__((noipa))
+test (TYPE x, TYPE *ptr, int n)
+{
+ for (int i = 0; i < n; ++i)
+ x = FN (x, ptr[i]);
+ return x;
+}
+
+#define N 128
+#define HALF (N / 2)
+
+int
+main (void)
+{
+ check_vect ();
+
+ TYPE a[N];
+
+ for (int i = 0; i < N; ++i)
+ a[i] = i;
+
+ if (test (-1, a, 1) != 0)
+ __builtin_abort ();
+ if (test (-1, a, 64) != 63)
+ __builtin_abort ();
+ if (test (-1, a, 65) != 64)
+ __builtin_abort ();
+ if (test (-1, a, 66) != 65)
+ __builtin_abort ();
+ if (test (-1, a, 67) != 66)
+ __builtin_abort ();
+ if (test (-1, a, 128) != 127)
+ __builtin_abort ();
+ if (test (127, a, 128) != 127)
+ __builtin_abort ();
+ if (test (128, a, 128) != 128)
+ __builtin_abort ();
+
+ for (int i = 0; i < N; ++i)
+ a[i] = -i;
+
+ if (test (-60, a, 4) != 0)
+ __builtin_abort ();
+ if (test (0, a, 4) != 0)
+ __builtin_abort ();
+ if (test (1, a, 4) != 1)
+ __builtin_abort ();
+
+ for (int i = 0; i < HALF; ++i)
+ {
+ a[i] = i;
+ a[HALF + i] = HALF - i;
+ }
+
+ if (test (0, a, HALF - 16) != HALF - 17)
+ __builtin_abort ();
+ if (test (0, a, HALF - 2) != HALF - 3)
+ __builtin_abort ();
+ if (test (0, a, HALF - 1) != HALF - 2)
+ __builtin_abort ();
+ if (test (0, a, HALF) != HALF - 1)
+ __builtin_abort ();
+ if (test (0, a, HALF + 1) != HALF)
+ __builtin_abort ();
+ if (test (0, a, HALF + 2) != HALF)
+ __builtin_abort ();
+ if (test (0, a, HALF + 3) != HALF)
+ __builtin_abort ();
+ if (test (0, a, HALF + 16) != HALF)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
new file mode 100644
@@ -0,0 +1,7 @@
+#define TYPE double
+#define FN __builtin_fmax
+
+#include "vect-fmax-1.c"
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
new file mode 100644
@@ -0,0 +1,83 @@
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+test (double x0, double x1, double *restrict res, double *restrict ptr, int n)
+{
+ for (int i = 0; i < n; i += 2)
+ {
+ x0 = __builtin_fmax (x0, ptr[i + 0]);
+ x1 = __builtin_fmax (x1, ptr[i + 1]);
+ }
+ res[0] = x0;
+ res[1] = x1;
+}
+
+#define N 128
+#define HALF (N / 2)
+
+int
+main (void)
+{
+ check_vect ();
+
+ double res[2], a[N];
+
+ for (int i = 0; i < N; i += 2)
+ {
+ a[i] = i < HALF ? i : HALF;
+ a[i + 1] = i / 8;
+ }
+
+ test (-1, -1, res, a, 2);
+ if (res[0] != 0 || res[1] != 0)
+ __builtin_abort ();
+
+ test (-1, -1, res, a, 6);
+ if (res[0] != 4 || res[1] != 0)
+ __builtin_abort ();
+
+ test (-1, -1, res, a, 8);
+ if (res[0] != 6 || res[1] != 0)
+ __builtin_abort ();
+
+ test (-1, -1, res, a, 10);
+ if (res[0] != 8 || res[1] != 1)
+ __builtin_abort ();
+
+ test (-1, -1, res, a, HALF - 2);
+ if (res[0] != HALF - 4 || res[1] != HALF / 8 - 1)
+ __builtin_abort ();
+
+ test (-1, -1, res, a, HALF);
+ if (res[0] != HALF - 2 || res[1] != HALF / 8 - 1)
+ __builtin_abort ();
+
+ test (-1, -1, res, a, HALF + 2);
+ if (res[0] != HALF || res[1] != HALF / 8)
+ __builtin_abort ();
+
+ test (-1, -1, res, a, HALF + 8);
+ if (res[0] != HALF || res[1] != HALF / 8)
+ __builtin_abort ();
+
+ test (-1, -1, res, a, HALF + 10);
+ if (res[0] != HALF || res[1] != HALF / 8 + 1)
+ __builtin_abort ();
+
+ test (-1, -1, res, a, N);
+ if (res[0] != HALF || res[1] != N / 8 - 1)
+ __builtin_abort ();
+
+ test (HALF + 1, -1, res, a, N);
+ if (res[0] != HALF + 1 || res[1] != N / 8 - 1)
+ __builtin_abort ();
+
+ test (HALF + 1, N, res, a, N);
+ if (res[0] != HALF + 1 || res[1] != N)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
new file mode 100644
@@ -0,0 +1,87 @@
+#include "tree-vect.h"
+
+#ifndef TYPE
+#define TYPE float
+#define FN __builtin_fminf
+#endif
+
+/* Fold PTR[0..N-1] into X with FN; FN is fminf unless TYPE is predefined.  */
+TYPE __attribute__((noipa))
+test (TYPE x, TYPE *ptr, int n)
+{
+ for (int i = 0; i < n; ++i)
+ x = FN (x, ptr[i]);
+ return x;
+}
+
+#define N 128
+#define HALF (N / 2)
+
+int
+main (void)
+{
+ check_vect ();
+
+ TYPE a[N];
+
+ /* Decreasing data: the last element read wins unless X is lower.  */
+ for (int i = 0; i < N; ++i)
+ a[i] = -i;
+
+ if (test (1, a, 1) != 0)
+ __builtin_abort ();
+ if (test (1, a, 64) != -63)
+ __builtin_abort ();
+ if (test (1, a, 65) != -64)
+ __builtin_abort ();
+ if (test (1, a, 66) != -65)
+ __builtin_abort ();
+ if (test (1, a, 67) != -66)
+ __builtin_abort ();
+ if (test (1, a, 128) != -127)
+ __builtin_abort ();
+ if (test (-127, a, 128) != -127)
+ __builtin_abort ();
+ if (test (-128, a, 128) != -128)
+ __builtin_abort ();
+
+ /* Increasing data: the result is the smaller of X and a[0].  */
+ for (int i = 0; i < N; ++i)
+ a[i] = i;
+
+ if (test (1, a, 4) != 0)
+ __builtin_abort ();
+ if (test (0, a, 4) != 0)
+ __builtin_abort ();
+ if (test (-1, a, 4) != -1)
+ __builtin_abort ();
+
+ /* Data falls from HALF to 1, then restarts at 0 and rises again.  */
+ for (int i = 0; i < HALF; ++i)
+ {
+ a[i] = HALF - i;
+ a[HALF + i] = i;
+ }
+
+ if (test (N, a, HALF - 16) != 17)
+ __builtin_abort ();
+ if (test (N, a, HALF - 2) != 3)
+ __builtin_abort ();
+ if (test (N, a, HALF - 1) != 2)
+ __builtin_abort ();
+ if (test (N, a, HALF) != 1)
+ __builtin_abort ();
+ if (test (N, a, HALF + 1) != 0)
+ __builtin_abort ();
+ if (test (N, a, HALF + 2) != 0)
+ __builtin_abort ();
+ if (test (N, a, HALF + 3) != 0)
+ __builtin_abort ();
+ if (test (N, a, HALF + 16) != 0)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
new file mode 100644
@@ -0,0 +1,9 @@
+#ifndef TYPE
+#define TYPE double
+#define FN __builtin_fmin
+#endif
+
+#include "vect-fmin-1.c"
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
new file mode 100644
@@ -0,0 +1,83 @@
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+test (double x0, double x1, double *restrict res, double *restrict ptr, int n)
+{
+ for (int i = 0; i < n; i += 2)
+ {
+ x0 = __builtin_fmin (x0, ptr[i + 0]);
+ x1 = __builtin_fmin (x1, ptr[i + 1]);
+ }
+ res[0] = x0;
+ res[1] = x1;
+}
+
+#define N 128
+#define HALF (N / 2)
+
+int
+main (void)
+{
+ check_vect ();
+
+ double res[2], a[N];
+
+ for (int i = 0; i < N; i += 2)
+ {
+ a[i] = i < HALF ? HALF - i : 0;
+ a[i + 1] = -i / 8;
+ }
+
+ test (N, N, res, a, 2);
+ if (res[0] != HALF || res[1] != 0)
+ __builtin_abort ();
+
+ test (N, N, res, a, 6);
+ if (res[0] != HALF - 4 || res[1] != 0)
+ __builtin_abort ();
+
+ test (N, N, res, a, 8);
+ if (res[0] != HALF - 6 || res[1] != 0)
+ __builtin_abort ();
+
+ test (N, N, res, a, 10);
+ if (res[0] != HALF - 8 || res[1] != -1)
+ __builtin_abort ();
+
+ test (N, N, res, a, HALF - 2);
+ if (res[0] != 4 || res[1] != -HALF / 8 + 1)
+ __builtin_abort ();
+
+ test (N, N, res, a, HALF);
+ if (res[0] != 2 || res[1] != -HALF / 8 + 1)
+ __builtin_abort ();
+
+ test (N, N, res, a, HALF + 2);
+ if (res[0] != 0 || res[1] != -HALF / 8)
+ __builtin_abort ();
+
+ test (N, N, res, a, HALF + 8);
+ if (res[0] != 0 || res[1] != -HALF / 8)
+ __builtin_abort ();
+
+ test (N, N, res, a, HALF + 10);
+ if (res[0] != 0 || res[1] != -HALF / 8 - 1)
+ __builtin_abort ();
+
+ test (N, N, res, a, N);
+ if (res[0] != 0 || res[1] != -N / 8 + 1)
+ __builtin_abort ();
+
+ test (-1, N, res, a, N);
+ if (res[0] != -1 || res[1] != -N / 8 + 1)
+ __builtin_abort ();
+
+ test (-1, -N / 8, res, a, N);
+ if (res[0] != -1 || res[1] != -N / 8)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump "Detected reduction" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_max_reduc } } } */
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#pragma GCC target "+nosve"
+
+float
+f1 (float x, float *ptr)
+{
+ for (int i = 0; i < 128; ++i)
+ x = __builtin_fmaxf (x, ptr[i]);
+ return x;
+}
+
+double
+f2 (double x, double *ptr)
+{
+ for (int i = 0; i < 128; ++i)
+ x = __builtin_fmax (x, ptr[i]);
+ return x;
+}
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, v[0-9]+\.4s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnm\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmp\td[0-9]+, v[0-9]+\.2d\n} 1 } } */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#pragma GCC target "+nosve"
+
+void
+f (double *restrict res, double *restrict ptr)
+{
+ double x0 = res[0];
+ double x1 = res[1];
+ for (int i = 0; i < 128; i += 2)
+ {
+ x0 = __builtin_fmax (x0, ptr[i + 0]);
+ x1 = __builtin_fmax (x1, ptr[i + 1]);
+ }
+ res[0] = x0;
+ res[1] = x1;
+}
+
+/* { dg-final { scan-assembler-times {\tfmaxnm\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d\n} 1 } } */
+/* { dg-final { scan-assembler {\tstr\tq[0-9]+, \[x0\]\n} } } */
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#pragma GCC target "+nosve"
+
+float
+f1 (float x, float *ptr)
+{
+ for (int i = 0; i < 128; ++i)
+ x = __builtin_fminf (x, ptr[i]);
+ return x;
+}
+
+double
+f2 (double x, double *ptr)
+{
+ for (int i = 0; i < 128; ++i)
+ x = __builtin_fmin (x, ptr[i]);
+ return x;
+}
+
+/* { dg-final { scan-assembler-times {\tfminnm\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, v[0-9]+\.4s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnm\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfminnmp\td[0-9]+, v[0-9]+\.2d\n} 1 } } */
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#pragma GCC target "+nosve"
+
+void
+f (double *restrict res, double *restrict ptr)
+{
+ double x0 = res[0];
+ double x1 = res[1];
+ for (int i = 0; i < 128; i += 2)
+ {
+ x0 = __builtin_fmin (x0, ptr[i + 0]);
+ x1 = __builtin_fmin (x1, ptr[i + 1]);
+ }
+ res[0] = x0;
+ res[1] = x1;
+}
+
+/* { dg-final { scan-assembler-times {\tfminnm\tv[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d\n} 1 } } */
+/* { dg-final { scan-assembler {\tstr\tq[0-9]+, \[x0\]\n} } } */
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+float
+f1 (float x, float *ptr)
+{
+ for (int i = 0; i < 128; ++i)
+ x = __builtin_fmaxf (x, ptr[i]);
+ return x;
+}
+
+double
+f2 (double x, double *ptr)
+{
+ for (int i = 0; i < 128; ++i)
+ x = __builtin_fmax (x, ptr[i]);
+ return x;
+}
+
+/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.s,.*\tfmaxnm\tz[0-9]+\.s, \1/m, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.d,.*\tfmaxnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+void
+f (double *restrict res, double *restrict ptr)
+{
+ double x0 = res[0];
+ double x1 = res[1];
+ for (int i = 0; i < 128; i += 2)
+ {
+ x0 = __builtin_fmax (x0, ptr[i + 0]);
+ x1 = __builtin_fmax (x1, ptr[i + 1]);
+ }
+ res[0] = x0;
+ res[1] = x1;
+}
+
+/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.d,.*\tfmaxnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler-times {\tfmaxnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+float
+f1 (float x, float *ptr)
+{
+ for (int i = 0; i < 128; ++i)
+ x = __builtin_fminf (x, ptr[i]);
+ return x;
+}
+
+double
+f2 (double x, double *ptr)
+{
+ for (int i = 0; i < 128; ++i)
+ x = __builtin_fmin (x, ptr[i]);
+ return x;
+}
+
+/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.s,.*\tfminnm\tz[0-9]+\.s, \1/m, z[0-9]+\.s, z[0-9]+\.s\n} } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.d,.*\tfminnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+void
+f (double *restrict res, double *restrict ptr)
+{
+ double x0 = res[0];
+ double x1 = res[1];
+ for (int i = 0; i < 128; i += 2)
+ {
+ x0 = __builtin_fmin (x0, ptr[i + 0]);
+ x1 = __builtin_fmin (x1, ptr[i + 1]);
+ }
+ res[0] = x0;
+ res[1] = x1;
+}
+
+/* { dg-final { scan-assembler {\twhilelo\t(p[0-7])\.d,.*\tfminnm\tz[0-9]+\.d, \1/m, z[0-9]+\.d, z[0-9]+\.d\n} } } */
+/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
@@ -3185,9 +3185,22 @@ reduction_fn_for_scalar_code (code_helper code, internal_fn *reduc_fn)
return true;
default:
- break;
- }
- return false;
+ return false;
+ }
+ else
+ switch (combined_fn (code))
+ {
+ CASE_CFN_FMAX:
+ *reduc_fn = IFN_REDUC_FMAX;
+ return true;
+
+ CASE_CFN_FMIN:
+ *reduc_fn = IFN_REDUC_FMIN;
+ return true;
+
+ default:
+ return false;
+ }
}
/* If there is a neutral value X such that a reduction would not be affected
@@ -3223,9 +3236,18 @@ neutral_op_for_reduction (tree scalar_type, code_helper code,
return initial_value;
default:
- break;
+ return NULL_TREE;
+ }
+ else
+ switch (combined_fn (code))
+ {
+ CASE_CFN_FMIN:
+ CASE_CFN_FMAX:
+ return initial_value;
+
+ default:
+ return NULL_TREE;
}
- return NULL_TREE;
}
/* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
@@ -3255,9 +3277,18 @@ needs_fold_left_reduction_p (tree type, code_helper code)
return false;
default:
- break;
+ return !flag_associative_math;
+ }
+ else
+ switch (combined_fn (code))
+ {
+ CASE_CFN_FMIN:
+ CASE_CFN_FMAX:
+ return false;
+
+ default:
+ return !flag_associative_math;
}
- return !flag_associative_math;
}
if (INTEGRAL_TYPE_P (type))