vect: Adjust vect_transform_reduction assertion [PR114883]
Checks
Commit Message
Hi!
The assertion in vect_transform_reduction doesn't allow
IFN_COND_MIN/IFN_COND_MAX, which are commutative conditional binary
operations just like the already accepted IFN_COND_ADD/MUL/AND/IOR/XOR,
and can be handled just fine.
In particular, we emit
vminpd %zmm3, %zmm5, %zmm0{%k2}
vminpd %zmm0, %zmm3, %zmm5{%k1}
and
vmaxpd %zmm3, %zmm5, %zmm0{%k2}
vmaxpd %zmm0, %zmm3, %zmm5{%k1}
in the vectorized loops of the first and second subroutine of the new
test, respectively.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
14.1?
2024-04-30 Jakub Jelinek <jakub@redhat.com>
Hongtao Liu <hongtao.liu@intel.com>
PR tree-optimization/114883
* tree-vect-loop.cc (vect_transform_reduction): Allow IFN_COND_MIN and
IFN_COND_MAX in the assert.
* gfortran.dg/pr114883.f90: New test.
Jakub
Comments
On Tue, 30 Apr 2024, Jakub Jelinek wrote:
> Hi!
>
> The assertion doesn't allow IFN_COND_MIN/IFN_COND_MAX, which are
> commutative conditional binary operations like ADD/MUL/AND/IOR/XOR,
> and can be handled just fine.
> In particular, we emit
> vminpd %zmm3, %zmm5, %zmm0{%k2}
> vminpd %zmm0, %zmm3, %zmm5{%k1}
> and
> vmaxpd %zmm3, %zmm5, %zmm0{%k2}
> vmaxpd %zmm0, %zmm3, %zmm5{%k1}
> in the vectorized loops of the first and second subroutine.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and
> 14.1?
OK for both.
Richard.
> 2024-04-30 Jakub Jelinek <jakub@redhat.com>
> Hongtao Liu <hongtao.liu@intel.com>
>
> PR tree-optimization/114883
> * tree-vect-loop.cc (vect_transform_reduction): Allow IFN_COND_MIN and
> IFN_COND_MAX in the assert.
>
> * gfortran.dg/pr114883.f90: New test.
>
> --- gcc/tree-vect-loop.cc.jj 2024-04-17 11:34:02.465185397 +0200
> +++ gcc/tree-vect-loop.cc 2024-04-29 20:41:04.973723992 +0200
> @@ -8505,7 +8505,8 @@ vect_transform_reduction (loop_vec_info
> {
> gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
> || code == IFN_COND_MUL || code == IFN_COND_AND
> - || code == IFN_COND_IOR || code == IFN_COND_XOR);
> + || code == IFN_COND_IOR || code == IFN_COND_XOR
> + || code == IFN_COND_MIN || code == IFN_COND_MAX);
> gcc_assert (op.num_ops == 4
> && (op.ops[reduc_index]
> == op.ops[internal_fn_else_index ((internal_fn) code)]));
> --- gcc/testsuite/gfortran.dg/pr114883.f90.jj 2024-04-29 20:39:39.000871849 +0200
> +++ gcc/testsuite/gfortran.dg/pr114883.f90 2024-04-29 20:39:27.757021972 +0200
> @@ -0,0 +1,53 @@
> +! PR tree-optimization/114883
> +! { dg-do compile }
> +! { dg-options "-O2 -fvect-cost-model=cheap" }
> +! { dg-additional-options "-march=x86-64-v4" { target i?86-*-* x86_64-*-* } }
> +
> +subroutine pr114883_1(a, b, c, d, e, f, g, h, o)
> + real(8) :: c(1011), d(1011), e(0:1011)
> + real(8) :: p, q, f, r, g(1011), h(1011), b, bar
> + integer :: o(100), a, t, u
> + p = 0.0_8
> + r = bar()
> + u = 1
> + do i = 1,a
> + do k = 1,1011
> + km1 = max0(k-1,1)
> + h(k) = c(k) * e(k-1) * d(km1)
> + f = g(k) + h(k)
> + if(f.gt.1.e-6)then
> + p = min(p,r)
> + endif
> + end do
> + q = 0.9_8 * p
> + t = integer(b/q + 1)
> + if(t>100)then
> + u = t
> + endif
> + o(u) = o(u) + 1
> + end do
> +end subroutine pr114883_1
> +subroutine pr114883_2(a, b, c, d, e, f, g, h, o)
> + real(8) :: c(1011), d(1011), e(0:1011)
> + real(8) :: p, q, f, r, g(1011), h(1011), b, bar
> + integer :: o(100), a, t, u
> + p = 0.0_8
> + r = bar()
> + u = 1
> + do i = 1,a
> + do k = 1,1011
> + km1 = max0(k-1,1)
> + h(k) = c(k) * e(k-1) * d(km1)
> + f = g(k) + h(k)
> + if(f.gt.1.e-6)then
> + p = max(p,r)
> + endif
> + end do
> + q = 0.9_8 * p
> + t = integer(b/q + 1)
> + if(t>100)then
> + u = t
> + endif
> + o(u) = o(u) + 1
> + end do
> +end subroutine pr114883_2
>
> Jakub
>
>
@@ -8505,7 +8505,8 @@ vect_transform_reduction (loop_vec_info
{
gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
|| code == IFN_COND_MUL || code == IFN_COND_AND
- || code == IFN_COND_IOR || code == IFN_COND_XOR);
+ || code == IFN_COND_IOR || code == IFN_COND_XOR
+ || code == IFN_COND_MIN || code == IFN_COND_MAX);
gcc_assert (op.num_ops == 4
&& (op.ops[reduc_index]
== op.ops[internal_fn_else_index ((internal_fn) code)]));
@@ -0,0 +1,53 @@
+! PR tree-optimization/114883
+! { dg-do compile }
+! { dg-options "-O2 -fvect-cost-model=cheap" }
+! { dg-additional-options "-march=x86-64-v4" { target i?86-*-* x86_64-*-* } }
+
+subroutine pr114883_1(a, b, c, d, e, f, g, h, o)
+ real(8) :: c(1011), d(1011), e(0:1011)
+ real(8) :: p, q, f, r, g(1011), h(1011), b, bar
+ integer :: o(100), a, t, u
+ p = 0.0_8
+ r = bar()
+ u = 1
+ do i = 1,a
+ do k = 1,1011
+ km1 = max0(k-1,1)
+ h(k) = c(k) * e(k-1) * d(km1)
+ f = g(k) + h(k)
+ if(f.gt.1.e-6)then
+ p = min(p,r)
+ endif
+ end do
+ q = 0.9_8 * p
+ t = integer(b/q + 1)
+ if(t>100)then
+ u = t
+ endif
+ o(u) = o(u) + 1
+ end do
+end subroutine pr114883_1
+subroutine pr114883_2(a, b, c, d, e, f, g, h, o)
+ real(8) :: c(1011), d(1011), e(0:1011)
+ real(8) :: p, q, f, r, g(1011), h(1011), b, bar
+ integer :: o(100), a, t, u
+ p = 0.0_8
+ r = bar()
+ u = 1
+ do i = 1,a
+ do k = 1,1011
+ km1 = max0(k-1,1)
+ h(k) = c(k) * e(k-1) * d(km1)
+ f = g(k) + h(k)
+ if(f.gt.1.e-6)then
+ p = max(p,r)
+ endif
+ end do
+ q = 0.9_8 * p
+ t = integer(b/q + 1)
+ if(t>100)then
+ u = t
+ endif
+ o(u) = o(u) + 1
+ end do
+end subroutine pr114883_2