Don't assert for IFN_COND_{MIN, MAX} in vect_transform_reduction
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
success
|
Testing passed
|
Commit Message
The Fortran standard does not specify what the result of the MAX
and MIN intrinsics is if one of the arguments is a NaN. So it
should be ok to transform reduction for IFN_COND_MIN with vectorized
COND_MIN and REDUC_MIN.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk and backport to GCC14?
gcc/ChangeLog:
PR 114883
* tree-vect-loop.cc (vect_transform_reduction): Don't assert
for IFN_COND_{MIN, MAX}.
gcc/testsuite/ChangeLog:
* gfortran.dg/pr114883.f90: New test.
---
gcc/testsuite/gfortran.dg/pr114883.f90 | 191 +++++++++++++++++++++++++
gcc/tree-vect-loop.cc | 3 +-
2 files changed, 193 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gfortran.dg/pr114883.f90
Comments
On Mon, Apr 29, 2024 at 6:47 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> The Fortran standard does not specify what the result of the MAX
> and MIN intrinsics are if one of the arguments is a NaN. So it
> should be ok to tranform reduction for IFN_COND_MIN with vectorized
> COND_MIN and REDUC_MIN.
The commit subject isn't very clear. This patch isn't about "Don't assert
for IFN_COND_{MIN,MAX}". It allows IFN_COND_{MIN,MAX} in
vect_transform_reduction.
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk and backport to GCC14?
>
> gcc/ChangeLog:
>
> PR 114883
> * tree-vect-loop.cc (vect_transform_reduction): Don't assert
> for IFN_COND_{MIN, MAX}.
>
> gcc/testsuite/ChangeLog:
>
> * gfortran.dg/pr114883.f90: New test.
> ---
> gcc/testsuite/gfortran.dg/pr114883.f90 | 191 +++++++++++++++++++++++++
> gcc/tree-vect-loop.cc | 3 +-
> 2 files changed, 193 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gfortran.dg/pr114883.f90
>
> diff --git a/gcc/testsuite/gfortran.dg/pr114883.f90 b/gcc/testsuite/gfortran.dg/pr114883.f90
> new file mode 100644
> index 00000000000..86b664a521e
> --- /dev/null
> +++ b/gcc/testsuite/gfortran.dg/pr114883.f90
> @@ -0,0 +1,191 @@
> +! { dg-do compile }
> +! { dg-options "-O3" }
> +! { dg-additional-options "-march=x86-64-v4" { target { x86_64-*-* i?86-*-* } } }
> +
> +module ndrop
> +
> +
> + implicit none
> +
> + private
> + save
> +
> + public dropmixnuc
> +
> + real(8) :: npv(1011) ! number per volume concentration
> + real(8) :: alogsig(1011) ! natl log of geometric standard dev of aerosol
> +
> + type qqcw_type
> + real(8), pointer :: fldcw(:,:)
> + end type qqcw_type
> +
> +contains
> +
> + subroutine dropmixnuc(lchnk, ncol, temp, &
> + cldn,cldo, &
> + raer, dtmicro &
> + )
> + implicit none
> +
> + ! input
> +
> + integer, intent(in) :: lchnk ! chunk identifier
> + integer, intent(in) :: ncol ! number of columns
> + ! type(physics_state), intent(in) :: state ! Physics state variables
> + real(8), intent(in) :: dtmicro ! time step for microphysics (s)
> + real(8), intent(in) :: temp(1,1011) ! temperature (K)
> + real(8), intent(in) :: cldo(1,1011) ! cloud fraction on previous time step
> + real(8), intent(in) :: cldn(1,1011) ! cloud fraction
> + real(8), intent(in) :: raer(1,1011,1011) ! aerosol mass, number mixing ratios
> +
> +
> + type(qqcw_type) :: QQCW(1011)
> +
> + real(8) depvel(1,1011)! deposition velocity for droplets (m/s)
> + real(8) wtke(1,1011) ! turbulent vertical velocity at base of layer k (m/s)
> + real(8) wtke_cen(1,1011) ! turbulent vertical velocity at center of layer k (m/s)
> + real(8) zn(1011) ! g/pdel (m2/g) for layer
> + real(8) zs(1011) ! inverse of distance between levels (m)
> + real(8), parameter :: zkmin=0.01_8,zkmax=100._8
> + real(8) cs(1,1011) ! air density (kg/m3)
> + real(8) dz(1,1011) ! geometric thickness of layers (m)
> + real(8) zero
> +
> + real(8) wdiab ! diabatic vertical velocity
> + real(8), parameter :: wmixmin = 0.1 ! minimum turbulence vertical velocity (m/s)
> + ! real(8), parameter :: wmixmin = 0.2 ! minimum turbulence vertical velocity (m/s)
> + ! real(8), parameter :: wmixmin = 1.0 ! minimum turbulence vertical velocity (m/s)
> + real(8) ekk(0:1011) ! density*diffusivity for droplets (kg/m3 m2/s)
> + real(8), parameter :: sq2pi=2.5066283_8
> + real(8) dtinv
> +
> + integer km1,kp1
> + real(8) wbar,wmix,wmin,wmax
> + real(8) dumc
> + real(8) fac_srflx
> + real(8) surfrate(1011) ! surface exchange rate (/s)
> + real(8) surfratemax ! max surfrate for all species treated here
> + real(8) dtmin,tinv,dtt
> + integer nsubmix,nsubmix_bnd
> + integer i,k,m
> + real(8) dtmix
> + real(8) pi
> + integer nnew,nsav,ntemp
> + real(8) ekkp(1011),ekkm(1011) ! zn*zs*density*diffusivity
> + integer count_submix(100)
> + save count_submix
> + real(8) nsource(1,1011) ! droplet number source (#/kg/s)
> + real(8) ndropmix(1,1011) ! droplet number mixing (#/kg/s)
> + real(8) ndropcol(1) ! column droplet number (#/m2)
> +
> + real(8) na(1),va(1),hy(1)
> + real(8) naermod(1011) ! (/m3)
> + real(8) hygro(1011) ! hygroscopicity of aerosol mode
> + real(8) vaerosol(1011) ! interstit+activated aerosol volume conc (cm3/cm3)
> + real(8) :: taumix_internal_1011_inv ! 1/(internal mixing time scale for k=1011) (1/s)
> + real(8) :: cldo_tmp, cldn_tmp
> + real(8) :: tau_cld_regenerate
> +
> + integer ixndrop, l
> + integer, parameter :: psat=6 ! number of supersaturations to calc ccn concentration
> + real(8) :: supersat(psat)= & ! supersaturation (%) to determine ccn concentration
> + (/0.02,0.05,0.1,0.2,0.5,1.0/)
> + real(8) ccn(1,1011,psat) ! number conc of aerosols activated at supersat
> + character(len=8), dimension(psat) :: ccn_name(psat)= &
> + (/'CCN1','CCN2','CCN3','CCN4','CCN5','CCN6'/)
> + real(8) arg
> + integer phase ! phase of aerosol
> +
> +
> +
> + arg = 1.0_8
> + zero=0._8
> +
> +
> + pi = 4._8*atan(1.0_8)
> + dtinv=1./dtmicro
> +
> + depvel(:,:) = 0.0_8 ! droplet number is done in pkg_cld_sediment, aerosols in mz_aerosols_intr
> +
> + overall_main_i_loop: &
> + do i=1,ncol
> +
> + surfratemax = 0.0_8
> + nsav=1
> + nnew=2
> + surfrate(ixndrop)=depvel(i,ixndrop)/dz(i,1011)
> + surfratemax = max( surfratemax, surfrate(ixndrop) )
> + tau_cld_regenerate = 3600.0_8 * 3.0_8
> +
> + grow_shrink_main_k_loop: &
> + do k=1,1011
> + km1=max0(k-1,1)
> + kp1=min0(k+1,1011)
> +
> + cldo_tmp = cldo(i,k)
> + cldn_tmp = cldn(i,k) * exp( -dtmicro/tau_cld_regenerate )
> + cldo_tmp = cldn_tmp
> + cldn_tmp = cldn(i,k)
> +
> + if(cldn_tmp-cldo_tmp.gt.0.01)then
> + wbar=wtke_cen(i,k)
> + wmix=0._8
> + wmin=0._8
> + wmax=10._8
> + wdiab=0
> +
> + phase=1 ! interstitial
> + do m=1,1011
> + call loadaer(raer,qqcw,i,i,k,m,cs,npv(m),phase, &
> + na, va, hy )
> + naermod(m)=na(i)
> + vaerosol(m)=va(i)
> + hygro(m)=hy(i)
> + end do
> + dumc=(cldn_tmp-cldo_tmp)
> + endif
> +
> + enddo grow_shrink_main_k_loop
> +
> + ntemp=nsav
> + nsav=nnew
> + nnew=ntemp
> + dtmin=dtmicro
> + ekk(0)=0.0
> + ekk(1011)=0.0
> + do k=1,1011
> + km1=max0(k-1,1)
> + ekkp(k)=zn(k)*ekk(k)*zs(k)
> + ekkm(k)=zn(k)*ekk(k-1)*zs(km1)
> + tinv=ekkp(k)+ekkm(k)
> +
> + if(k.eq.1011)tinv=tinv+surfratemax
> + if(k.eq.1011)tinv=tinv+taumix_internal_1011_inv
> +
> + if(tinv.gt.1.e-6)then
> + dtt=1./tinv
> + dtmin=min(dtmin,dtt)
> + endif
> + enddo
> + dtmix=0.9*dtmin
> + nsubmix=dtmicro/dtmix+1
> + if(nsubmix>100)then
> + nsubmix_bnd=100
> + else
> + nsubmix_bnd=nsubmix
> + endif
> + count_submix(nsubmix_bnd)=count_submix(nsubmix_bnd)+1
> + dtmix=dtmicro/nsubmix
> + fac_srflx = -1.0/(zn(1011)*nsubmix)
> +
> +
> + ! call t_stopf ('nsubmix')
> +
> +enddo overall_main_i_loop
> +! end of main loop over i/longitude ....................................
> +
> +
> +call ccncalc(lchnk,ncol,temp,cs,raer,qqcw,ccn,psat,supersat,alogsig,npv)
> +return
> +end subroutine dropmixnuc
> +end module ndrop
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index a6cf0a5546c..29c03c246d4 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -8505,7 +8505,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
> {
> gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
> || code == IFN_COND_MUL || code == IFN_COND_AND
> - || code == IFN_COND_IOR || code == IFN_COND_XOR);
> + || code == IFN_COND_IOR || code == IFN_COND_XOR
> + || code == IFN_COND_MIN || code == IFN_COND_MAX);
> gcc_assert (op.num_ops == 4
> && (op.ops[reduc_index]
> == op.ops[internal_fn_else_index ((internal_fn) code)]));
> --
> 2.31.1
>
On Mon, Apr 29, 2024 at 5:30 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Mon, Apr 29, 2024 at 6:47 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > The Fortran standard does not specify what the result of the MAX
> > and MIN intrinsics are if one of the arguments is a NaN. So it
> > should be ok to tranform reduction for IFN_COND_MIN with vectorized
> > COND_MIN and REDUC_MIN.
>
> The commit subject isn't very clear. This patch isn't about "Don't assert
> for IFN_COND_{MIN,MAX}". It allows IFN_COND_{MIN,MAX} in
> vect_transform_reduction.
Well, we allow it elsewhere, we just fail to enumerate all COND_* we allow
here correctly.
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > Ok for trunk and backport to GCC14?
OK for trunk and branch.
Thanks,
Richard.
> >
> > gcc/ChangeLog:
> >
> > PR 114883
> > * tree-vect-loop.cc (vect_transform_reduction): Don't assert
> > for IFN_COND_{MIN, MAX}.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gfortran.dg/pr114883.f90: New test.
> > ---
> > gcc/testsuite/gfortran.dg/pr114883.f90 | 191 +++++++++++++++++++++++++
> > gcc/tree-vect-loop.cc | 3 +-
> > 2 files changed, 193 insertions(+), 1 deletion(-)
> > create mode 100644 gcc/testsuite/gfortran.dg/pr114883.f90
> >
> > diff --git a/gcc/testsuite/gfortran.dg/pr114883.f90 b/gcc/testsuite/gfortran.dg/pr114883.f90
> > new file mode 100644
> > index 00000000000..86b664a521e
> > --- /dev/null
> > +++ b/gcc/testsuite/gfortran.dg/pr114883.f90
> > @@ -0,0 +1,191 @@
> > +! { dg-do compile }
> > +! { dg-options "-O3" }
> > +! { dg-additional-options "-march=x86-64-v4" { target { x86_64-*-* i?86-*-* } } }
> > +
> > +module ndrop
> > +
> > +
> > + implicit none
> > +
> > + private
> > + save
> > +
> > + public dropmixnuc
> > +
> > + real(8) :: npv(1011) ! number per volume concentration
> > + real(8) :: alogsig(1011) ! natl log of geometric standard dev of aerosol
> > +
> > + type qqcw_type
> > + real(8), pointer :: fldcw(:,:)
> > + end type qqcw_type
> > +
> > +contains
> > +
> > + subroutine dropmixnuc(lchnk, ncol, temp, &
> > + cldn,cldo, &
> > + raer, dtmicro &
> > + )
> > + implicit none
> > +
> > + ! input
> > +
> > + integer, intent(in) :: lchnk ! chunk identifier
> > + integer, intent(in) :: ncol ! number of columns
> > + ! type(physics_state), intent(in) :: state ! Physics state variables
> > + real(8), intent(in) :: dtmicro ! time step for microphysics (s)
> > + real(8), intent(in) :: temp(1,1011) ! temperature (K)
> > + real(8), intent(in) :: cldo(1,1011) ! cloud fraction on previous time step
> > + real(8), intent(in) :: cldn(1,1011) ! cloud fraction
> > + real(8), intent(in) :: raer(1,1011,1011) ! aerosol mass, number mixing ratios
> > +
> > +
> > + type(qqcw_type) :: QQCW(1011)
> > +
> > + real(8) depvel(1,1011)! deposition velocity for droplets (m/s)
> > + real(8) wtke(1,1011) ! turbulent vertical velocity at base of layer k (m/s)
> > + real(8) wtke_cen(1,1011) ! turbulent vertical velocity at center of layer k (m/s)
> > + real(8) zn(1011) ! g/pdel (m2/g) for layer
> > + real(8) zs(1011) ! inverse of distance between levels (m)
> > + real(8), parameter :: zkmin=0.01_8,zkmax=100._8
> > + real(8) cs(1,1011) ! air density (kg/m3)
> > + real(8) dz(1,1011) ! geometric thickness of layers (m)
> > + real(8) zero
> > +
> > + real(8) wdiab ! diabatic vertical velocity
> > + real(8), parameter :: wmixmin = 0.1 ! minimum turbulence vertical velocity (m/s)
> > + ! real(8), parameter :: wmixmin = 0.2 ! minimum turbulence vertical velocity (m/s)
> > + ! real(8), parameter :: wmixmin = 1.0 ! minimum turbulence vertical velocity (m/s)
> > + real(8) ekk(0:1011) ! density*diffusivity for droplets (kg/m3 m2/s)
> > + real(8), parameter :: sq2pi=2.5066283_8
> > + real(8) dtinv
> > +
> > + integer km1,kp1
> > + real(8) wbar,wmix,wmin,wmax
> > + real(8) dumc
> > + real(8) fac_srflx
> > + real(8) surfrate(1011) ! surface exchange rate (/s)
> > + real(8) surfratemax ! max surfrate for all species treated here
> > + real(8) dtmin,tinv,dtt
> > + integer nsubmix,nsubmix_bnd
> > + integer i,k,m
> > + real(8) dtmix
> > + real(8) pi
> > + integer nnew,nsav,ntemp
> > + real(8) ekkp(1011),ekkm(1011) ! zn*zs*density*diffusivity
> > + integer count_submix(100)
> > + save count_submix
> > + real(8) nsource(1,1011) ! droplet number source (#/kg/s)
> > + real(8) ndropmix(1,1011) ! droplet number mixing (#/kg/s)
> > + real(8) ndropcol(1) ! column droplet number (#/m2)
> > +
> > + real(8) na(1),va(1),hy(1)
> > + real(8) naermod(1011) ! (/m3)
> > + real(8) hygro(1011) ! hygroscopicity of aerosol mode
> > + real(8) vaerosol(1011) ! interstit+activated aerosol volume conc (cm3/cm3)
> > + real(8) :: taumix_internal_1011_inv ! 1/(internal mixing time scale for k=1011) (1/s)
> > + real(8) :: cldo_tmp, cldn_tmp
> > + real(8) :: tau_cld_regenerate
> > +
> > + integer ixndrop, l
> > + integer, parameter :: psat=6 ! number of supersaturations to calc ccn concentration
> > + real(8) :: supersat(psat)= & ! supersaturation (%) to determine ccn concentration
> > + (/0.02,0.05,0.1,0.2,0.5,1.0/)
> > + real(8) ccn(1,1011,psat) ! number conc of aerosols activated at supersat
> > + character(len=8), dimension(psat) :: ccn_name(psat)= &
> > + (/'CCN1','CCN2','CCN3','CCN4','CCN5','CCN6'/)
> > + real(8) arg
> > + integer phase ! phase of aerosol
> > +
> > +
> > +
> > + arg = 1.0_8
> > + zero=0._8
> > +
> > +
> > + pi = 4._8*atan(1.0_8)
> > + dtinv=1./dtmicro
> > +
> > + depvel(:,:) = 0.0_8 ! droplet number is done in pkg_cld_sediment, aerosols in mz_aerosols_intr
> > +
> > + overall_main_i_loop: &
> > + do i=1,ncol
> > +
> > + surfratemax = 0.0_8
> > + nsav=1
> > + nnew=2
> > + surfrate(ixndrop)=depvel(i,ixndrop)/dz(i,1011)
> > + surfratemax = max( surfratemax, surfrate(ixndrop) )
> > + tau_cld_regenerate = 3600.0_8 * 3.0_8
> > +
> > + grow_shrink_main_k_loop: &
> > + do k=1,1011
> > + km1=max0(k-1,1)
> > + kp1=min0(k+1,1011)
> > +
> > + cldo_tmp = cldo(i,k)
> > + cldn_tmp = cldn(i,k) * exp( -dtmicro/tau_cld_regenerate )
> > + cldo_tmp = cldn_tmp
> > + cldn_tmp = cldn(i,k)
> > +
> > + if(cldn_tmp-cldo_tmp.gt.0.01)then
> > + wbar=wtke_cen(i,k)
> > + wmix=0._8
> > + wmin=0._8
> > + wmax=10._8
> > + wdiab=0
> > +
> > + phase=1 ! interstitial
> > + do m=1,1011
> > + call loadaer(raer,qqcw,i,i,k,m,cs,npv(m),phase, &
> > + na, va, hy )
> > + naermod(m)=na(i)
> > + vaerosol(m)=va(i)
> > + hygro(m)=hy(i)
> > + end do
> > + dumc=(cldn_tmp-cldo_tmp)
> > + endif
> > +
> > + enddo grow_shrink_main_k_loop
> > +
> > + ntemp=nsav
> > + nsav=nnew
> > + nnew=ntemp
> > + dtmin=dtmicro
> > + ekk(0)=0.0
> > + ekk(1011)=0.0
> > + do k=1,1011
> > + km1=max0(k-1,1)
> > + ekkp(k)=zn(k)*ekk(k)*zs(k)
> > + ekkm(k)=zn(k)*ekk(k-1)*zs(km1)
> > + tinv=ekkp(k)+ekkm(k)
> > +
> > + if(k.eq.1011)tinv=tinv+surfratemax
> > + if(k.eq.1011)tinv=tinv+taumix_internal_1011_inv
> > +
> > + if(tinv.gt.1.e-6)then
> > + dtt=1./tinv
> > + dtmin=min(dtmin,dtt)
> > + endif
> > + enddo
> > + dtmix=0.9*dtmin
> > + nsubmix=dtmicro/dtmix+1
> > + if(nsubmix>100)then
> > + nsubmix_bnd=100
> > + else
> > + nsubmix_bnd=nsubmix
> > + endif
> > + count_submix(nsubmix_bnd)=count_submix(nsubmix_bnd)+1
> > + dtmix=dtmicro/nsubmix
> > + fac_srflx = -1.0/(zn(1011)*nsubmix)
> > +
> > +
> > + ! call t_stopf ('nsubmix')
> > +
> > +enddo overall_main_i_loop
> > +! end of main loop over i/longitude ....................................
> > +
> > +
> > +call ccncalc(lchnk,ncol,temp,cs,raer,qqcw,ccn,psat,supersat,alogsig,npv)
> > +return
> > +end subroutine dropmixnuc
> > +end module ndrop
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index a6cf0a5546c..29c03c246d4 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -8505,7 +8505,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
> > {
> > gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
> > || code == IFN_COND_MUL || code == IFN_COND_AND
> > - || code == IFN_COND_IOR || code == IFN_COND_XOR);
> > + || code == IFN_COND_IOR || code == IFN_COND_XOR
> > + || code == IFN_COND_MIN || code == IFN_COND_MAX);
> > gcc_assert (op.num_ops == 4
> > && (op.ops[reduc_index]
> > == op.ops[internal_fn_else_index ((internal_fn) code)]));
> > --
> > 2.31.1
> >
>
>
> --
> H.J.
On Tue, Apr 30, 2024 at 09:30:00AM +0200, Richard Biener wrote:
> On Mon, Apr 29, 2024 at 5:30 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Mon, Apr 29, 2024 at 6:47 AM liuhongt <hongtao.liu@intel.com> wrote:
> > >
> > > The Fortran standard does not specify what the result of the MAX
> > > and MIN intrinsics are if one of the arguments is a NaN. So it
> > > should be ok to tranform reduction for IFN_COND_MIN with vectorized
> > > COND_MIN and REDUC_MIN.
> >
> > The commit subject isn't very clear. This patch isn't about "Don't assert
> > for IFN_COND_{MIN,MAX}". It allows IFN_COND_{MIN,MAX} in
> > vect_transform_reduction.
>
> Well, we allow it elsewhere, we just fail to enumerate all COND_* we allow
> here correctly.
>
> > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > > Ok for trunk and backport to GCC14?
>
> OK for trunk and branch.
Oops, I've just sent the same patch, just with a different testcase
(reduced and which tests both the min and max).
I think the reduced testcase is better.
> > > gcc/ChangeLog:
> > >
> > > PR 114883
Missing tree-optimization/
> > > * tree-vect-loop.cc (vect_transform_reduction): Don't assert
> > > for IFN_COND_{MIN, MAX}.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gfortran.dg/pr114883.f90: New test.
Jakub
On Tue, Apr 30, 2024 at 3:38 PM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Tue, Apr 30, 2024 at 09:30:00AM +0200, Richard Biener wrote:
> > On Mon, Apr 29, 2024 at 5:30 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Mon, Apr 29, 2024 at 6:47 AM liuhongt <hongtao.liu@intel.com> wrote:
> > > >
> > > > The Fortran standard does not specify what the result of the MAX
> > > > and MIN intrinsics are if one of the arguments is a NaN. So it
> > > > should be ok to tranform reduction for IFN_COND_MIN with vectorized
> > > > COND_MIN and REDUC_MIN.
> > >
> > > The commit subject isn't very clear. This patch isn't about "Don't assert
> > > for IFN_COND_{MIN,MAX}". It allows IFN_COND_{MIN,MAX} in
> > > vect_transform_reduction.
> >
> > Well, we allow it elsewhere, we just fail to enumerate all COND_* we allow
> > here correctly.
> >
> > > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> > > > Ok for trunk and backport to GCC14?
> >
> > OK for trunk and branch.
>
> Oops, I've just sent the same patch, just with a different testcase
> (reduced and which tests both the min and max).
> I think the reduced testcase is better.
Yes, please commit your patch :)
>
> > > > gcc/ChangeLog:
> > > >
> > > > PR 114883
>
> Missing tree-optimization/
>
> > > > * tree-vect-loop.cc (vect_transform_reduction): Don't assert
> > > > for IFN_COND_{MIN, MAX}.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > > * gfortran.dg/pr114883.f90: New test.
>
> Jakub
>
new file mode 100644
@@ -0,0 +1,191 @@
+! { dg-do compile }
+! { dg-options "-O3" }
+! { dg-additional-options "-march=x86-64-v4" { target { x86_64-*-* i?86-*-* } } }
+
+module ndrop
+
+
+ implicit none
+
+ private
+ save
+
+ public dropmixnuc
+
+ real(8) :: npv(1011) ! number per volume concentration
+ real(8) :: alogsig(1011) ! natl log of geometric standard dev of aerosol
+
+ type qqcw_type
+ real(8), pointer :: fldcw(:,:)
+ end type qqcw_type
+
+contains
+
+ subroutine dropmixnuc(lchnk, ncol, temp, &
+ cldn,cldo, &
+ raer, dtmicro &
+ )
+ implicit none
+
+ ! input
+
+ integer, intent(in) :: lchnk ! chunk identifier
+ integer, intent(in) :: ncol ! number of columns
+ ! type(physics_state), intent(in) :: state ! Physics state variables
+ real(8), intent(in) :: dtmicro ! time step for microphysics (s)
+ real(8), intent(in) :: temp(1,1011) ! temperature (K)
+ real(8), intent(in) :: cldo(1,1011) ! cloud fraction on previous time step
+ real(8), intent(in) :: cldn(1,1011) ! cloud fraction
+ real(8), intent(in) :: raer(1,1011,1011) ! aerosol mass, number mixing ratios
+
+
+ type(qqcw_type) :: QQCW(1011)
+
+ real(8) depvel(1,1011)! deposition velocity for droplets (m/s)
+ real(8) wtke(1,1011) ! turbulent vertical velocity at base of layer k (m/s)
+ real(8) wtke_cen(1,1011) ! turbulent vertical velocity at center of layer k (m/s)
+ real(8) zn(1011) ! g/pdel (m2/g) for layer
+ real(8) zs(1011) ! inverse of distance between levels (m)
+ real(8), parameter :: zkmin=0.01_8,zkmax=100._8
+ real(8) cs(1,1011) ! air density (kg/m3)
+ real(8) dz(1,1011) ! geometric thickness of layers (m)
+ real(8) zero
+
+ real(8) wdiab ! diabatic vertical velocity
+ real(8), parameter :: wmixmin = 0.1 ! minimum turbulence vertical velocity (m/s)
+ ! real(8), parameter :: wmixmin = 0.2 ! minimum turbulence vertical velocity (m/s)
+ ! real(8), parameter :: wmixmin = 1.0 ! minimum turbulence vertical velocity (m/s)
+ real(8) ekk(0:1011) ! density*diffusivity for droplets (kg/m3 m2/s)
+ real(8), parameter :: sq2pi=2.5066283_8
+ real(8) dtinv
+
+ integer km1,kp1
+ real(8) wbar,wmix,wmin,wmax
+ real(8) dumc
+ real(8) fac_srflx
+ real(8) surfrate(1011) ! surface exchange rate (/s)
+ real(8) surfratemax ! max surfrate for all species treated here
+ real(8) dtmin,tinv,dtt
+ integer nsubmix,nsubmix_bnd
+ integer i,k,m
+ real(8) dtmix
+ real(8) pi
+ integer nnew,nsav,ntemp
+ real(8) ekkp(1011),ekkm(1011) ! zn*zs*density*diffusivity
+ integer count_submix(100)
+ save count_submix
+ real(8) nsource(1,1011) ! droplet number source (#/kg/s)
+ real(8) ndropmix(1,1011) ! droplet number mixing (#/kg/s)
+ real(8) ndropcol(1) ! column droplet number (#/m2)
+
+ real(8) na(1),va(1),hy(1)
+ real(8) naermod(1011) ! (/m3)
+ real(8) hygro(1011) ! hygroscopicity of aerosol mode
+ real(8) vaerosol(1011) ! interstit+activated aerosol volume conc (cm3/cm3)
+ real(8) :: taumix_internal_1011_inv ! 1/(internal mixing time scale for k=1011) (1/s)
+ real(8) :: cldo_tmp, cldn_tmp
+ real(8) :: tau_cld_regenerate
+
+ integer ixndrop, l
+ integer, parameter :: psat=6 ! number of supersaturations to calc ccn concentration
+ real(8) :: supersat(psat)= & ! supersaturation (%) to determine ccn concentration
+ (/0.02,0.05,0.1,0.2,0.5,1.0/)
+ real(8) ccn(1,1011,psat) ! number conc of aerosols activated at supersat
+ character(len=8), dimension(psat) :: ccn_name(psat)= &
+ (/'CCN1','CCN2','CCN3','CCN4','CCN5','CCN6'/)
+ real(8) arg
+ integer phase ! phase of aerosol
+
+
+
+ arg = 1.0_8
+ zero=0._8
+
+
+ pi = 4._8*atan(1.0_8)
+ dtinv=1./dtmicro
+
+ depvel(:,:) = 0.0_8 ! droplet number is done in pkg_cld_sediment, aerosols in mz_aerosols_intr
+
+ overall_main_i_loop: &
+ do i=1,ncol
+
+ surfratemax = 0.0_8
+ nsav=1
+ nnew=2
+ surfrate(ixndrop)=depvel(i,ixndrop)/dz(i,1011)
+ surfratemax = max( surfratemax, surfrate(ixndrop) )
+ tau_cld_regenerate = 3600.0_8 * 3.0_8
+
+ grow_shrink_main_k_loop: &
+ do k=1,1011
+ km1=max0(k-1,1)
+ kp1=min0(k+1,1011)
+
+ cldo_tmp = cldo(i,k)
+ cldn_tmp = cldn(i,k) * exp( -dtmicro/tau_cld_regenerate )
+ cldo_tmp = cldn_tmp
+ cldn_tmp = cldn(i,k)
+
+ if(cldn_tmp-cldo_tmp.gt.0.01)then
+ wbar=wtke_cen(i,k)
+ wmix=0._8
+ wmin=0._8
+ wmax=10._8
+ wdiab=0
+
+ phase=1 ! interstitial
+ do m=1,1011
+ call loadaer(raer,qqcw,i,i,k,m,cs,npv(m),phase, &
+ na, va, hy )
+ naermod(m)=na(i)
+ vaerosol(m)=va(i)
+ hygro(m)=hy(i)
+ end do
+ dumc=(cldn_tmp-cldo_tmp)
+ endif
+
+ enddo grow_shrink_main_k_loop
+
+ ntemp=nsav
+ nsav=nnew
+ nnew=ntemp
+ dtmin=dtmicro
+ ekk(0)=0.0
+ ekk(1011)=0.0
+ do k=1,1011
+ km1=max0(k-1,1)
+ ekkp(k)=zn(k)*ekk(k)*zs(k)
+ ekkm(k)=zn(k)*ekk(k-1)*zs(km1)
+ tinv=ekkp(k)+ekkm(k)
+
+ if(k.eq.1011)tinv=tinv+surfratemax
+ if(k.eq.1011)tinv=tinv+taumix_internal_1011_inv
+
+ if(tinv.gt.1.e-6)then
+ dtt=1./tinv
+ dtmin=min(dtmin,dtt)
+ endif
+ enddo
+ dtmix=0.9*dtmin
+ nsubmix=dtmicro/dtmix+1
+ if(nsubmix>100)then
+ nsubmix_bnd=100
+ else
+ nsubmix_bnd=nsubmix
+ endif
+ count_submix(nsubmix_bnd)=count_submix(nsubmix_bnd)+1
+ dtmix=dtmicro/nsubmix
+ fac_srflx = -1.0/(zn(1011)*nsubmix)
+
+
+ ! call t_stopf ('nsubmix')
+
+enddo overall_main_i_loop
+! end of main loop over i/longitude ....................................
+
+
+call ccncalc(lchnk,ncol,temp,cs,raer,qqcw,ccn,psat,supersat,alogsig,npv)
+return
+end subroutine dropmixnuc
+end module ndrop
@@ -8505,7 +8505,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
{
gcc_assert (code == IFN_COND_ADD || code == IFN_COND_SUB
|| code == IFN_COND_MUL || code == IFN_COND_AND
- || code == IFN_COND_IOR || code == IFN_COND_XOR);
+ || code == IFN_COND_IOR || code == IFN_COND_XOR
+ || code == IFN_COND_MIN || code == IFN_COND_MAX);
gcc_assert (op.num_ops == 4
&& (op.ops[reduc_index]
== op.ops[internal_fn_else_index ((internal_fn) code)]));