pieces-memset-21.c: Expect vzeroupper for ia32

Message ID CAMe9rOqTt6NBbkZ4N_PvzJJ1=zUPH2F958B2yLhj_vNNuxyAUg@mail.gmail.com
State New
Headers
Series pieces-memset-21.c: Expect vzeroupper for ia32 |

Commit Message

H.J. Lu Feb. 18, 2022, 6:40 p.m. UTC
  On Thu, Feb 17, 2022 at 6:32 PM Hongtao Liu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Thu, Feb 17, 2022 at 9:47 PM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > The x86 backend piggy-backs on mode-switching for insertion of
> > vzeroupper.  A recent improvement there was implemented in a way
> > to walk possibly the whole basic-block for all DF reg def definitions
> > in its mode_needed hook which is called for each instruction in
> > a basic-block during mode-switching local analysis.
> >
> > The following mostly reverts this improvement.  It needs to be
> > re-done in a way more consistent with a local dataflow which
> > probably means making targets aware of the state of the local
> > dataflow analysis.
> >
> > This improves compile-time of some 538.imagick_r TU from
> > 362s to 16s with -Ofast -mavx2 -fprofile-generate.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?
> LGTM, have talked to H.J, he also agrees.
> >
> > Thanks,
> > Richard.
> >
> > 2022-02-17  Richard Biener  <rguenther@suse.de>
> >
> >         PR target/104581
> >         * config/i386/i386.cc (ix86_avx_u128_mode_source): Remove.
> >         (ix86_avx_u128_mode_needed): Return AVX_U128_DIRTY instead
> >         of calling ix86_avx_u128_mode_source which would eventually
> >         have returned AVX_U128_ANY in some very special case.
> >
> >         * gcc.target/i386/pr101456-1.c: XFAIL.
> > ---
> >  gcc/config/i386/i386.cc                    | 78 +---------------------
> >  gcc/testsuite/gcc.target/i386/pr101456-1.c |  3 +-
> >  2 files changed, 5 insertions(+), 76 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index cf246e74e57..e4b42fbba6f 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -14377,80 +14377,12 @@ ix86_check_avx_upper_register (const_rtx exp)
> >
> >  static void
> >  ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
> > - {
> > -   if (ix86_check_avx_upper_register (dest))
> > +{
> > +  if (ix86_check_avx_upper_register (dest))
> >      {
> >        bool *used = (bool *) data;
> >        *used = true;
> >      }
> > - }
> > -
> > -/* For YMM/ZMM store or YMM/ZMM extract.  Return mode for the source
> > -   operand of SRC DEFs in the same basic block before INSN.  */
> > -
> > -static int
> > -ix86_avx_u128_mode_source (rtx_insn *insn, const_rtx src)
> > -{
> > -  basic_block bb = BLOCK_FOR_INSN (insn);
> > -  rtx_insn *end = BB_END (bb);
> > -
> > -  /* Return AVX_U128_DIRTY if there is no DEF in the same basic
> > -     block.  */
> > -  int status = AVX_U128_DIRTY;
> > -
> > -  for (df_ref def = DF_REG_DEF_CHAIN (REGNO (src));
> > -       def; def = DF_REF_NEXT_REG (def))
> > -    if (DF_REF_BB (def) == bb)
> > -      {
> > -       /* Ignore DEF from different basic blocks.  */
> > -       rtx_insn *def_insn = DF_REF_INSN (def);
> > -
> > -       /* Check if DEF_INSN is before INSN.  */
> > -       rtx_insn *next;
> > -       for (next = NEXT_INSN (def_insn);
> > -            next != nullptr && next != end && next != insn;
> > -            next = NEXT_INSN (next))
> > -         ;
> > -
> > -       /* Skip if DEF_INSN isn't before INSN.  */
> > -       if (next != insn)
> > -         continue;
> > -
> > -       /* Return AVX_U128_DIRTY if the source operand of DEF_INSN
> > -          isn't constant zero.  */
> > -
> > -       if (CALL_P (def_insn))
> > -         {
> > -           bool avx_upper_reg_found = false;
> > -           note_stores (def_insn,
> > -                        ix86_check_avx_upper_stores,
> > -                        &avx_upper_reg_found);
> > -
> > -           /* Return AVX_U128_DIRTY if call returns AVX.  */
> > -           if (avx_upper_reg_found)
> > -             return AVX_U128_DIRTY;
> > -
> > -           continue;
> > -         }
> > -
> > -       rtx set = single_set (def_insn);
> > -       if (!set)
> > -         return AVX_U128_DIRTY;
> > -
> > -       rtx dest = SET_DEST (set);
> > -
> > -       /* Skip if DEF_INSN is not an AVX load.  Return AVX_U128_DIRTY
> > -          if the source operand isn't constant zero.  */
> > -       if (ix86_check_avx_upper_register (dest)
> > -           && standard_sse_constant_p (SET_SRC (set),
> > -                                       GET_MODE (dest)) != 1)
> > -         return AVX_U128_DIRTY;
> > -
> > -       /* We get here only if all AVX loads are from constant zero.  */
> > -       status = AVX_U128_ANY;
> > -      }
> > -
> > -  return status;
> >  }
> >
> >  /* Return needed mode for entity in optimize_mode_switching pass.  */
> > @@ -14520,11 +14452,7 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
> >         {
> >           FOR_EACH_SUBRTX (iter, array, src, NONCONST)
> >             if (ix86_check_avx_upper_register (*iter))
> > -             {
> > -               int status = ix86_avx_u128_mode_source (insn, *iter);
> > -               if (status == AVX_U128_DIRTY)
> > -                 return status;
> > -             }
> > +             return AVX_U128_DIRTY;
> >         }
> >
> >        /* This isn't YMM/ZMM load/store.  */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr101456-1.c b/gcc/testsuite/gcc.target/i386/pr101456-1.c
> > index 803fc6e0207..7fb3a3f055c 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr101456-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr101456-1.c
> > @@ -30,4 +30,5 @@ foo3 (void)
> >    bar ();
> >  }
> >
> > -/* { dg-final { scan-assembler-not "vzeroupper" } } */
> > +/* See PR104581 for the XFAIL reason.  */
> > +/* { dg-final { scan-assembler-not "vzeroupper" { xfail *-*-* } } } */
> > --
> > 2.34.1
>

I am checking this patch.
  

Patch

From 1931cbad498e625b1e24452dcfffe02539b12224 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 18 Feb 2022 10:36:53 -0800
Subject: [PATCH] pieces-memset-21.c: Expect vzeroupper for ia32

Update gcc.target/i386/pieces-memset-21.c to expect vzeroupper for ia32
caused by

commit fe79d652c96b53384ddfa43e312cb0010251391b
Author: Richard Biener <rguenther@suse.de>
Date:   Thu Feb 17 14:40:16 2022 +0100

    target/104581 - compile-time regression in mode-switching

	PR target/104581
	* gcc.target/i386/pieces-memset-21.c: Expect vzeroupper for ia32.
---
 gcc/testsuite/gcc.target/i386/pieces-memset-21.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-21.c b/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
index d87d084bf2a..4e2a7407c8a 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-21.c
@@ -11,7 +11,8 @@  foo (void)
 
 /* { dg-final { scan-assembler-times "vpxor(?:d|)\[ \\t\]+\[^\n\]*%xmm" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu(?:64|8)\[ \\t\]+\[^\n\]*%zmm" 1 } } */
-/* { dg-final { scan-assembler-not "vzeroupper" } } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vzeroupper" { target ia32 } } } */
 /* No need to dynamically realign the stack here.  */
 /* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
 /* Nor use a frame pointer.  */
-- 
2.35.1