pieces-memset-21.c: Expect vzeroupper for ia32
Commit Message
On Thu, Feb 17, 2022 at 6:32 PM Hongtao Liu via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Thu, Feb 17, 2022 at 9:47 PM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > The x86 backend piggy-backs on mode-switching for insertion of
> > vzeroupper. A recent improvement there was implemented in a way
> > to walk possibly the whole basic-block for all DF reg def definitions
> > in its mode_needed hook which is called for each instruction in
> > a basic-block during mode-switching local analysis.
> >
> > The following mostly reverts this improvement. It needs to be
> > re-done in a way more consistent with a local dataflow which
> > probably means making targets aware of the state of the local
> > dataflow analysis.
> >
> > This improves compile-time of some 538.imagick_r TU from
> > 362s to 16s with -Ofast -mavx2 -fprofile-generate.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, OK?
> LGTM, have talked to H.J, he also agrees.
> >
> > Thanks,
> > Richard.
> >
> > 2022-02-17 Richard Biener <rguenther@suse.de>
> >
> > PR target/104581
> > * config/i386/i386.cc (ix86_avx_u128_mode_source): Remove.
> > (ix86_avx_u128_mode_needed): Return AVX_U128_DIRTY instead
> > of calling ix86_avx_u128_mode_source which would eventually
> > have returned AVX_U128_ANY in some very special case.
> >
> > * gcc.target/i386/pr101456-1.c: XFAIL.
> > ---
> > gcc/config/i386/i386.cc | 78 +---------------------
> > gcc/testsuite/gcc.target/i386/pr101456-1.c | 3 +-
> > 2 files changed, 5 insertions(+), 76 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index cf246e74e57..e4b42fbba6f 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -14377,80 +14377,12 @@ ix86_check_avx_upper_register (const_rtx exp)
> >
> > static void
> > ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
> > - {
> > - if (ix86_check_avx_upper_register (dest))
> > +{
> > + if (ix86_check_avx_upper_register (dest))
> > {
> > bool *used = (bool *) data;
> > *used = true;
> > }
> > - }
> > -
> > -/* For YMM/ZMM store or YMM/ZMM extract. Return mode for the source
> > - operand of SRC DEFs in the same basic block before INSN. */
> > -
> > -static int
> > -ix86_avx_u128_mode_source (rtx_insn *insn, const_rtx src)
> > -{
> > - basic_block bb = BLOCK_FOR_INSN (insn);
> > - rtx_insn *end = BB_END (bb);
> > -
> > - /* Return AVX_U128_DIRTY if there is no DEF in the same basic
> > - block. */
> > - int status = AVX_U128_DIRTY;
> > -
> > - for (df_ref def = DF_REG_DEF_CHAIN (REGNO (src));
> > - def; def = DF_REF_NEXT_REG (def))
> > - if (DF_REF_BB (def) == bb)
> > - {
> > - /* Ignore DEF from different basic blocks. */
> > - rtx_insn *def_insn = DF_REF_INSN (def);
> > -
> > - /* Check if DEF_INSN is before INSN. */
> > - rtx_insn *next;
> > - for (next = NEXT_INSN (def_insn);
> > - next != nullptr && next != end && next != insn;
> > - next = NEXT_INSN (next))
> > - ;
> > -
> > - /* Skip if DEF_INSN isn't before INSN. */
> > - if (next != insn)
> > - continue;
> > -
> > - /* Return AVX_U128_DIRTY if the source operand of DEF_INSN
> > - isn't constant zero. */
> > -
> > - if (CALL_P (def_insn))
> > - {
> > - bool avx_upper_reg_found = false;
> > - note_stores (def_insn,
> > - ix86_check_avx_upper_stores,
> > - &avx_upper_reg_found);
> > -
> > - /* Return AVX_U128_DIRTY if call returns AVX. */
> > - if (avx_upper_reg_found)
> > - return AVX_U128_DIRTY;
> > -
> > - continue;
> > - }
> > -
> > - rtx set = single_set (def_insn);
> > - if (!set)
> > - return AVX_U128_DIRTY;
> > -
> > - rtx dest = SET_DEST (set);
> > -
> > - /* Skip if DEF_INSN is not an AVX load. Return AVX_U128_DIRTY
> > - if the source operand isn't constant zero. */
> > - if (ix86_check_avx_upper_register (dest)
> > - && standard_sse_constant_p (SET_SRC (set),
> > - GET_MODE (dest)) != 1)
> > - return AVX_U128_DIRTY;
> > -
> > - /* We get here only if all AVX loads are from constant zero. */
> > - status = AVX_U128_ANY;
> > - }
> > -
> > - return status;
> > }
> >
> > /* Return needed mode for entity in optimize_mode_switching pass. */
> > @@ -14520,11 +14452,7 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
> > {
> > FOR_EACH_SUBRTX (iter, array, src, NONCONST)
> > if (ix86_check_avx_upper_register (*iter))
> > - {
> > - int status = ix86_avx_u128_mode_source (insn, *iter);
> > - if (status == AVX_U128_DIRTY)
> > - return status;
> > - }
> > + return AVX_U128_DIRTY;
> > }
> >
> > /* This isn't YMM/ZMM load/store. */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr101456-1.c b/gcc/testsuite/gcc.target/i386/pr101456-1.c
> > index 803fc6e0207..7fb3a3f055c 100644
> > --- a/gcc/testsuite/gcc.target/i386/pr101456-1.c
> > +++ b/gcc/testsuite/gcc.target/i386/pr101456-1.c
> > @@ -30,4 +30,5 @@ foo3 (void)
> > bar ();
> > }
> >
> > -/* { dg-final { scan-assembler-not "vzeroupper" } } */
> > +/* See PR104581 for the XFAIL reason. */
> > +/* { dg-final { scan-assembler-not "vzeroupper" { xfail *-*-* } } } */
> > --
> > 2.34.1
>
I am checking this patch.
From 1931cbad498e625b1e24452dcfffe02539b12224 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Fri, 18 Feb 2022 10:36:53 -0800
Subject: [PATCH] pieces-memset-21.c: Expect vzeroupper for ia32
Update gcc.target/i386/pieces-memset-21.c to expect vzeroupper for ia32
caused by
commit fe79d652c96b53384ddfa43e312cb0010251391b
Author: Richard Biener <rguenther@suse.de>
Date: Thu Feb 17 14:40:16 2022 +0100
target/104581 - compile-time regression in mode-switching
PR target/104581
* gcc.target/i386/pieces-memset-21.c: Expect vzeroupper for ia32.
---
gcc/testsuite/gcc.target/i386/pieces-memset-21.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
@@ -11,7 +11,8 @@ foo (void)
/* { dg-final { scan-assembler-times "vpxor(?:d|)\[ \\t\]+\[^\n\]*%xmm" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu(?:64|8)\[ \\t\]+\[^\n\]*%zmm" 1 } } */
-/* { dg-final { scan-assembler-not "vzeroupper" } } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vzeroupper" { target ia32 } } } */
/* No need to dynamically realign the stack here. */
/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
/* Nor use a frame pointer. */
--
2.35.1