[v3] x86: Disable SSE on unwind-c.c and unwind-dw2.c
Commit Message
Since eh_return doesn't work with stack realignment, disable SSE on
unwind-c.c and unwind-dw2.c to avoid stack realignment with the 4-byte
incoming stack to avoid SSE usage which is caused by
commit 609e8c492d62d92465460eae3d43dfc4b2c68288
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sat Feb 26 14:17:23 2022 -0800
x86: Always return pseudo register in ix86_gen_scratch_sse_rtx
when pseudo vector registers are used to expand memset.
gcc/
PR target/104781
* config/i386/i386.cc (ix86_expand_epilogue): Sorry if there is
stack realignment with eh_return or regparm nested function.
gcc/testsuite/
PR target/104781
* gcc.target/i386/eh_return-1.c: Add -mincoming-stack-boundary=4.
* gcc.target/i386/eh_return-2.c: Likewise.
libgcc/
PR target/104781
* config.host (tmake_file): Add i386/32/t-eh-return-no-sse for
32-bit x86 Cygwin, MinGW and Solaris.
* config/i386/32/t-eh-return-no-sse: New file.
---
gcc/config/i386/i386.cc | 11 +++++++----
gcc/testsuite/gcc.target/i386/eh_return-1.c | 2 +-
gcc/testsuite/gcc.target/i386/eh_return-2.c | 2 +-
libgcc/config.host | 13 +++++++++++++
libgcc/config/i386/32/t-eh-return-no-sse | 5 +++++
5 files changed, 27 insertions(+), 6 deletions(-)
create mode 100644 libgcc/config/i386/32/t-eh-return-no-sse
Comments
On Mon, Mar 07, 2022 at 07:06:28AM -0800, H.J. Lu wrote:
> Since eh_return doesn't work with stack realignment, disable SSE on
> unwind-c.c and unwind-dw2.c to avoid stack realignment with the 4-byte
> incoming stack to avoid SSE usage which is caused by
>
> commit 609e8c492d62d92465460eae3d43dfc4b2c68288
> Author: H.J. Lu <hjl.tools@gmail.com>
> Date: Sat Feb 26 14:17:23 2022 -0800
>
> x86: Always return pseudo register in ix86_gen_scratch_sse_rtx
>
> when pseudo vector registers are used to expand memset.
> PR target/104781
> * config.host (tmake_file): Add i386/32/t-eh-return-no-sse for
> 32-bit x86 Cygwin, MinGW and Solaris.
> * config/i386/32/t-eh-return-no-sse: New file.
For this, isn't the right fix instead something like:
--- gcc/config/i386/i386.h.jj 2022-02-25 12:06:45.535493490 +0100
+++ gcc/config/i386/i386.h 2022-03-08 11:20:43.207043370 +0100
@@ -2848,6 +2848,10 @@ extern enum attr_cpu ix86_schedule;
#define NUM_X86_64_MS_CLOBBERED_REGS 12
#endif
+/* __builtin_eh_return can't handle stack realignment, so disable SSE in
+ libgcc functions that call it. */
+#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
+
/*
Local variables:
version-control: t
As mentioned in PR104838, this likely isn't specific to just Solaris and
cygwin/mingw. Fedora uses -msse2 -mfpmath=sse -mstackrealign in its C{,XX}FLAGS
among other things for i686.
Jakub
On Tue, Mar 08, 2022 at 11:23:51AM +0100, Jakub Jelinek via Gcc-patches wrote:
> On Mon, Mar 07, 2022 at 07:06:28AM -0800, H.J. Lu wrote:
> > Since eh_return doesn't work with stack realignment, disable SSE on
> > unwind-c.c and unwind-dw2.c to avoid stack realignment with the 4-byte
> > incoming stack to avoid SSE usage which is caused by
> >
> > commit 609e8c492d62d92465460eae3d43dfc4b2c68288
> > Author: H.J. Lu <hjl.tools@gmail.com>
> > Date: Sat Feb 26 14:17:23 2022 -0800
> >
> > x86: Always return pseudo register in ix86_gen_scratch_sse_rtx
> >
> > when pseudo vector registers are used to expand memset.
>
> > PR target/104781
> > * config.host (tmake_file): Add i386/32/t-eh-return-no-sse for
> > 32-bit x86 Cygwin, MinGW and Solaris.
> > * config/i386/32/t-eh-return-no-sse: New file.
>
> For this, isn't the right fix instead something like:
>
> --- gcc/config/i386/i386.h.jj 2022-02-25 12:06:45.535493490 +0100
> +++ gcc/config/i386/i386.h 2022-03-08 11:20:43.207043370 +0100
> @@ -2848,6 +2848,10 @@ extern enum attr_cpu ix86_schedule;
> #define NUM_X86_64_MS_CLOBBERED_REGS 12
> #endif
>
> +/* __builtin_eh_return can't handle stack realignment, so disable SSE in
> + libgcc functions that call it. */
> +#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> +
> /*
> Local variables:
> version-control: t
>
>
> As mentioned in PR104838, this likely isn't specific to just Solaris and
> cygwin/mingw. Fedora uses -msse2 -mfpmath=sse -mstackrealign in its C{,XX}FLAGS
> among other things for i686.
Now verified that with your full patch applied gcc on Fedora/i686 doesn't
build (gets those sorry messages when compiling unwind-dw2), while if I
replace those 2 libgcc hunks from your patch with the above one it seems to
get past the previous hanging point of gnat1 processes.
Jakub
On Tue, Mar 08, 2022 at 12:15:15PM +0100, Jakub Jelinek via Gcc-patches wrote:
> > --- gcc/config/i386/i386.h.jj 2022-02-25 12:06:45.535493490 +0100
> > +++ gcc/config/i386/i386.h 2022-03-08 11:20:43.207043370 +0100
> > @@ -2848,6 +2848,10 @@ extern enum attr_cpu ix86_schedule;
> > #define NUM_X86_64_MS_CLOBBERED_REGS 12
> > #endif
> >
> > +/* __builtin_eh_return can't handle stack realignment, so disable SSE in
> > + libgcc functions that call it. */
> > +#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> > +
> > /*
> > Local variables:
> > version-control: t
> >
> >
> > As mentioned in PR104838, this likely isn't specific to just Solaris and
> > cygwin/mingw. Fedora uses -msse2 -mfpmath=sse -mstackrealign in its C{,XX}FLAGS
> > among other things for i686.
>
> Now verified that with your full patch applied gcc on Fedora/i686 doesn't
> build (gets those sorry messages when compiling unwind-dw2), while if I
> replace those 2 libgcc hunks from your patch with the above one it seems to
> get past the previous hanging point of gnat1 processes.
Though, perhaps it should be
#ifndef __x86_64__
#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
#endif
or something similar, on x86-64 one at least normally doesn't use lower
stack realignment unless avx or later. Maybe we want to use
no-avx for the x86-64 case though.
Disabling sse/sse2 might be a problem especially on mingw where we need to
restore SSE registers in the EH return, no?
Even better would be to make __builtin_eh_return work even with DRAP,
but I admit I haven't understood what exactly is the problem that prevents
it from working.
Jakub
> Disabling sse/sse2 might be a problem especially on mingw where we need to
> restore SSE registers in the EH return, no?
Not in 32-bit mode I think, all XMM registers are call used.
On Tue, Mar 8, 2022 at 4:29 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Tue, Mar 08, 2022 at 12:15:15PM +0100, Jakub Jelinek via Gcc-patches wrote:
> > > --- gcc/config/i386/i386.h.jj 2022-02-25 12:06:45.535493490 +0100
> > > +++ gcc/config/i386/i386.h 2022-03-08 11:20:43.207043370 +0100
> > > @@ -2848,6 +2848,10 @@ extern enum attr_cpu ix86_schedule;
> > > #define NUM_X86_64_MS_CLOBBERED_REGS 12
> > > #endif
> > >
> > > +/* __builtin_eh_return can't handle stack realignment, so disable SSE in
> > > + libgcc functions that call it. */
> > > +#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> > > +
> > > /*
> > > Local variables:
> > > version-control: t
> > >
> > >
> > > As mentioned in PR104838, this likely isn't specific to just Solaris and
> > > cygwin/mingw. Fedora uses -msse2 -mfpmath=sse -mstackrealign in its C{,XX}FLAGS
> > > among other things for i686.
> >
> > Now verified that with your full patch applied gcc on Fedora/i686 doesn't
> > build (gets those sorry messages when compiling unwind-dw2), while if I
> > replace those 2 libgcc hunks from your patch with the above one it seems to
> > get past the previous hanging point of gnat1 processes.
>
> Though, perhaps it should be
> #ifndef __x86_64__
> #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> #endif
> or something similar, on x86-64 one at least normally doesn't use lower
> stack realignment unless avx or later. Maybe we want to use
> no-avx for the x86-64 case though.
I have verified that AVX and AVX512 have no issues on x86-64. In 32-bit,
-mstackrealign triggers the problem.
> Disabling sse/sse2 might be a problem especially on mingw where we need to
> restore SSE registers in the EH return, no?
No, it isn't needed.
> Even better would be to make __builtin_eh_return work even with DRAP,
> but I admit I haven't understood what exactly is the problem that prevents
> it from working.
The EH return is a very special case. Disable SSE in 32-bit is the simplest
way to make the EH return to work.
On Tue, Mar 08, 2022 at 07:37:17AM -0800, H.J. Lu wrote:
> > Though, perhaps it should be
> > #ifndef __x86_64__
> > #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> > #endif
> > or something similar, on x86-64 one at least normally doesn't use lower
> > stack realignment unless avx or later. Maybe we want to use
> > no-avx for the x86-64 case though.
>
> I have verified that AVX and AVX512 have no issues on x86-64. In 32-bit,
> -mstackrealign triggers the problem.
I bet it would be a problem if we started vectorizing something in there
using avx/avx2/avx512*. But given the sorry, I think we'd find that out
immediately.
> > Disabling sse/sse2 might be a problem especially on mingw where we need to
> > restore SSE registers in the EH return, no?
>
> No, it isn't needed.
I meant for 64-bit where I think the Windows ABI preserves some XMM regs
(low 128-bits of them). So my earlier patch to just define
LIBGCC2_UNWIND_ATTRIBUTE unconditionally would be wrong for it.
> > Even better would be to make __builtin_eh_return work even with DRAP,
> > but I admit I haven't understood what exactly is the problem that prevents
> > it from working.
>
> The EH return is a very special case. Disable SSE in 32-bit is the simplest
> way to make the EH return to work.
Ok. So, what do you think about replacing the libgcc/ part of your patch
with that
/* __builtin_eh_return can't handle stack realignment, so disable SSE in
32-bit libgcc functions that call it. */
#ifndef __x86_64__
#define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
#endif
?
I'm bootstrapping/regtesting such a patch right now (because I needed some
quick fix for the gnat1 hangs).
Jakub
On Tue, Mar 8, 2022 at 7:46 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Tue, Mar 08, 2022 at 07:37:17AM -0800, H.J. Lu wrote:
> > > Though, perhaps it should be
> > > #ifndef __x86_64__
> > > #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> > > #endif
> > > or something similar, on x86-64 one at least normally doesn't use lower
> > > stack realignment unless avx or later. Maybe we want to use
> > > no-avx for the x86-64 case though.
> >
> > I have verified that AVX and AVX512 have no issues on x86-64. In 32-bit,
> > -mstackrealign triggers the problem.
>
> I bet it would be a problem if we started vectorizing something in there
> using avx/avx2/avx512*. But given the sorry, I think we'd find that out
YMM and ZMM can be used to expand memset with -march=native.
It works fine on Linux. No stack realignment is needed.
> immediately.
True.
> > > Disabling sse/sse2 might be a problem especially on mingw where we need to
> > > restore SSE registers in the EH return, no?
> >
> > No, it isn't needed.
>
> I meant for 64-bit where I think the Windows ABI preserves some XMM regs
Does it need to realign the stack?
> (low 128-bits of them). So my earlier patch to just define
> LIBGCC2_UNWIND_ATTRIBUTE unconditionally would be wrong for it.
>
> > > Even better would be to make __builtin_eh_return work even with DRAP,
> > > but I admit I haven't understood what exactly is the problem that prevents
> > > it from working.
> >
> > The EH return is a very special case. Disable SSE in 32-bit is the simplest
> > way to make the EH return to work.
>
> Ok. So, what do you think about replacing the libgcc/ part of your patch
> with that
> /* __builtin_eh_return can't handle stack realignment, so disable SSE in
> 32-bit libgcc functions that call it. */
> #ifndef __x86_64__
> #define LIBGCC2_UNWIND_ATTRIBUTE __attribute__((target ("no-sse")))
> #endif
> ?
Yes, it should work.
Thanks.
> I'm bootstrapping/regtesting such a patch right now (because I needed some
> quick fix for the gnat1 hangs).
>
> Jakub
>
@@ -9444,12 +9444,15 @@ ix86_expand_epilogue (int style)
rtx sa = EH_RETURN_STACKADJ_RTX;
rtx_insn *insn;
- /* %ecx can't be used for both DRAP register and eh_return. */
- if (crtl->drap_reg)
- gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
+ /* Stack realignment doesn't work with eh_return. */
+ if (crtl->stack_realign_needed)
+ sorry ("Stack realignment not supported with "
+ "%<__builtin_eh_return%>");
/* regparm nested functions don't work with eh_return. */
- gcc_assert (!ix86_static_chain_on_stack);
+ if (ix86_static_chain_on_stack)
+ sorry ("regparm nested function not supported with "
+ "%<__builtin_eh_return%>");
if (frame_pointer_needed)
{
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=haswell -mno-avx512f -mtune-ctrl=avx256_move_by_pieces" } */
+/* { dg-options "-O2 -mincoming-stack-boundary=4 -march=haswell -mno-avx512f -mtune-ctrl=avx256_move_by_pieces" } */
struct _Unwind_Context
{
@@ -1,6 +1,6 @@
/* PR target/101772 */
/* { dg-do compile } */
-/* { dg-additional-options "-O0 -march=x86-64 -mstackrealign" } */
+/* { dg-additional-options "-O0 -mincoming-stack-boundary=4 -march=x86-64 -mstackrealign" } */
struct _Unwind_Context _Unwind_Resume_or_Rethrow_this_context;
@@ -1583,3 +1583,16 @@ case ${host} in
tmake_file="$tmake_file t-gthr-noweak"
;;
esac
+
+case ${host} in
+i[34567]86-*-* | x86_64-*-*)
+ if test "${host_address}" = 32; then
+ case ${host} in
+ *-*-cygwin* | *-*-mingw* | *-*-solaris2*)
+ # Disable SSE on unwind-c.c and unwind-dw2.c to avoid stack
+ # realignment with the 4-byte aligned incoming stack.
+ tmake_file="${tmake_file} i386/${host_address}/t-eh-return-no-sse"
+ ;;
+ esac
+ fi
+esac
new file mode 100644
@@ -0,0 +1,5 @@
+# Since eh_return doesn't work with stack realignment, disable SSE on
+# unwind-c.c and unwind-dw2.c to avoid stack realignment with the 4-byte
+# incoming stack.
+
+unwind-c.o unwind-c_s.o unwind-dw2.o unwind-dw2_s.o : HOST_LIBGCC2_CFLAGS += -mno-sse