hurd: save xstate during signal handling

Message ID 20250319171118.142163-1-luca@orpolo.org (mailing list archive)
State New
Headers
Series hurd: save xstate during signal handling |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Test passed
redhat-pt-bot/TryBot-still_applies warning Patch no longer applies to master

Commit Message

Luca Dariz March 19, 2025, 5:11 p.m. UTC
* hurd/Makefile: add new tests
* hurd/test-sig-rpc-interrupted.c: check xstate save and restore in
  the case where a signal is delivered to a thread which is waiting
  for an rpc. This test implements the rpc interruption protocol used
  by the hurd servers. It was so far passing on Debian thanks to the
  local-intr-msg-clobber.diff patch, which is now obsolete.
* hurd/test-sig-xstate.c: check xstate save and restore in the case
  where a signal is delivered to a running thread, making sure that
  the xstate is modified in the signal handler.
* hurd/test-xstate.h: add helpers to test xstate
* sysdeps/mach/hurd/i386/bits/sigcontext.h: add xstate to the
  sigcontext structure.
* sysdeps/mach/hurd/i386/sigreturn.c: restore xstate from the saved
  context
* sysdeps/mach/hurd/x86/trampoline.c: save xstate if
  supported. Otherwise we fall back to the previous behaviour of
  ignoring xstate.
* sysdeps/mach/hurd/x86_64/bits/sigcontext.h: add xstate to the
  sigcontext structure.
* sysdeps/mach/hurd/x86_64/sigreturn.c: restore xstate from the saved
  context

Signed-off-by: Luca Dariz <luca@orpolo.org>
---
 hurd/Makefile                              |   5 +
 hurd/test-sig-rpc-interrupted.c            | 185 +++++++++++++++++++++
 hurd/test-sig-xstate.c                     |  94 +++++++++++
 hurd/test-xstate.h                         |  40 +++++
 sysdeps/mach/hurd/i386/bits/sigcontext.h   |   2 +
 sysdeps/mach/hurd/i386/sigreturn.c         |  27 ++-
 sysdeps/mach/hurd/x86/trampoline.c         |  79 +++++++--
 sysdeps/mach/hurd/x86_64/bits/sigcontext.h |   2 +
 sysdeps/mach/hurd/x86_64/sigreturn.c       |  27 ++-
 9 files changed, 435 insertions(+), 26 deletions(-)
 create mode 100644 hurd/test-sig-rpc-interrupted.c
 create mode 100644 hurd/test-sig-xstate.c
 create mode 100644 hurd/test-xstate.h
  

Comments

Samuel Thibault April 11, 2025, 12:27 a.m. UTC | #1
Hello,

Sorry it took me so long to find time to look at this...

Luca Dariz, le mer. 19 mars 2025 18:11:18 +0100, a ecrit:
> diff --git a/sysdeps/mach/hurd/i386/sigreturn.c b/sysdeps/mach/hurd/i386/sigreturn.c
> index ce8df8d02b..618cb74196 100644
> --- a/sysdeps/mach/hurd/i386/sigreturn.c
> +++ b/sysdeps/mach/hurd/i386/sigreturn.c
> @@ -21,6 +21,8 @@
>  #include <stdlib.h>
>  #include <string.h>
>  
> +#include <cpuid.h>
> +
>  /* This is run on the thread stack after restoring it, to be able to
>     unlock SS off sigstack.  */
>  static void
> @@ -123,10 +125,27 @@ __sigreturn (struct sigcontext *scp)
>    if (scp->sc_onstack)
>      ss->sigaltstack.ss_flags &= ~SS_ONSTACK;
>  
> -  if (scp->sc_fpused)
> -    /* Restore the FPU state.  Mach conveniently stores the state
> -       in the format the i387 `frstor' instruction uses to restore it.  */
> -    asm volatile ("frstor %0" : : "m" (scp->sc_fpsave));
> +#ifdef i386_XFLOAT_STATE
> +  if ((scp->xstate) && (scp->xstate->initialized))

else?

> +    {
> +      unsigned eax, ebx, ecx, edx;
> +      __cpuid_count(0xd, 0, eax, ebx, ecx, edx);
> +      switch (scp->xstate->fp_save_kind)
> +        {
> +        case 0: // FNSAVE
> +          asm volatile("frstor %0" : : "m" (scp->xstate->hw_state));
> +          break;
> +        case 1: // FXSAVE
> +          asm volatile("fxrstor %0" : : "m" (scp->xstate->hw_state),    \
> +                       "a" (eax), "d" (edx));
> +          break;
> +        default: // XSAVE
> +          asm volatile("xrstor %0" : : "m" (scp->xstate->hw_state),     \
> +                       "a" (eax), "d" (edx));
> +          break;

There is also FP_XSAVES, which should use xrstors. Better to also list
FP_XSAVEOPT and FP_XSAVEC explicitly as using xrstor too.

(and similar in x86_64)

> +        }
> +    }
> +#endif
>  
>    {
>      /* There are convenient instructions to pop state off the stack, so we
> diff --git a/sysdeps/mach/hurd/x86/trampoline.c b/sysdeps/mach/hurd/x86/trampoline.c
> index 8e2890f8c5..c333d56022 100644
> --- a/sysdeps/mach/hurd/x86/trampoline.c
> +++ b/sysdeps/mach/hurd/x86/trampoline.c
> @@ -26,7 +26,11 @@
>  #include "hurdfault.h"
>  #include <intr-msg.h>
>  #include <sys/ucontext.h>
> -
> +#ifdef __x86_64__
> +#include <mach/x86_64/mach_i386.h>
> +#else
> +#include <mach/i386/mach_i386.h>
> +#endif
>  
>  /* Fill in a siginfo_t structure for SA_SIGINFO-enabled handlers.  */
>  static void fill_siginfo (siginfo_t *si, int signo,
> @@ -93,7 +97,7 @@ static void fill_ucontext (ucontext_t *uc, const struct sigcontext *sc)
>    /* XXX FPU state.  */
>    memset (&uc->uc_mcontext.fpregs, 0, sizeof (fpregset_t));
>  }
> -
> +#include <stdio.h>

Spurious change?

Samuel
  
Samuel Thibault April 18, 2025, 12:43 a.m. UTC | #2
Hello

I fixed these and pushed it, thanks so much!

Samuel

Samuel Thibault, le ven. 11 avril 2025 02:27:04 +0200, a ecrit:
> Hello,
> 
> Sorry it took me so long to find time to look at this...
> 
> Luca Dariz, le mer. 19 mars 2025 18:11:18 +0100, a ecrit:
> > diff --git a/sysdeps/mach/hurd/i386/sigreturn.c b/sysdeps/mach/hurd/i386/sigreturn.c
> > index ce8df8d02b..618cb74196 100644
> > --- a/sysdeps/mach/hurd/i386/sigreturn.c
> > +++ b/sysdeps/mach/hurd/i386/sigreturn.c
> > @@ -21,6 +21,8 @@
> >  #include <stdlib.h>
> >  #include <string.h>
> >  
> > +#include <cpuid.h>
> > +
> >  /* This is run on the thread stack after restoring it, to be able to
> >     unlock SS off sigstack.  */
> >  static void
> > @@ -123,10 +125,27 @@ __sigreturn (struct sigcontext *scp)
> >    if (scp->sc_onstack)
> >      ss->sigaltstack.ss_flags &= ~SS_ONSTACK;
> >  
> > -  if (scp->sc_fpused)
> > -    /* Restore the FPU state.  Mach conveniently stores the state
> > -       in the format the i387 `frstor' instruction uses to restore it.  */
> > -    asm volatile ("frstor %0" : : "m" (scp->sc_fpsave));
> > +#ifdef i386_XFLOAT_STATE
> > +  if ((scp->xstate) && (scp->xstate->initialized))
> 
> else?
> 
> > +    {
> > +      unsigned eax, ebx, ecx, edx;
> > +      __cpuid_count(0xd, 0, eax, ebx, ecx, edx);
> > +      switch (scp->xstate->fp_save_kind)
> > +        {
> > +        case 0: // FNSAVE
> > +          asm volatile("frstor %0" : : "m" (scp->xstate->hw_state));
> > +          break;
> > +        case 1: // FXSAVE
> > +          asm volatile("fxrstor %0" : : "m" (scp->xstate->hw_state),    \
> > +                       "a" (eax), "d" (edx));
> > +          break;
> > +        default: // XSAVE
> > +          asm volatile("xrstor %0" : : "m" (scp->xstate->hw_state),     \
> > +                       "a" (eax), "d" (edx));
> > +          break;
> 
> There is also FP_XSAVES, which should use xrstors. Better to also list
> FP_XSAVEOPT and FP_XSAVEC explicitly as using xrstor too.
> 
> (and similar in x86_64)
> 
> > +        }
> > +    }
> > +#endif
> >  
> >    {
> >      /* There are convenient instructions to pop state off the stack, so we
> > diff --git a/sysdeps/mach/hurd/x86/trampoline.c b/sysdeps/mach/hurd/x86/trampoline.c
> > index 8e2890f8c5..c333d56022 100644
> > --- a/sysdeps/mach/hurd/x86/trampoline.c
> > +++ b/sysdeps/mach/hurd/x86/trampoline.c
> > @@ -26,7 +26,11 @@
> >  #include "hurdfault.h"
> >  #include <intr-msg.h>
> >  #include <sys/ucontext.h>
> > -
> > +#ifdef __x86_64__
> > +#include <mach/x86_64/mach_i386.h>
> > +#else
> > +#include <mach/i386/mach_i386.h>
> > +#endif
> >  
> >  /* Fill in a siginfo_t structure for SA_SIGINFO-enabled handlers.  */
> >  static void fill_siginfo (siginfo_t *si, int signo,
> > @@ -93,7 +97,7 @@ static void fill_ucontext (ucontext_t *uc, const struct sigcontext *sc)
> >    /* XXX FPU state.  */
> >    memset (&uc->uc_mcontext.fpregs, 0, sizeof (fpregset_t));
> >  }
> > -
> > +#include <stdio.h>
> 
> Spurious change?
> 
> Samuel
  
Samuel Thibault April 19, 2025, 9:41 a.m. UTC | #3
Hello,

So it works as your additional test shows, but it is fragile.

Luca Dariz, le mer. 19 mars 2025 18:11:18 +0100, a ecrit:
> diff --git a/sysdeps/mach/hurd/i386/sigreturn.c b/sysdeps/mach/hurd/i386/sigreturn.c
> index ce8df8d02b..618cb74196 100644
> --- a/sysdeps/mach/hurd/i386/sigreturn.c
> +++ b/sysdeps/mach/hurd/i386/sigreturn.c
> @@ -123,10 +125,27 @@ __sigreturn (struct sigcontext *scp)
>    if (scp->sc_onstack)
>      ss->sigaltstack.ss_flags &= ~SS_ONSTACK;
>  
> +#ifdef i386_XFLOAT_STATE
> +  if ((scp->xstate) && (scp->xstate->initialized))
> +    {
> +      unsigned eax, ebx, ecx, edx;
> +      __cpuid_count(0xd, 0, eax, ebx, ecx, edx);
> +      switch (scp->xstate->fp_save_kind)
> +        {
> +        case 0: // FNSAVE
> +          asm volatile("frstor %0" : : "m" (scp->xstate->hw_state));
> +          break;
> +        case 1: // FXSAVE
> +          asm volatile("fxrstor %0" : : "m" (scp->xstate->hw_state),    \
> +                       "a" (eax), "d" (edx));
> +          break;
> +        default: // XSAVE
> +          asm volatile("xrstor %0" : : "m" (scp->xstate->hw_state),     \
> +                       "a" (eax), "d" (edx));
> +          break;
> +        }
> +    }
> +  else
> +#endif
>      if (scp->sc_fpused)
>        /* Restore the FPU state.  Mach conveniently stores the state
>           in the format the i387 `frstor' instruction uses to restore it.  */
>        asm volatile ("frstor %0" : : "m" (scp->sc_fpsave));
[...]
>      memcpy (usp -= 12, &scp->sc_i386_thread_state, 12 * sizeof (int));
[...]
>      sigreturn_trampoline (usp);

The original code was restoring FP state relatively early, before
calling memcpy etc. because it assumed that FP would not be used by libc
itself. But memcpy may use it; it is only by luck that the call above gets
inlined as mere copying.

And then __sigreturn2 too calls various functions which might use SSE.

Rather than chasing these, it'd be better to make sigreturn2_trampoline
do the restoration. That should be not very hard, by adding just a
few more bits: in struct sigcontext, add an fpstate_size field that
stores the size of the FP state (and default to sizeof sc_fpsave if
xstate is not set), and at the end of __sigreturn copy the FP state
onto additional space on the usp stack, below the i386 thread state, as
well as the content of scp->xstate->fp_save_kind (or 0 if xstate is not
set) and fpstate_size. sigreturn2_trampoline can then easily test that
fp_save_kind value to determine whether it should use frstor, fxrstor, or
xrstor to restore the FP state, pop that state, and continue with
restoring general state.

Samuel
  

Patch

diff --git a/hurd/Makefile b/hurd/Makefile
index cf70b8c65c..cbc3c23b1f 100644
--- a/hurd/Makefile
+++ b/hurd/Makefile
@@ -19,6 +19,11 @@  subdir := hurd
 
 include ../Makeconfig
 
+tests := test-sig-xstate \
+	test-sig-rpc-interrupted
+$(objpfx)test-sig-xstate: $(shared-thread-library)
+$(objpfx)test-sig-rpc-interrupted: $(shared-thread-library) $(objdir)/hurd/libhurduser.so
+
 headers = \
   $(interface-headers) \
   hurd.h \
diff --git a/hurd/test-sig-rpc-interrupted.c b/hurd/test-sig-rpc-interrupted.c
new file mode 100644
index 0000000000..b9093850a4
--- /dev/null
+++ b/hurd/test-sig-rpc-interrupted.c
@@ -0,0 +1,185 @@ 
+/* Test the state save/restore procedures during signal handling when an
+   interruptible RPC is restarted.
+.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+
+#include <assert.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <mach/message.h>
+#include <mach/gnumach.h>
+#include <mach/mach_traps.h>
+#include <mach/mig_errors.h>
+#include <mach-shortcuts.h>
+#include <mach_init.h>
+#include <hurd/io.h>
+#include <hurd/io_reply.h>
+
+#include <support/check.h>
+#include <support/xthread.h>
+
+#include "test-xstate.h"
+
+void handler(int signum, siginfo_t *info, void *context)
+{
+  printf("signal %d setting a different CPU state\n", signum);
+  char buf3[XSTATE_BUFFER_SIZE];
+  memset(buf3, 0x77, XSTATE_BUFFER_SIZE);
+  SET_XSTATE(buf3);
+}
+
+static const mach_msg_type_t RetCodeCheck = {
+  .msgt_name =            (unsigned char) MACH_MSG_TYPE_INTEGER_32,
+  .msgt_size =            32,
+  .msgt_number =          1,
+  .msgt_inline =          TRUE,
+  .msgt_longform =        FALSE,
+  .msgt_deallocate =      FALSE,
+  .msgt_unused =          0
+};
+
+
+/* Helper thread to simulate a proper RPC interruption during signal handling */
+void* fake_interruptor(void *arg)
+{
+  int err;
+  sigset_t ss;
+  TEST_COMPARE(sigemptyset(&ss), 0);
+  TEST_COMPARE(sigaddset(&ss, SIGUSR1), 0);
+  TEST_COMPARE(sigprocmask(SIG_BLOCK, &ss, NULL), 0);
+
+  struct {
+    mach_msg_header_t Head;
+  } request;
+  mach_port_t rxport = *((mach_port_t*)arg);
+  err = mach_msg(&request.Head, MACH_RCV_MSG, 0, sizeof(request), rxport,
+                 MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+  TEST_COMPARE(err, MACH_MSG_SUCCESS);
+  TEST_COMPARE(request.Head.msgh_bits, 0x1112);
+  TEST_COMPARE(request.Head.msgh_size, sizeof(request.Head));
+  TEST_COMPARE(request.Head.msgh_id, 33000);
+
+  mig_reply_header_t reply;
+  reply.Head = request.Head;
+  reply.Head.msgh_id += 100;
+  reply.RetCodeType = RetCodeCheck;
+  reply.RetCode = KERN_SUCCESS;
+  err = mach_msg(&reply.Head, MACH_SEND_MSG, sizeof(reply), 0, MACH_PORT_NULL,
+                 MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+  TEST_COMPARE(err, MACH_MSG_SUCCESS);
+
+  return NULL;
+}
+
+
+/* Helper thread to send a signal to the main thread in the middle of
+ * an interruptible rpc */
+void* signal_sender(void *arg)
+{
+  int err;
+  sigset_t ss;
+  TEST_COMPARE(sigemptyset(&ss), 0);
+  TEST_COMPARE(sigaddset(&ss, SIGUSR1), 0);
+  TEST_COMPARE(sigprocmask(SIG_BLOCK, &ss, NULL), 0);
+
+  /* Receive the first request, we won't answer to this. */
+  struct {
+    mach_msg_header_t head;
+    char data[64];
+  } m1, m2;
+  mach_port_t rxport = *((mach_port_t*)arg);
+  memset(&m1, 0, sizeof(m1));
+  memset(&m2, 0, sizeof(m2));
+  err = mach_msg(&m1.head, MACH_RCV_MSG, 0, sizeof(m1), rxport,
+                 MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+  TEST_COMPARE(err, MACH_MSG_SUCCESS);
+
+  /* interrupt the ongoing rpc with a signal, using the
+   * interruptible rpc protocol */
+  pthread_t thintr = xpthread_create(NULL, fake_interruptor, arg);
+  TEST_COMPARE(kill(getpid(), SIGUSR1), 0);
+  xpthread_join(thintr);
+
+  /* Complete the interruption by sending EINTR */
+  mig_reply_header_t reply;
+  reply.Head = m1.head;
+  reply.Head.msgh_id += 100;
+  reply.RetCodeType = RetCodeCheck;
+  reply.RetCode = EINTR;
+  err = mach_msg(&reply.Head, MACH_SEND_MSG, sizeof(reply), 0, MACH_PORT_NULL,
+                 MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+  TEST_COMPARE(err, MACH_MSG_SUCCESS);
+
+  /* Receive the retried rpc, and check that it has the same payload
+   * as the first one. Port names might still be different. */
+  err = mach_msg(&m2.head, MACH_RCV_MSG, 0, sizeof(m2), rxport,
+                 MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+  TEST_COMPARE(m1.head.msgh_bits, m2.head.msgh_bits);
+  TEST_COMPARE(m1.head.msgh_size, m2.head.msgh_size);
+  TEST_COMPARE(m1.head.msgh_id, m2.head.msgh_id);
+  TEST_COMPARE_BLOB(m1.data, sizeof(m1.data), m2.data, sizeof(m2.data));
+
+  /* And finally make the rpc succeed by sending a valid reply */
+  err = io_read_reply(m2.head.msgh_remote_port, MACH_MSG_TYPE_MOVE_SEND_ONCE,
+                      KERN_SUCCESS, NULL, 0);
+  TEST_COMPARE(err, MACH_MSG_SUCCESS);
+
+  return NULL;
+}
+
+
+static int do_test(void)
+{
+#if ! XSTATE_HELPERS_SUPPORTED
+  FAIL_UNSUPPORTED("Test not supported on this arch.");
+#endif
+
+  /* Setup signal handling; we need to handle the signal in the main
+   * thread, the other ones will explicitely block SIGUSR1. */
+  struct sigaction act = { 0 };
+  act.sa_flags = SA_RESTART;
+  act.sa_sigaction = &handler;
+  TEST_COMPARE(sigaction(SIGUSR1, &act, NULL), 0);
+
+  mach_port_t fakeio;
+  int err;
+  err = mach_port_allocate(mach_task_self (), MACH_PORT_RIGHT_RECEIVE, &fakeio);
+  TEST_COMPARE(err, MACH_MSG_SUCCESS);
+
+  err = mach_port_insert_right(mach_task_self(), fakeio, fakeio,
+                               MACH_MSG_TYPE_MAKE_SEND);
+  TEST_COMPARE(err, MACH_MSG_SUCCESS);
+
+  pthread_t thsender = xpthread_create(NULL, signal_sender, &fakeio);
+
+  char *buf;
+  mach_msg_type_number_t n;
+  TEST_COMPARE(io_read(fakeio, &buf, &n, 1, 2), 0);
+
+  xpthread_join(thsender);
+  return EXIT_SUCCESS;
+}
+
+#include <support/test-driver.c>
diff --git a/hurd/test-sig-xstate.c b/hurd/test-sig-xstate.c
new file mode 100644
index 0000000000..610b083977
--- /dev/null
+++ b/hurd/test-sig-xstate.c
@@ -0,0 +1,94 @@ 
+/* Test the state save/restore procedures during signal handling.
+.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+
+#include <assert.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <mach/message.h>
+#include <mach/gnumach.h>
+#include <mach/mach_traps.h>
+#include <mach-shortcuts.h>
+#include <mach_init.h>
+#include <hurd/io.h>
+#include <hurd/io_reply.h>
+
+#include <support/check.h>
+#include <support/xthread.h>
+
+#include "test-xstate.h"
+
+static volatile bool loopflag = true;
+
+void handler(int signum, siginfo_t *info, void *context)
+{
+  char buf3[XSTATE_BUFFER_SIZE];
+  memset(buf3, 0x77, XSTATE_BUFFER_SIZE);
+  SET_XSTATE(buf3);
+  printf("signal %d setting a different CPU state\n", signum);
+  loopflag = false;
+}
+
+/* Helper thread to send a signal to the main thread  */
+void* signal_sender(void *arg)
+{
+  sigset_t ss;
+  assert(! sigemptyset(&ss));
+  assert(! sigaddset(&ss, SIGUSR1));
+  assert(! sigprocmask(SIG_BLOCK, &ss, NULL));
+
+  TEST_COMPARE(kill(getpid(), SIGUSR1), 0);
+
+  return NULL;
+}
+
+static int do_test(void)
+{
+#if ! XSTATE_HELPERS_SUPPORTED
+  FAIL_UNSUPPORTED("Test not supported on this arch.");
+#endif
+
+  struct sigaction act = { 0 };
+  act.sa_sigaction = &handler;
+  TEST_COMPARE(sigaction(SIGUSR1, &act, NULL), 0);
+
+  pthread_t thsender = xpthread_create(NULL, signal_sender, NULL);
+
+  char buf1[XSTATE_BUFFER_SIZE], buf2[XSTATE_BUFFER_SIZE];
+  memset(buf1, 0x33, XSTATE_BUFFER_SIZE);
+
+  SET_XSTATE(buf1);
+
+  while (loopflag)
+    ;
+
+  GET_XSTATE(buf2);
+  TEST_COMPARE_BLOB(buf1, sizeof(buf1), buf2, sizeof(buf2));
+
+  xpthread_join(thsender);
+  return EXIT_SUCCESS;
+}
+
+#include <support/test-driver.c>
diff --git a/hurd/test-xstate.h b/hurd/test-xstate.h
new file mode 100644
index 0000000000..1425fe2d38
--- /dev/null
+++ b/hurd/test-xstate.h
@@ -0,0 +1,40 @@ 
+/* Helpers to test XSTATE during signal handling
+.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _TEST_XSTATE_H
+#define _TEST_XSTATE_H
+
+#if defined(__x86_64__) || defined(__i386__)
+#define XSTATE_HELPERS_SUPPORTED 1
+#define XSTATE_BUFFER_SIZE 16
+#define SET_XSTATE(b) do {                                      \
+    asm volatile ("movups (%0),%%xmm0" :: "r" (b) :);           \
+  } while (0)
+
+#define GET_XSTATE(b) do {                                      \
+    asm volatile ("movups %%xmm0,(%0)" :: "r" (b) :);           \
+  } while (0)
+
+#else
+#define XSTATE_HELPERS_SUPPORTED 0
+#define XSTATE_BUFFER_SIZE 1
+#define SET_XSTATE(b)
+#endif
+
+#endif /* _TEST_XSTATE_H */
diff --git a/sysdeps/mach/hurd/i386/bits/sigcontext.h b/sysdeps/mach/hurd/i386/bits/sigcontext.h
index 6e5e220e9d..c44e4deac6 100644
--- a/sysdeps/mach/hurd/i386/bits/sigcontext.h
+++ b/sysdeps/mach/hurd/i386/bits/sigcontext.h
@@ -88,6 +88,8 @@  struct sigcontext
     struct i386_fp_save sc_fpsave;
     struct i386_fp_regs sc_fpregs;
     int sc_fpexcsr;		/* FPSR including exception bits.  */
+
+    struct i386_xfloat_state *xstate;
   };
 
 /* Traditional BSD names for some members.  */
diff --git a/sysdeps/mach/hurd/i386/sigreturn.c b/sysdeps/mach/hurd/i386/sigreturn.c
index ce8df8d02b..618cb74196 100644
--- a/sysdeps/mach/hurd/i386/sigreturn.c
+++ b/sysdeps/mach/hurd/i386/sigreturn.c
@@ -21,6 +21,8 @@ 
 #include <stdlib.h>
 #include <string.h>
 
+#include <cpuid.h>
+
 /* This is run on the thread stack after restoring it, to be able to
    unlock SS off sigstack.  */
 static void
@@ -123,10 +125,27 @@  __sigreturn (struct sigcontext *scp)
   if (scp->sc_onstack)
     ss->sigaltstack.ss_flags &= ~SS_ONSTACK;
 
-  if (scp->sc_fpused)
-    /* Restore the FPU state.  Mach conveniently stores the state
-       in the format the i387 `frstor' instruction uses to restore it.  */
-    asm volatile ("frstor %0" : : "m" (scp->sc_fpsave));
+#ifdef i386_XFLOAT_STATE
+  if ((scp->xstate) && (scp->xstate->initialized))
+    {
+      unsigned eax, ebx, ecx, edx;
+      __cpuid_count(0xd, 0, eax, ebx, ecx, edx);
+      switch (scp->xstate->fp_save_kind)
+        {
+        case 0: // FNSAVE
+          asm volatile("frstor %0" : : "m" (scp->xstate->hw_state));
+          break;
+        case 1: // FXSAVE
+          asm volatile("fxrstor %0" : : "m" (scp->xstate->hw_state),    \
+                       "a" (eax), "d" (edx));
+          break;
+        default: // XSAVE
+          asm volatile("xrstor %0" : : "m" (scp->xstate->hw_state),     \
+                       "a" (eax), "d" (edx));
+          break;
+        }
+    }
+#endif
 
   {
     /* There are convenient instructions to pop state off the stack, so we
diff --git a/sysdeps/mach/hurd/x86/trampoline.c b/sysdeps/mach/hurd/x86/trampoline.c
index 8e2890f8c5..c333d56022 100644
--- a/sysdeps/mach/hurd/x86/trampoline.c
+++ b/sysdeps/mach/hurd/x86/trampoline.c
@@ -26,7 +26,11 @@ 
 #include "hurdfault.h"
 #include <intr-msg.h>
 #include <sys/ucontext.h>
-
+#ifdef __x86_64__
+#include <mach/x86_64/mach_i386.h>
+#else
+#include <mach/i386/mach_i386.h>
+#endif
 
 /* Fill in a siginfo_t structure for SA_SIGINFO-enabled handlers.  */
 static void fill_siginfo (siginfo_t *si, int signo,
@@ -93,7 +97,7 @@  static void fill_ucontext (ucontext_t *uc, const struct sigcontext *sc)
   /* XXX FPU state.  */
   memset (&uc->uc_mcontext.fpregs, 0, sizeof (fpregset_t));
 }
-
+#include <stdio.h>
 struct sigcontext *
 _hurd_setup_sighandler (struct hurd_sigstate *ss, const struct sigaction *action,
 			__sighandler_t handler,
@@ -106,6 +110,7 @@  _hurd_setup_sighandler (struct hurd_sigstate *ss, const struct sigaction *action
   void firewall (void);
   void *sigsp;
   struct sigcontext *scp;
+  vm_size_t xstate_size;
   struct
     {
       union
@@ -145,6 +150,14 @@  _hurd_setup_sighandler (struct hurd_sigstate *ss, const struct sigaction *action
       struct hurd_userlink link;
       ucontext_t ucontext;
       siginfo_t siginfo;
+#ifdef __x86_64__
+      char _pad2[56];
+#else
+      char _pad2[20];
+#endif
+      char xstate[];
+      /* Don't add anything after xstate, as it's dynamically
+         sized. */
     } *stackframe;
 
 #ifdef __x86_64__
@@ -170,6 +183,17 @@  _hurd_setup_sighandler (struct hurd_sigstate *ss, const struct sigaction *action
   if (! machine_get_basic_state (ss->thread, state))
     return NULL;
 
+  /* Initialize the size of the CPU extended state, to be saved during
+   * signal handling */
+#ifdef i386_XFLOAT_STATE
+  _Static_assert ((sizeof(*stackframe) + sizeof(struct i386_xfloat_state)) % 64 == 0,
+                  "stackframe size must be multiple of 64-byte minus "
+                  "sizeof(struct i386_xfloat_state), please adjust _pad2");
+
+  if (__i386_get_xstate_size(__mach_host_self(), &xstate_size))
+#endif
+    xstate_size = 0;
+
   /* Save the original SP in the gratuitous `esp' slot.
      We may need to reset the SP (the `uesp' slot) to avoid clobbering an
      interrupted RPC frame.  */
@@ -196,14 +220,21 @@  _hurd_setup_sighandler (struct hurd_sigstate *ss, const struct sigaction *action
 #endif
     }
 
-  /* Push the arguments to call `trampoline' on the stack.  */
-  sigsp -= sizeof (*stackframe);
-#ifdef __x86_64__
-  /* Align SP at 16 bytes.  Coupled with the fact that sigreturn_addr is
-     16-byte aligned within the stackframe struct, this ensures that it ends
-     up on a 16-byte aligned address, as required by the ABI.  */
-  sigsp = (void *) ((uintptr_t) sigsp & ~15UL);
-#endif
+  /* Push the arguments to call `trampoline' on the stack.
+   * The extended state might have a variable size depending on the platform,
+   * so we dynamically allocate it on the stack frame.*/
+  sigsp -= sizeof (*stackframe) + xstate_size;
+
+  /* Align SP at 64 bytes. This is needed for two reasons:
+   * - sigreturn_addr is 16-byte aligned within the stackframe
+   *   struct, and this ensures that it ends up on a 16-byte aligned
+   *   address, as required by the ABI.
+   * - the XSAVE state needs to be aligned at 64 bytes (on both i386 and
+   *   x86_64), so we align the stackframe also at 64 bytes and add the
+   *   required padding at the end, see the _pad2 field.
+   */
+  sigsp = (void *) ((uintptr_t) sigsp & ~63UL);
+
   stackframe = sigsp;
 
   if (_hurdsig_catch_memory_fault (stackframe))
@@ -248,14 +279,26 @@  _hurd_setup_sighandler (struct hurd_sigstate *ss, const struct sigaction *action
       memcpy (&scp->sc_i386_thread_state,
 	      &state->basic, sizeof (state->basic));
 
-      /* struct sigcontext is laid out so that starting at sc_fpkind mimics
-	 a struct i386_float_state.  */
-      _Static_assert (offsetof (struct sigcontext, sc_i386_float_state)
-		      % __alignof__ (struct i386_float_state) == 0,
-		      "sc_i386_float_state layout mismatch");
-      ok = machine_get_state (ss->thread, state, i386_FLOAT_STATE,
-			      &state->fpu, &scp->sc_i386_float_state,
-			      sizeof (state->fpu));
+      scp->xstate = NULL;
+#ifdef i386_XFLOAT_STATE
+      if (xstate_size > 0)
+        {
+          mach_msg_type_number_t got;
+          got = (xstate_size / sizeof (int));
+          ok = (! __thread_get_state (ss->thread, i386_XFLOAT_STATE,
+                                      (thread_state_t) stackframe->xstate, &got)
+                && got == (xstate_size / sizeof (int)));
+          if (ok)
+          {
+            scp->xstate = (struct i386_xfloat_state*) stackframe->xstate;
+            assert((uintptr_t)scp->xstate->hw_state % 64 == 0);
+          }
+        }
+      else
+#endif
+        {
+          ok = 1;
+        }
 
       /* Set up the arguments for the signal handler.  */
       stackframe->signo = signo;
diff --git a/sysdeps/mach/hurd/x86_64/bits/sigcontext.h b/sysdeps/mach/hurd/x86_64/bits/sigcontext.h
index 7bac881176..d83795fcbc 100644
--- a/sysdeps/mach/hurd/x86_64/bits/sigcontext.h
+++ b/sysdeps/mach/hurd/x86_64/bits/sigcontext.h
@@ -96,6 +96,8 @@  struct sigcontext
     struct i386_fp_save sc_fpsave;
     struct i386_fp_regs sc_fpregs;
     int sc_fpexcsr;		/* FPSR including exception bits.  */
+
+    struct i386_xfloat_state *xstate;
   };
 
 /* Traditional BSD names for some members.  */
diff --git a/sysdeps/mach/hurd/x86_64/sigreturn.c b/sysdeps/mach/hurd/x86_64/sigreturn.c
index 81a2d3ba74..be1394d36e 100644
--- a/sysdeps/mach/hurd/x86_64/sigreturn.c
+++ b/sysdeps/mach/hurd/x86_64/sigreturn.c
@@ -20,6 +20,8 @@ 
 #include <hurd/msg.h>
 #include <stdlib.h>
 
+#include <cpuid.h>
+
 /* This is run on the thread stack after restoring it, to be able to
    unlock SS off sigstack.  */
 void
@@ -116,10 +118,27 @@  __sigreturn (struct sigcontext *scp)
   if (scp->sc_onstack)
     ss->sigaltstack.ss_flags &= ~SS_ONSTACK;
 
-  if (scp->sc_fpused)
-    /* Restore the FPU state.  Mach conveniently stores the state
-       in the format the i387 `frstor' instruction uses to restore it.  */
-    asm volatile ("frstor %0" : : "m" (scp->sc_fpsave));
+#ifdef i386_XFLOAT_STATE
+  if ((scp->xstate) && (scp->xstate->initialized))
+    {
+      unsigned eax, ebx, ecx, edx;
+      __cpuid_count(0xd, 0, eax, ebx, ecx, edx);
+      switch (scp->xstate->fp_save_kind)
+        {
+        case 0: // FNSAVE
+          asm volatile("frstor %0" : : "m" (scp->xstate->hw_state));
+          break;
+        case 1: // FXSAVE
+          asm volatile("fxrstor %0" : : "m" (scp->xstate->hw_state),    \
+                       "a" (eax), "d" (edx));
+          break;
+        default: // XSAVE
+          asm volatile("xrstor %0" : : "m" (scp->xstate->hw_state),     \
+                       "a" (eax), "d" (edx));
+          break;
+        }
+    }
+#endif
 
   /* Copy the registers onto the user's stack, to be able to release the
      altstack (by unlocking sigstate).  Note that unless an altstack is used,