diff mbox series

[4/8] nptl: Add rseq registration

Message ID cca3816aae39e906bf7a92cbb4bbfe3705abcc95.1638880889.git.fweimer@redhat.com
State Committed
Commit 95e114a0919d844d8fe07839cb6538b7f5ee920e
Headers show
Series Extensible rseq integration | expand

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Florian Weimer Dec. 7, 2021, 1:01 p.m. UTC
The rseq area is placed directly into struct pthread.  rseq
registration failure is not treated as an error, so it is possible
that threads run with inconsistent registration status.

<sys/rseq.h> is not yet installed as a public header.

Co-Authored-By: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
v2: Use volatile access to cpu_id.  Drop csu/libc-tls.c spurious change.

 nptl/descr.h                                |   4 +
 nptl/pthread_create.c                       |  13 +
 sysdeps/nptl/dl-tls_init_tp.c               |   8 +-
 sysdeps/unix/sysv/linux/Makefile            |   9 +-
 sysdeps/unix/sysv/linux/aarch64/bits/rseq.h |  43 ++++
 sysdeps/unix/sysv/linux/arm/bits/rseq.h     |  83 +++++++
 sysdeps/unix/sysv/linux/bits/rseq.h         |  29 +++
 sysdeps/unix/sysv/linux/mips/bits/rseq.h    |  62 +++++
 sysdeps/unix/sysv/linux/powerpc/bits/rseq.h |  37 +++
 sysdeps/unix/sysv/linux/rseq-internal.h     |  45 ++++
 sysdeps/unix/sysv/linux/s390/bits/rseq.h    |  37 +++
 sysdeps/unix/sysv/linux/sys/rseq.h          | 174 +++++++++++++
 sysdeps/unix/sysv/linux/tst-rseq-nptl.c     | 260 ++++++++++++++++++++
 sysdeps/unix/sysv/linux/tst-rseq.c          |  64 +++++
 sysdeps/unix/sysv/linux/tst-rseq.h          |  57 +++++
 sysdeps/unix/sysv/linux/x86/bits/rseq.h     |  30 +++
 16 files changed, 952 insertions(+), 3 deletions(-)
 create mode 100644 sysdeps/unix/sysv/linux/aarch64/bits/rseq.h
 create mode 100644 sysdeps/unix/sysv/linux/arm/bits/rseq.h
 create mode 100644 sysdeps/unix/sysv/linux/bits/rseq.h
 create mode 100644 sysdeps/unix/sysv/linux/mips/bits/rseq.h
 create mode 100644 sysdeps/unix/sysv/linux/powerpc/bits/rseq.h
 create mode 100644 sysdeps/unix/sysv/linux/rseq-internal.h
 create mode 100644 sysdeps/unix/sysv/linux/s390/bits/rseq.h
 create mode 100644 sysdeps/unix/sysv/linux/sys/rseq.h
 create mode 100644 sysdeps/unix/sysv/linux/tst-rseq-nptl.c
 create mode 100644 sysdeps/unix/sysv/linux/tst-rseq.c
 create mode 100644 sysdeps/unix/sysv/linux/tst-rseq.h
 create mode 100644 sysdeps/unix/sysv/linux/x86/bits/rseq.h

Comments

Szabolcs Nagy Dec. 8, 2021, 4:51 p.m. UTC | #1
The 12/07/2021 14:01, Florian Weimer via Libc-alpha wrote:
> The rseq area is placed directly into struct pthread.  rseq
> registration failure is not treated as an error, so it is possible
> that threads run with inconsistent registration status.
> 
> <sys/rseq.h> is not yet installed as a public header.
> 
> Co-Authored-By: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

looks good.

most of the changes were reviewed when rseq was first committed.

my problem with __has_include ("linux/rseq.h") etc in sys/rseq.h
is that linux might change that later to conflict with libc headers
in some way. but i don't have a better way to avoid issues when
both libc and linux headers are included into the same TU.

despite the comments in linux/rseq.h (and sys/rseq.h) the
RSEQ_CPU_ID_UNINITIALIZED state is now not observable.
i guess it is just an unused piece of linux uapi so ok.

inconsistent rseq status in threads is ok.

not unregistering on thread exit is ok.

updated tests are ok.

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
Siddhesh Poyarekar Dec. 8, 2021, 6:03 p.m. UTC | #2
On 12/7/21 18:31, Florian Weimer via Libc-alpha wrote:
> The rseq area is placed directly into struct pthread.  rseq
> registration failure is not treated as an error, so it is possible
> that threads run with inconsistent registration status.
> 
> <sys/rseq.h> is not yet installed as a public header.
> 
> Co-Authored-By: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> ---
> v2: Use volatile access to cpu_id.  Drop csu/libc-tls.c spurious change.
> 
>   nptl/descr.h                                |   4 +
>   nptl/pthread_create.c                       |  13 +
>   sysdeps/nptl/dl-tls_init_tp.c               |   8 +-
>   sysdeps/unix/sysv/linux/Makefile            |   9 +-
>   sysdeps/unix/sysv/linux/aarch64/bits/rseq.h |  43 ++++
>   sysdeps/unix/sysv/linux/arm/bits/rseq.h     |  83 +++++++
>   sysdeps/unix/sysv/linux/bits/rseq.h         |  29 +++
>   sysdeps/unix/sysv/linux/mips/bits/rseq.h    |  62 +++++
>   sysdeps/unix/sysv/linux/powerpc/bits/rseq.h |  37 +++
>   sysdeps/unix/sysv/linux/rseq-internal.h     |  45 ++++
>   sysdeps/unix/sysv/linux/s390/bits/rseq.h    |  37 +++
>   sysdeps/unix/sysv/linux/sys/rseq.h          | 174 +++++++++++++
>   sysdeps/unix/sysv/linux/tst-rseq-nptl.c     | 260 ++++++++++++++++++++
>   sysdeps/unix/sysv/linux/tst-rseq.c          |  64 +++++
>   sysdeps/unix/sysv/linux/tst-rseq.h          |  57 +++++
>   sysdeps/unix/sysv/linux/x86/bits/rseq.h     |  30 +++
>   16 files changed, 952 insertions(+), 3 deletions(-)
>   create mode 100644 sysdeps/unix/sysv/linux/aarch64/bits/rseq.h
>   create mode 100644 sysdeps/unix/sysv/linux/arm/bits/rseq.h
>   create mode 100644 sysdeps/unix/sysv/linux/bits/rseq.h
>   create mode 100644 sysdeps/unix/sysv/linux/mips/bits/rseq.h
>   create mode 100644 sysdeps/unix/sysv/linux/powerpc/bits/rseq.h
>   create mode 100644 sysdeps/unix/sysv/linux/rseq-internal.h
>   create mode 100644 sysdeps/unix/sysv/linux/s390/bits/rseq.h
>   create mode 100644 sysdeps/unix/sysv/linux/sys/rseq.h
>   create mode 100644 sysdeps/unix/sysv/linux/tst-rseq-nptl.c
>   create mode 100644 sysdeps/unix/sysv/linux/tst-rseq.c
>   create mode 100644 sysdeps/unix/sysv/linux/tst-rseq.h
>   create mode 100644 sysdeps/unix/sysv/linux/x86/bits/rseq.h
> 
> diff --git a/nptl/descr.h b/nptl/descr.h
> index af2a6ab87a..92db305913 100644
> --- a/nptl/descr.h
> +++ b/nptl/descr.h
> @@ -34,6 +34,7 @@
>   #include <bits/types/res_state.h>
>   #include <kernel-features.h>
>   #include <tls-internal-struct.h>
> +#include <sys/rseq.h>
>   
>   #ifndef TCB_ALIGNMENT
>   # define TCB_ALIGNMENT 32
> @@ -406,6 +407,9 @@ struct pthread
>     /* Used on strsignal.  */
>     struct tls_internal_t tls_state;
>   
> +  /* rseq area registered with the kernel.  */
> +  struct rseq rseq_area;
> +
>     /* This member must be last.  */
>     char end_padding[];
>   
> diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
> index bad9eeb52f..ea0d79341e 100644
> --- a/nptl/pthread_create.c
> +++ b/nptl/pthread_create.c
> @@ -32,6 +32,7 @@
>   #include <default-sched.h>
>   #include <futex-internal.h>
>   #include <tls-setup.h>
> +#include <rseq-internal.h>
>   #include "libioP.h"
>   #include <sys/single_threaded.h>
>   #include <version.h>
> @@ -366,6 +367,9 @@ start_thread (void *arg)
>     /* Initialize pointers to locale data.  */
>     __ctype_init ();
>   
> +  /* Register rseq TLS to the kernel.  */
> +  rseq_register_current_thread (pd);
> +
>   #ifndef __ASSUME_SET_ROBUST_LIST
>     if (__nptl_set_robust_list_avail)
>   #endif
> @@ -571,6 +575,15 @@ out:
>        process is really dead since 'clone' got passed the CLONE_CHILD_CLEARTID
>        flag.  The 'tid' field in the TCB will be set to zero.
>   
> +     rseq TLS is still registered at this point.  Rely on implicit
> +     unregistration performed by the kernel on thread teardown.  This is not a
> +     problem because the rseq TLS lives on the stack, and the stack outlives
> +     the thread.  If TCB allocation is ever changed, additional steps may be
> +     required, such as performing explicit rseq unregistration before
> +     reclaiming the rseq TLS area memory.  It is NOT sufficient to block
> +     signals because the kernel may write to the rseq area even without
> +     signals.
> +
>        The exit code is zero since in case all threads exit by calling
>        'pthread_exit' the exit status must be 0 (zero).  */
>     while (1)
> diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
> index ca494dd3a5..fedb876fdb 100644
> --- a/sysdeps/nptl/dl-tls_init_tp.c
> +++ b/sysdeps/nptl/dl-tls_init_tp.c
> @@ -21,6 +21,7 @@
>   #include <list.h>
>   #include <pthreadP.h>
>   #include <tls.h>
> +#include <rseq-internal.h>
>   
>   #ifndef __ASSUME_SET_ROBUST_LIST
>   bool __nptl_set_robust_list_avail;
> @@ -57,11 +58,12 @@ __tls_pre_init_tp (void)
>   void
>   __tls_init_tp (void)
>   {
> +  struct pthread *pd = THREAD_SELF;
> +
>     /* Set up thread stack list management.  */
> -  list_add (&THREAD_SELF->list, &GL (dl_stack_user));
> +  list_add (&pd->list, &GL (dl_stack_user));
>   
>      /* Early initialization of the TCB.   */
> -   struct pthread *pd = THREAD_SELF;
>      pd->tid = INTERNAL_SYSCALL_CALL (set_tid_address, &pd->tid);
>      THREAD_SETMEM (pd, specific[0], &pd->specific_1stblock[0]);
>      THREAD_SETMEM (pd, user_stack, true);
> @@ -90,6 +92,8 @@ __tls_init_tp (void)
>         }
>     }
>   
> +  rseq_register_current_thread (pd);
> +
>     /* Set initial thread's stack block from 0 up to __libc_stack_end.
>        It will be bigger than it actually is, but for unwind.c/pt-longjmp.c
>        purposes this is good enough.  */
> diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
> index 29c6c78f98..eb0f5fc021 100644
> --- a/sysdeps/unix/sysv/linux/Makefile
> +++ b/sysdeps/unix/sysv/linux/Makefile
> @@ -131,7 +131,10 @@ ifeq ($(have-GLIBC_2.27)$(build-shared),yesyes)
>   tests += tst-ofdlocks-compat
>   endif
>   
> -tests-internal += tst-sigcontext-get_pc
> +tests-internal += \
> +  tst-rseq \
> +  tst-sigcontext-get_pc \
> +  # tests-internal
>   
>   tests-time64 += \
>     tst-adjtimex-time64 \
> @@ -357,4 +360,8 @@ endif
>   
>   ifeq ($(subdir),nptl)
>   tests += tst-align-clone tst-getpid1
> +
> +# tst-rseq-nptl is an internal test because it requires a definition of
> +# __NR_rseq from the internal system call list.
> +tests-internal += tst-rseq-nptl
>   endif
> diff --git a/sysdeps/unix/sysv/linux/aarch64/bits/rseq.h b/sysdeps/unix/sysv/linux/aarch64/bits/rseq.h
> new file mode 100644
> index 0000000000..9ba92725c7
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/aarch64/bits/rseq.h
> @@ -0,0 +1,43 @@
> +/* Restartable Sequences Linux aarch64 architecture header.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
> +#endif
> +
> +/* RSEQ_SIG is a signature required before each abort handler code.
> +
> +   It is a 32-bit value that maps to actual architecture code compiled
> +   into applications and libraries.  It needs to be defined for each
> +   architecture.  When choosing this value, it needs to be taken into
> +   account that generating invalid instructions may have ill effects on
> +   tools like objdump, and may also have impact on the CPU speculative
> +   execution efficiency in some cases.
> +
> +   aarch64 -mbig-endian generates mixed endianness code vs data:
> +   little-endian code and big-endian data.  Ensure the RSEQ_SIG signature
> +   matches code endianness.  */
> +
> +#define RSEQ_SIG_CODE  0xd428bc00  /* BRK #0x45E0.  */
> +
> +#ifdef __AARCH64EB__
> +# define RSEQ_SIG_DATA 0x00bc28d4  /* BRK #0x45E0.  */
> +#else
> +# define RSEQ_SIG_DATA RSEQ_SIG_CODE
> +#endif
> +
> +#define RSEQ_SIG       RSEQ_SIG_DATA
> diff --git a/sysdeps/unix/sysv/linux/arm/bits/rseq.h b/sysdeps/unix/sysv/linux/arm/bits/rseq.h
> new file mode 100644
> index 0000000000..0542b26f6a
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/arm/bits/rseq.h
> @@ -0,0 +1,83 @@
> +/* Restartable Sequences Linux arm architecture header.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
> +#endif
> +
> +/*
> +   RSEQ_SIG is a signature required before each abort handler code.
> +
> +   It is a 32-bit value that maps to actual architecture code compiled
> +   into applications and libraries.  It needs to be defined for each
> +   architecture.  When choosing this value, it needs to be taken into
> +   account that generating invalid instructions may have ill effects on
> +   tools like objdump, and may also have impact on the CPU speculative
> +   execution efficiency in some cases.
> +
> +   - ARM little endian
> +
> +   RSEQ_SIG uses the udf A32 instruction with an uncommon immediate operand
> +   value 0x5de3.  This traps if user-space reaches this instruction by mistake,
> +   and the uncommon operand ensures the kernel does not move the instruction
> +   pointer to attacker-controlled code on rseq abort.
> +
> +   The instruction pattern in the A32 instruction set is:
> +
> +   e7f5def3    udf    #24035    ; 0x5de3
> +
> +   This translates to the following instruction pattern in the T16 instruction
> +   set:
> +
> +   little endian:
> +   def3        udf    #243      ; 0xf3
> +   e7f5        b.n    <7f5>
> +
> +   - ARMv6+ big endian (BE8):
> +
> +   ARMv6+ -mbig-endian generates mixed endianness code vs data: little-endian
> +   code and big-endian data.  The data value of the signature needs to have its
> +   byte order reversed to generate the trap instruction:
> +
> +   Data: 0xf3def5e7
> +
> +   Translates to this A32 instruction pattern:
> +
> +   e7f5def3    udf    #24035    ; 0x5de3
> +
> +   Translates to this T16 instruction pattern:
> +
> +   def3        udf    #243      ; 0xf3
> +   e7f5        b.n    <7f5>
> +
> +   - Prior to ARMv6 big endian (BE32):
> +
> +   Prior to ARMv6, -mbig-endian generates big-endian code and data
> +   (which match), so the endianness of the data representation of the
> +   signature should not be reversed.  However, the choice between BE32
> +   and BE8 is done by the linker, so we cannot know whether code and
> +   data endianness will be mixed before the linker is invoked.  So rather
> +   than try to play tricks with the linker, the rseq signature is simply
> +   data (not a trap instruction) prior to ARMv6 on big endian.  This is
> +   why the signature is expressed as data (.word) rather than as
> +   instruction (.inst) in assembler.  */
> +
> +#ifdef __ARMEB__
> +# define RSEQ_SIG    0xf3def5e7      /* udf    #24035    ; 0x5de3 (ARMv6+) */
> +#else
> +# define RSEQ_SIG    0xe7f5def3      /* udf    #24035    ; 0x5de3 */
> +#endif
> diff --git a/sysdeps/unix/sysv/linux/bits/rseq.h b/sysdeps/unix/sysv/linux/bits/rseq.h
> new file mode 100644
> index 0000000000..46cf5d1c74
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/bits/rseq.h
> @@ -0,0 +1,29 @@
> +/* Restartable Sequences architecture header.  Stub version.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
> +#endif
> +
> +/* RSEQ_SIG is a signature required before each abort handler code.
> +
> +   It is a 32-bit value that maps to actual architecture code compiled
> +   into applications and libraries.  It needs to be defined for each
> +   architecture.  When choosing this value, it needs to be taken into
> +   account that generating invalid instructions may have ill effects on
> +   tools like objdump, and may also have impact on the CPU speculative
> +   execution efficiency in some cases.  */
> diff --git a/sysdeps/unix/sysv/linux/mips/bits/rseq.h b/sysdeps/unix/sysv/linux/mips/bits/rseq.h
> new file mode 100644
> index 0000000000..a9defee568
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/mips/bits/rseq.h
> @@ -0,0 +1,62 @@
> +/* Restartable Sequences Linux mips architecture header.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
> +#endif
> +
> +/* RSEQ_SIG is a signature required before each abort handler code.
> +
> +   It is a 32-bit value that maps to actual architecture code compiled
> +   into applications and libraries.  It needs to be defined for each
> +   architecture.  When choosing this value, it needs to be taken into
> +   account that generating invalid instructions may have ill effects on
> +   tools like objdump, and may also have impact on the CPU speculative
> +   execution efficiency in some cases.
> +
> +   RSEQ_SIG uses the break instruction.  The instruction pattern is:
> +
> +   On MIPS:
> +        0350000d        break     0x350
> +
> +   On nanoMIPS:
> +        00100350        break     0x350
> +
> +   On microMIPS:
> +        0000d407        break     0x350
> +
> +   For nanoMIPS32 and microMIPS, the instruction stream is encoded as
> +   16-bit halfwords, so the signature halfwords need to be swapped
> +   accordingly for little-endian.  */
> +
> +#if defined (__nanomips__)
> +# ifdef __MIPSEL__
> +#  define RSEQ_SIG      0x03500010
> +# else
> +#  define RSEQ_SIG      0x00100350
> +# endif
> +#elif defined (__mips_micromips)
> +# ifdef __MIPSEL__
> +#  define RSEQ_SIG      0xd4070000
> +# else
> +#  define RSEQ_SIG      0x0000d407
> +# endif
> +#elif defined (__mips__)
> +# define RSEQ_SIG       0x0350000d
> +#else
> +/* Unknown MIPS architecture.  */
> +#endif
> diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/rseq.h b/sysdeps/unix/sysv/linux/powerpc/bits/rseq.h
> new file mode 100644
> index 0000000000..05b3cf7b8f
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/powerpc/bits/rseq.h
> @@ -0,0 +1,37 @@
> +/* Restartable Sequences Linux powerpc architecture header.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
> +#endif
> +
> +/* RSEQ_SIG is a signature required before each abort handler code.
> +
> +   It is a 32-bit value that maps to actual architecture code compiled
> +   into applications and libraries.  It needs to be defined for each
> +   architecture.  When choosing this value, it needs to be taken into
> +   account that generating invalid instructions may have ill effects on
> +   tools like objdump, and may also have impact on the CPU speculative
> +   execution efficiency in some cases.
> +
> +   RSEQ_SIG uses the following trap instruction:
> +
> +   powerpc-be:    0f e5 00 0b           twui   r5,11
> +   powerpc64-le:  0b 00 e5 0f           twui   r5,11
> +   powerpc64-be:  0f e5 00 0b           twui   r5,11  */
> +
> +#define RSEQ_SIG        0x0fe5000b
> diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h
> new file mode 100644
> index 0000000000..909f547825
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/rseq-internal.h
> @@ -0,0 +1,45 @@
> +/* Restartable Sequences internal API.  Linux implementation.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef RSEQ_INTERNAL_H
> +#define RSEQ_INTERNAL_H
> +
> +#include <sysdep.h>
> +#include <errno.h>
> +#include <kernel-features.h>
> +#include <stdio.h>
> +#include <sys/rseq.h>
> +
> +#ifdef RSEQ_SIG
> +static inline void
> +rseq_register_current_thread (struct pthread *self)
> +{
> +  int ret = INTERNAL_SYSCALL_CALL (rseq,
> +                                   &self->rseq_area, sizeof (self->rseq_area),
> +                                   0, RSEQ_SIG);
> +  if (INTERNAL_SYSCALL_ERROR_P (ret))
> +    THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);

Why can't we just leave it as the kernel left it when the syscall failed?
It looks like we'll only end up shadowing UNINITIALIZED all the time,
and it may cause issues if Linux decides to use -2 for some other
purpose in the future.

Siddhesh
Florian Weimer Dec. 8, 2021, 6:08 p.m. UTC | #3
* Siddhesh Poyarekar:

>> +#ifdef RSEQ_SIG
>> +static inline void
>> +rseq_register_current_thread (struct pthread *self)
>> +{
>> +  int ret = INTERNAL_SYSCALL_CALL (rseq,
>> +                                   &self->rseq_area, sizeof (self->rseq_area),
>> +                                   0, RSEQ_SIG);
>> +  if (INTERNAL_SYSCALL_ERROR_P (ret))
>> +    THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
>
> Why can't we just leave it as the kernel did when it failed the
> syscall?

The kernel definitely won't write anything if the failure is ENOSYS.  I
don't expect the kernel to write something for the other failures,
either.

Thanks,
Florian
Siddhesh Poyarekar Dec. 8, 2021, 11:27 p.m. UTC | #4
On 12/8/21 23:38, Florian Weimer wrote:
> * Siddhesh Poyarekar:
> 
>>> +#ifdef RSEQ_SIG
>>> +static inline void
>>> +rseq_register_current_thread (struct pthread *self)
>>> +{
>>> +  int ret = INTERNAL_SYSCALL_CALL (rseq,
>>> +                                   &self->rseq_area, sizeof (self->rseq_area),
>>> +                                   0, RSEQ_SIG);
>>> +  if (INTERNAL_SYSCALL_ERROR_P (ret))
>>> +    THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
>>
>> Why can't we just leave it as the kernel did when it failed the
>> syscall?
> 
> The kernel definitely won't write anything if the failure is ENOSYS.  I
> don't expect the kernel to write something for the other failures,
> either.

OK, I had inferred from the outdated manpage patch[1] that the kernel
ensures that an uninitialized cpu_id will be read as -1.  I read the rseq
implementation in the kernel and saw that there are a number of error
paths where the kernel simply returns without touching the user memory.
I suppose what they meant by "uninitialized" in the manpage is
actually "reset after unregister", which is odd.

In any case, what I meant to eventually get at (sorry I wasn't specific; 
I wrote both patch reviews together and didn't realize they'd be read as 
separate emails!) is that RSEQ_CPU_ID_UNINITIALIZED seemed enough for 
all use cases and RSEQ_CPU_ID_REGISTRATION_FAILED seemed unnecessary.

On syscall failure (or tunable being disabled) too it seems safe to do 
THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_UNINITIALIZED); 
AFAICT, __tls_init_tp will run early enough that it won't have 
overwritten any earlier rseq calls from user code.

Is there a use case I'm missing?

Thanks,
Siddhesh

[1] https://lkml.org/lkml/2019/2/28/183
Noah Goldstein Dec. 9, 2021, 1:51 a.m. UTC | #5
On Tue, Dec 7, 2021 at 7:02 AM Florian Weimer via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> The rseq area is placed directly into struct pthread.  rseq
> registration failure is not treated as an error, so it is possible
> that threads run with inconsistent registration status.
>
> <sys/rseq.h> is not yet installed as a public header.
>
> Co-Authored-By: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> ---
> v2: Use volatile access to cpu_id.  Drop csu/libc-tls.c spurious change.
>
>  nptl/descr.h                                |   4 +
>  nptl/pthread_create.c                       |  13 +
>  sysdeps/nptl/dl-tls_init_tp.c               |   8 +-
>  sysdeps/unix/sysv/linux/Makefile            |   9 +-
>  sysdeps/unix/sysv/linux/aarch64/bits/rseq.h |  43 ++++
>  sysdeps/unix/sysv/linux/arm/bits/rseq.h     |  83 +++++++
>  sysdeps/unix/sysv/linux/bits/rseq.h         |  29 +++
>  sysdeps/unix/sysv/linux/mips/bits/rseq.h    |  62 +++++
>  sysdeps/unix/sysv/linux/powerpc/bits/rseq.h |  37 +++
>  sysdeps/unix/sysv/linux/rseq-internal.h     |  45 ++++
>  sysdeps/unix/sysv/linux/s390/bits/rseq.h    |  37 +++
>  sysdeps/unix/sysv/linux/sys/rseq.h          | 174 +++++++++++++
>  sysdeps/unix/sysv/linux/tst-rseq-nptl.c     | 260 ++++++++++++++++++++
>  sysdeps/unix/sysv/linux/tst-rseq.c          |  64 +++++
>  sysdeps/unix/sysv/linux/tst-rseq.h          |  57 +++++
>  sysdeps/unix/sysv/linux/x86/bits/rseq.h     |  30 +++
>  16 files changed, 952 insertions(+), 3 deletions(-)
>  create mode 100644 sysdeps/unix/sysv/linux/aarch64/bits/rseq.h
>  create mode 100644 sysdeps/unix/sysv/linux/arm/bits/rseq.h
>  create mode 100644 sysdeps/unix/sysv/linux/bits/rseq.h
>  create mode 100644 sysdeps/unix/sysv/linux/mips/bits/rseq.h
>  create mode 100644 sysdeps/unix/sysv/linux/powerpc/bits/rseq.h
>  create mode 100644 sysdeps/unix/sysv/linux/rseq-internal.h
>  create mode 100644 sysdeps/unix/sysv/linux/s390/bits/rseq.h
>  create mode 100644 sysdeps/unix/sysv/linux/sys/rseq.h
>  create mode 100644 sysdeps/unix/sysv/linux/tst-rseq-nptl.c
>  create mode 100644 sysdeps/unix/sysv/linux/tst-rseq.c
>  create mode 100644 sysdeps/unix/sysv/linux/tst-rseq.h
>  create mode 100644 sysdeps/unix/sysv/linux/x86/bits/rseq.h
>
> diff --git a/nptl/descr.h b/nptl/descr.h
> index af2a6ab87a..92db305913 100644
> --- a/nptl/descr.h
> +++ b/nptl/descr.h
> @@ -34,6 +34,7 @@
>  #include <bits/types/res_state.h>
>  #include <kernel-features.h>
>  #include <tls-internal-struct.h>
> +#include <sys/rseq.h>
>
>  #ifndef TCB_ALIGNMENT
>  # define TCB_ALIGNMENT 32
> @@ -406,6 +407,9 @@ struct pthread
>    /* Used on strsignal.  */
>    struct tls_internal_t tls_state;
>
> +  /* rseq area registered with the kernel.  */
> +  struct rseq rseq_area;
> +
>    /* This member must be last.  */
>    char end_padding[];
>
> diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
> index bad9eeb52f..ea0d79341e 100644
> --- a/nptl/pthread_create.c
> +++ b/nptl/pthread_create.c
> @@ -32,6 +32,7 @@
>  #include <default-sched.h>
>  #include <futex-internal.h>
>  #include <tls-setup.h>
> +#include <rseq-internal.h>
>  #include "libioP.h"
>  #include <sys/single_threaded.h>
>  #include <version.h>
> @@ -366,6 +367,9 @@ start_thread (void *arg)
>    /* Initialize pointers to locale data.  */
>    __ctype_init ();
>
> +  /* Register rseq TLS to the kernel.  */
> +  rseq_register_current_thread (pd);
> +
>  #ifndef __ASSUME_SET_ROBUST_LIST
>    if (__nptl_set_robust_list_avail)
>  #endif
> @@ -571,6 +575,15 @@ out:
>       process is really dead since 'clone' got passed the CLONE_CHILD_CLEARTID
>       flag.  The 'tid' field in the TCB will be set to zero.
>
> +     rseq TLS is still registered at this point.  Rely on implicit
> +     unregistration performed by the kernel on thread teardown.  This is not a
> +     problem because the rseq TLS lives on the stack, and the stack outlives
> +     the thread.  If TCB allocation is ever changed, additional steps may be
> +     required, such as performing explicit rseq unregistration before
> +     reclaiming the rseq TLS area memory.  It is NOT sufficient to block
> +     signals because the kernel may write to the rseq area even without
> +     signals.
> +
>       The exit code is zero since in case all threads exit by calling
>       'pthread_exit' the exit status must be 0 (zero).  */
>    while (1)
> diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
> index ca494dd3a5..fedb876fdb 100644
> --- a/sysdeps/nptl/dl-tls_init_tp.c
> +++ b/sysdeps/nptl/dl-tls_init_tp.c
> @@ -21,6 +21,7 @@
>  #include <list.h>
>  #include <pthreadP.h>
>  #include <tls.h>
> +#include <rseq-internal.h>
>
>  #ifndef __ASSUME_SET_ROBUST_LIST
>  bool __nptl_set_robust_list_avail;
> @@ -57,11 +58,12 @@ __tls_pre_init_tp (void)
>  void
>  __tls_init_tp (void)
>  {
> +  struct pthread *pd = THREAD_SELF;
> +
>    /* Set up thread stack list management.  */
> -  list_add (&THREAD_SELF->list, &GL (dl_stack_user));
> +  list_add (&pd->list, &GL (dl_stack_user));
>
>     /* Early initialization of the TCB.   */
> -   struct pthread *pd = THREAD_SELF;
>     pd->tid = INTERNAL_SYSCALL_CALL (set_tid_address, &pd->tid);
>     THREAD_SETMEM (pd, specific[0], &pd->specific_1stblock[0]);
>     THREAD_SETMEM (pd, user_stack, true);
> @@ -90,6 +92,8 @@ __tls_init_tp (void)
>        }
>    }
>
> +  rseq_register_current_thread (pd);
> +
>    /* Set initial thread's stack block from 0 up to __libc_stack_end.
>       It will be bigger than it actually is, but for unwind.c/pt-longjmp.c
>       purposes this is good enough.  */
> diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
> index 29c6c78f98..eb0f5fc021 100644
> --- a/sysdeps/unix/sysv/linux/Makefile
> +++ b/sysdeps/unix/sysv/linux/Makefile
> @@ -131,7 +131,10 @@ ifeq ($(have-GLIBC_2.27)$(build-shared),yesyes)
>  tests += tst-ofdlocks-compat
>  endif
>
> -tests-internal += tst-sigcontext-get_pc
> +tests-internal += \
> +  tst-rseq \
> +  tst-sigcontext-get_pc \
> +  # tests-internal
>
>  tests-time64 += \
>    tst-adjtimex-time64 \
> @@ -357,4 +360,8 @@ endif
>
>  ifeq ($(subdir),nptl)
>  tests += tst-align-clone tst-getpid1
> +
> +# tst-rseq-nptl is an internal test because it requires a definition of
> +# __NR_rseq from the internal system call list.
> +tests-internal += tst-rseq-nptl
>  endif
> diff --git a/sysdeps/unix/sysv/linux/aarch64/bits/rseq.h b/sysdeps/unix/sysv/linux/aarch64/bits/rseq.h
> new file mode 100644
> index 0000000000..9ba92725c7
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/aarch64/bits/rseq.h
> @@ -0,0 +1,43 @@
> +/* Restartable Sequences Linux aarch64 architecture header.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
> +#endif
> +
> +/* RSEQ_SIG is a signature required before each abort handler code.
> +
> +   It is a 32-bit value that maps to actual architecture code compiled
> +   into applications and libraries.  It needs to be defined for each
> +   architecture.  When choosing this value, it needs to be taken into
> +   account that generating invalid instructions may have ill effects on
> +   tools like objdump, and may also have impact on the CPU speculative
> +   execution efficiency in some cases.
> +
> +   aarch64 -mbig-endian generates mixed endianness code vs data:
> +   little-endian code and big-endian data.  Ensure the RSEQ_SIG signature
> +   matches code endianness.  */
> +
> +#define RSEQ_SIG_CODE  0xd428bc00  /* BRK #0x45E0.  */
> +
> +#ifdef __AARCH64EB__
> +# define RSEQ_SIG_DATA 0x00bc28d4  /* BRK #0x45E0.  */
> +#else
> +# define RSEQ_SIG_DATA RSEQ_SIG_CODE
> +#endif
> +
> +#define RSEQ_SIG       RSEQ_SIG_DATA
> diff --git a/sysdeps/unix/sysv/linux/arm/bits/rseq.h b/sysdeps/unix/sysv/linux/arm/bits/rseq.h
> new file mode 100644
> index 0000000000..0542b26f6a
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/arm/bits/rseq.h
> @@ -0,0 +1,83 @@
> +/* Restartable Sequences Linux arm architecture header.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
> +#endif
> +
> +/*
> +   RSEQ_SIG is a signature required before each abort handler code.
> +
> +   It is a 32-bit value that maps to actual architecture code compiled
> +   into applications and libraries.  It needs to be defined for each
> +   architecture.  When choosing this value, it needs to be taken into
> +   account that generating invalid instructions may have ill effects on
> +   tools like objdump, and may also have impact on the CPU speculative
> +   execution efficiency in some cases.
> +
> +   - ARM little endian
> +
> +   RSEQ_SIG uses the udf A32 instruction with an uncommon immediate operand
> +   value 0x5de3.  This traps if user-space reaches this instruction by mistake,
> +   and the uncommon operand ensures the kernel does not move the instruction
> +   pointer to attacker-controlled code on rseq abort.
> +
> +   The instruction pattern in the A32 instruction set is:
> +
> +   e7f5def3    udf    #24035    ; 0x5de3
> +
> +   This translates to the following instruction pattern in the T16 instruction
> +   set:
> +
> +   little endian:
> +   def3        udf    #243      ; 0xf3
> +   e7f5        b.n    <7f5>
> +
> +   - ARMv6+ big endian (BE8):
> +
> +   ARMv6+ -mbig-endian generates mixed endianness code vs data: little-endian
> +   code and big-endian data.  The data value of the signature needs to have its
> +   byte order reversed to generate the trap instruction:
> +
> +   Data: 0xf3def5e7
> +
> +   Translates to this A32 instruction pattern:
> +
> +   e7f5def3    udf    #24035    ; 0x5de3
> +
> +   Translates to this T16 instruction pattern:
> +
> +   def3        udf    #243      ; 0xf3
> +   e7f5        b.n    <7f5>
> +
> +   - Prior to ARMv6 big endian (BE32):
> +
> +   Prior to ARMv6, -mbig-endian generates big-endian code and data
> +   (which match), so the endianness of the data representation of the
> +   signature should not be reversed.  However, the choice between BE32
> +   and BE8 is done by the linker, so we cannot know whether code and
> +   data endianness will be mixed before the linker is invoked.  So rather
> +   than try to play tricks with the linker, the rseq signature is simply
> +   data (not a trap instruction) prior to ARMv6 on big endian.  This is
> +   why the signature is expressed as data (.word) rather than as
> +   instruction (.inst) in assembler.  */
> +
> +#ifdef __ARMEB__
> +# define RSEQ_SIG    0xf3def5e7      /* udf    #24035    ; 0x5de3 (ARMv6+) */
> +#else
> +# define RSEQ_SIG    0xe7f5def3      /* udf    #24035    ; 0x5de3 */
> +#endif
> diff --git a/sysdeps/unix/sysv/linux/bits/rseq.h b/sysdeps/unix/sysv/linux/bits/rseq.h
> new file mode 100644
> index 0000000000..46cf5d1c74
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/bits/rseq.h
> @@ -0,0 +1,29 @@
> +/* Restartable Sequences architecture header.  Stub version.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
> +#endif
> +
> +/* RSEQ_SIG is a signature required before each abort handler code.
> +
> +   It is a 32-bit value that maps to actual architecture code compiled
> +   into applications and libraries.  It needs to be defined for each
> +   architecture.  When choosing this value, it needs to be taken into
> +   account that generating invalid instructions may have ill effects on
> +   tools like objdump, and may also have impact on the CPU speculative
> +   execution efficiency in some cases.  */
> diff --git a/sysdeps/unix/sysv/linux/mips/bits/rseq.h b/sysdeps/unix/sysv/linux/mips/bits/rseq.h
> new file mode 100644
> index 0000000000..a9defee568
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/mips/bits/rseq.h
> @@ -0,0 +1,62 @@
> +/* Restartable Sequences Linux mips architecture header.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
> +#endif
> +
> +/* RSEQ_SIG is a signature required before each abort handler code.
> +
> +   It is a 32-bit value that maps to actual architecture code compiled
> +   into applications and libraries.  It needs to be defined for each
> +   architecture.  When choosing this value, it needs to be taken into
> +   account that generating invalid instructions may have ill effects on
> +   tools like objdump, and may also have impact on the CPU speculative
> +   execution efficiency in some cases.
> +
> +   RSEQ_SIG uses the break instruction.  The instruction pattern is:
> +
> +   On MIPS:
> +        0350000d        break     0x350
> +
> +   On nanoMIPS:
> +        00100350        break     0x350
> +
> +   On microMIPS:
> +        0000d407        break     0x350
> +
> +   For nanoMIPS32 and microMIPS, the instruction stream is encoded as
> +   16-bit halfwords, so the signature halfwords need to be swapped
> +   accordingly for little-endian.  */
> +
> +#if defined (__nanomips__)
> +# ifdef __MIPSEL__
> +#  define RSEQ_SIG      0x03500010
> +# else
> +#  define RSEQ_SIG      0x00100350
> +# endif
> +#elif defined (__mips_micromips)
> +# ifdef __MIPSEL__
> +#  define RSEQ_SIG      0xd4070000
> +# else
> +#  define RSEQ_SIG      0x0000d407
> +# endif
> +#elif defined (__mips__)
> +# define RSEQ_SIG       0x0350000d
> +#else
> +/* Unknown MIPS architecture.  */
> +#endif
> diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/rseq.h b/sysdeps/unix/sysv/linux/powerpc/bits/rseq.h
> new file mode 100644
> index 0000000000..05b3cf7b8f
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/powerpc/bits/rseq.h
> @@ -0,0 +1,37 @@
> +/* Restartable Sequences Linux powerpc architecture header.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
> +#endif
> +
> +/* RSEQ_SIG is a signature required before each abort handler code.
> +
> +   It is a 32-bit value that maps to actual architecture code compiled
> +   into applications and libraries.  It needs to be defined for each
> +   architecture.  When choosing this value, it needs to be taken into
> +   account that generating invalid instructions may have ill effects on
> +   tools like objdump, and may also have impact on the CPU speculative
> +   execution efficiency in some cases.
> +
> +   RSEQ_SIG uses the following trap instruction:
> +
> +   powerpc-be:    0f e5 00 0b           twui   r5,11
> +   powerpc64-le:  0b 00 e5 0f           twui   r5,11
> +   powerpc64-be:  0f e5 00 0b           twui   r5,11  */
> +
> +#define RSEQ_SIG        0x0fe5000b
> diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h
> new file mode 100644
> index 0000000000..909f547825
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/rseq-internal.h
> @@ -0,0 +1,45 @@
> +/* Restartable Sequences internal API.  Linux implementation.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef RSEQ_INTERNAL_H
> +#define RSEQ_INTERNAL_H
> +
> +#include <sysdep.h>
> +#include <errno.h>
> +#include <kernel-features.h>
> +#include <stdio.h>
> +#include <sys/rseq.h>
> +
> +#ifdef RSEQ_SIG
> +static inline void
> +rseq_register_current_thread (struct pthread *self)
> +{
> +  int ret = INTERNAL_SYSCALL_CALL (rseq,
> +                                   &self->rseq_area, sizeof (self->rseq_area),
> +                                   0, RSEQ_SIG);
> +  if (INTERNAL_SYSCALL_ERROR_P (ret))
> +    THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
> +}
> +#else /* RSEQ_SIG */
> +static inline void
> +rseq_register_current_thread (struct pthread *self)
> +{
> +  THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
> +}
> +#endif /* RSEQ_SIG */
> +
> +#endif /* rseq-internal.h */
> diff --git a/sysdeps/unix/sysv/linux/s390/bits/rseq.h b/sysdeps/unix/sysv/linux/s390/bits/rseq.h
> new file mode 100644
> index 0000000000..3030e38f40
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/s390/bits/rseq.h
> @@ -0,0 +1,37 @@
> +/* Restartable Sequences Linux s390 architecture header.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
> +#endif
> +
> +/* RSEQ_SIG is a signature required before each abort handler code.
> +
> +   It is a 32-bit value that maps to actual architecture code compiled
> +   into applications and libraries.  It needs to be defined for each
> +   architecture.  When choosing this value, it needs to be taken into
> +   account that generating invalid instructions may have ill effects on
> +   tools like objdump, and may also have impact on the CPU speculative
> +   execution efficiency in some cases.
> +
> +   RSEQ_SIG uses the trap4 instruction.  As Linux does not make use of the
> +   access-register mode nor the linkage stack this instruction will always
> +   cause a special-operation exception (the trap-enabled bit in the DUCT
> +   is and will stay 0).  The instruction pattern is
> +       b2 ff 0f ff        trap4   4095(%r0)  */
> +
> +#define RSEQ_SIG        0xB2FF0FFF
> diff --git a/sysdeps/unix/sysv/linux/sys/rseq.h b/sysdeps/unix/sysv/linux/sys/rseq.h
> new file mode 100644
> index 0000000000..c8edff50d4
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/sys/rseq.h
> @@ -0,0 +1,174 @@
> +/* Restartable Sequences exported symbols.  Linux header.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +#define _SYS_RSEQ_H    1
> +
> +/* Architecture-specific rseq signature.  */
> +#include <bits/rseq.h>
> +
> +#include <stdint.h>
> +#include <sys/cdefs.h>
> +#include <bits/endian.h>
> +
> +#ifdef __has_include
> +# if __has_include ("linux/rseq.h")
> +#  define __GLIBC_HAVE_KERNEL_RSEQ
> +# endif
> +#else
> +# include <linux/version.h>
> +# if LINUX_VERSION_CODE >= KERNEL_VERSION (4, 18, 0)
> +#  define __GLIBC_HAVE_KERNEL_RSEQ
> +# endif
> +#endif
> +
> +#ifdef __GLIBC_HAVE_KERNEL_RSEQ
> +/* We use the structure declarations from the kernel headers.  */
> +# include <linux/rseq.h>
> +#else /* __GLIBC_HAVE_KERNEL_RSEQ */
> +/* We use a copy of the include/uapi/linux/rseq.h kernel header.  */
> +
> +enum rseq_cpu_id_state
> +  {
> +    RSEQ_CPU_ID_UNINITIALIZED = -1,
> +    RSEQ_CPU_ID_REGISTRATION_FAILED = -2,
> +  };
> +
> +enum rseq_flags
> +  {
> +    RSEQ_FLAG_UNREGISTER = (1 << 0),
> +  };
> +
> +enum rseq_cs_flags_bit
> +  {
> +    RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0,
> +    RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1,
> +    RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2,
> +  };
> +
> +enum rseq_cs_flags
> +  {
> +    RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT =
> +      (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
> +    RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL =
> +      (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
> +    RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE =
> +      (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
> +  };
> +
> +/* struct rseq_cs is aligned on 32 bytes to ensure it is always
> +   contained within a single cache-line.  It is usually declared as
> +   link-time constant data.  */
> +struct rseq_cs
> +  {
> +    /* Version of this structure.  */
> +    uint32_t version;
> +    /* enum rseq_cs_flags.  */
> +    uint32_t flags;
> +    uint64_t start_ip;
> +    /* Offset from start_ip.  */
> +    uint64_t post_commit_offset;
> +    uint64_t abort_ip;
> +  } __attribute__ ((__aligned__ (32)));
> +
> +/* struct rseq is aligned on 32 bytes to ensure it is always
> +   contained within a single cache-line.
> +
> +   A single struct rseq per thread is allowed.  */
> +struct rseq
> +  {
> +    /* Restartable sequences cpu_id_start field.  Updated by the
> +       kernel.  Read by user-space with single-copy atomicity
> +       semantics.  This field should only be read by the thread which
> +       registered this data structure.  Aligned on 32-bit.  Always
> +       contains a value in the range of possible CPUs, although the
> +       value may not be the actual current CPU (e.g. if rseq is not
> +       initialized).  This CPU number value should always be compared
> +       against the value of the cpu_id field before performing a rseq
> +       commit or returning a value read from a data structure indexed
> +       using the cpu_id_start value.  */
> +    uint32_t cpu_id_start;
> +    /* Restartable sequences cpu_id field.  Updated by the kernel.
> +       Read by user-space with single-copy atomicity semantics.  This
> +       field should only be read by the thread which registered this
> +       data structure.  Aligned on 32-bit.  Values
> +       RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED
> +       have a special semantic: the former means "rseq uninitialized",
> +       and the latter means "rseq initialization failed".  This value is
> +       meant to be read within rseq critical sections and compared
> +       with the cpu_id_start value previously read, before performing
> +       the commit instruction, or read and compared with the
> +       cpu_id_start value before returning a value loaded from a data
> +       structure indexed using the cpu_id_start value.  */
> +    uint32_t cpu_id;
> +    /* Restartable sequences rseq_cs field.
> +
> +       Contains NULL when no critical section is active for the current
> +       thread, or holds a pointer to the currently active struct rseq_cs.
> +
> +       Updated by user-space, which sets the address of the currently
> +       active rseq_cs at the beginning of assembly instruction sequence
> +       block, and set to NULL by the kernel when it restarts an assembly
> +       instruction sequence block, as well as when the kernel detects that
> +       it is preempting or delivering a signal outside of the range
> +       targeted by the rseq_cs.  Also needs to be set to NULL by user-space
> +       before reclaiming memory that contains the targeted struct rseq_cs.
> +
> +       Read and set by the kernel.  Set by user-space with single-copy
> +       atomicity semantics.  This field should only be updated by the
> +       thread which registered this data structure.  Aligned on 64-bit.  */
> +    union
> +      {
> +        uint64_t ptr64;
> +# ifdef __LP64__
> +        uint64_t ptr;
> +# else /* __LP64__ */
> +        struct
> +          {
> +#if __BYTE_ORDER == __BIG_ENDIAN
> +            uint32_t padding; /* Initialized to zero.  */
> +            uint32_t ptr32;
> +#  else /* LITTLE */
> +            uint32_t ptr32;
> +            uint32_t padding; /* Initialized to zero.  */
> +#  endif /* ENDIAN */
> +          } ptr;
> +# endif /* __LP64__ */
> +      } rseq_cs;
> +
> +    /* Restartable sequences flags field.
> +
> +       This field should only be updated by the thread which
> +       registered this data structure.  Read by the kernel.
> +       Mainly used for single-stepping through rseq critical sections
> +       with debuggers.
> +
> +       - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
> +           Inhibit instruction sequence block restart on preemption
> +           for this thread.
> +       - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
> +           Inhibit instruction sequence block restart on signal
> +           delivery for this thread.
> +       - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
> +           Inhibit instruction sequence block restart on migration for
> +           this thread.  */
> +    uint32_t flags;
> +  } __attribute__ ((__aligned__ (32)));
> +
> +#endif /* __GLIBC_HAVE_KERNEL_RSEQ */
> +
> +#endif /* sys/rseq.h */
> diff --git a/sysdeps/unix/sysv/linux/tst-rseq-nptl.c b/sysdeps/unix/sysv/linux/tst-rseq-nptl.c
> new file mode 100644
> index 0000000000..d31d94445c
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/tst-rseq-nptl.c
> @@ -0,0 +1,260 @@
> +/* Restartable Sequences NPTL test.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* These tests validate that rseq is registered from various execution
> +   contexts (main thread, destructor, other threads, other threads created
> +   from destructor, forked process (without exec), pthread_atfork handlers,
> +   pthread setspecific destructors, signal handlers, atexit handlers).
> +
> +   See the Linux kernel selftests for extensive rseq stress-tests.  */
> +
> +#include <stdio.h>
> +#include <support/check.h>
> +#include <support/xthread.h>
> +#include <sys/rseq.h>
> +#include <unistd.h>
> +
> +#ifdef RSEQ_SIG
> +# include <array_length.h>
> +# include <errno.h>
> +# include <error.h>
> +# include <pthread.h>
> +# include <signal.h>
> +# include <stdlib.h>
> +# include <string.h>
> +# include <support/namespace.h>
> +# include <support/xsignal.h>
> +# include <syscall.h>
> +# include <sys/types.h>
> +# include <sys/wait.h>
> +# include "tst-rseq.h"
> +
> +static pthread_key_t rseq_test_key;
> +
> +static void
> +atfork_prepare (void)
> +{
> +  if (!rseq_thread_registered ())
> +    {
> +      printf ("error: rseq not registered in pthread atfork prepare\n");
> +      support_record_failure ();
> +    }
> +}
> +
> +static void
> +atfork_parent (void)
> +{
> +  if (!rseq_thread_registered ())
> +    {
> +      printf ("error: rseq not registered in pthread atfork parent\n");
> +      support_record_failure ();
> +    }
> +}
> +
> +static void
> +atfork_child (void)
> +{
> +  if (!rseq_thread_registered ())
> +    {
> +      printf ("error: rseq not registered in pthread atfork child\n");
> +      support_record_failure ();
> +    }
> +}
> +
> +static void
> +rseq_key_destructor (void *arg)
> +{
> +  /* Cannot use deferred failure reporting after main returns.  */
> +  if (!rseq_thread_registered ())
> +    FAIL_EXIT1 ("rseq not registered in pthread key destructor");
> +}
> +
> +static void
> +atexit_handler (void)
> +{
> +  /* Cannot use deferred failure reporting after main returns.  */
> +  if (!rseq_thread_registered ())
> +    FAIL_EXIT1 ("rseq not registered in atexit handler");
> +}
> +
> +/* Used to avoid -Werror=stringop-overread warning with
> +   pthread_setspecific and GCC 11.  */
> +static char one = 1;
> +
> +static void
> +do_rseq_main_test (void)
> +{
> +  TEST_COMPARE (atexit (atexit_handler), 0);
> +  rseq_test_key = xpthread_key_create (rseq_key_destructor);
> +  TEST_COMPARE (pthread_atfork (atfork_prepare, atfork_parent, atfork_child), 0);
> +  xraise (SIGUSR1);
> +  TEST_COMPARE (pthread_setspecific (rseq_test_key, &one), 0);
> +  TEST_VERIFY_EXIT (rseq_thread_registered ());
> +}
> +
> +static void
> +cancel_routine (void *arg)
> +{
> +  if (!rseq_thread_registered ())
> +    {
> +      printf ("error: rseq not registered in cancel routine\n");
> +      support_record_failure ();
> +    }
> +}
> +
> +static pthread_barrier_t cancel_thread_barrier;
> +static pthread_cond_t cancel_thread_cond = PTHREAD_COND_INITIALIZER;
> +static pthread_mutex_t cancel_thread_mutex = PTHREAD_MUTEX_INITIALIZER;
> +
> +static void
> +test_cancel_thread (void)
> +{
> +  pthread_cleanup_push (cancel_routine, NULL);
> +  (void) xpthread_barrier_wait (&cancel_thread_barrier);
> +  /* Wait forever until cancellation.  */
> +  xpthread_cond_wait (&cancel_thread_cond, &cancel_thread_mutex);
> +  pthread_cleanup_pop (0);
> +}
> +
> +static void *
> +thread_function (void * arg)
> +{
> +  int i = (int) (intptr_t) arg;
> +
> +  xraise (SIGUSR1);
> +  if (i == 0)
> +    test_cancel_thread ();
> +  TEST_COMPARE (pthread_setspecific (rseq_test_key, &one), 0);
> +  return rseq_thread_registered () ? NULL : (void *) 1l;
> +}
> +
> +static void
> +sighandler (int sig)
> +{
> +  if (!rseq_thread_registered ())
> +    {
> +      printf ("error: rseq not registered in signal handler\n");
> +      support_record_failure ();
> +    }
> +}
> +
> +static void
> +setup_signals (void)
> +{
> +  struct sigaction sa;
> +
> +  sigemptyset (&sa.sa_mask);
> +  sigaddset (&sa.sa_mask, SIGUSR1);
> +  sa.sa_flags = 0;
> +  sa.sa_handler = sighandler;
> +  xsigaction (SIGUSR1, &sa, NULL);
> +}
> +
> +static int
> +do_rseq_threads_test (int nr_threads)
> +{
> +  pthread_t th[nr_threads];
> +  int i;
> +  int result = 0;
> +
> +  xpthread_barrier_init (&cancel_thread_barrier, NULL, 2);
> +
> +  for (i = 0; i < nr_threads; ++i)
> +    th[i] = xpthread_create (NULL, thread_function,
> +                             (void *) (intptr_t) i);
> +
> +  (void) xpthread_barrier_wait (&cancel_thread_barrier);
> +
> +  xpthread_cancel (th[0]);
> +
> +  for (i = 0; i < nr_threads; ++i)
> +    {
> +      void *v;
> +
> +      v = xpthread_join (th[i]);
> +      if (i != 0 && v != NULL)
> +        {
> +          printf ("error: join %d successful, but child failed\n", i);
> +          result = 1;
> +        }
> +      else if (i == 0 && v == NULL)
> +        {
> +          printf ("error: join %d successful, child did not fail as expected\n", i);
> +          result = 1;
> +        }
> +    }
> +
> +  xpthread_barrier_destroy (&cancel_thread_barrier);
> +
> +  return result;
> +}
> +
> +static void
> +subprocess_callback (void *closure)
> +{
> +  do_rseq_main_test ();
> +}
> +
> +static void
> +do_rseq_fork_test (void)
> +{
> +  support_isolate_in_subprocess (subprocess_callback, NULL);
> +}
> +
> +static int
> +do_rseq_test (void)
> +{
> +  int t[] = { 1, 2, 6, 5, 4, 3, 50 };
> +  int i, result = 0;
> +
> +  if (!rseq_available ())
> +    FAIL_UNSUPPORTED ("kernel does not support rseq, skipping test");
> +  setup_signals ();
> +  xraise (SIGUSR1);
> +  do_rseq_main_test ();
> +  for (i = 0; i < array_length (t); i++)
> +    if (do_rseq_threads_test (t[i]))
> +      result = 1;
> +  do_rseq_fork_test ();
> +  return result;
> +}
> +
> +static void __attribute__ ((destructor))
> +do_rseq_destructor_test (void)
> +{
> +  /* Cannot use deferred failure reporting after main returns.  */
> +  if (do_rseq_test ())
> +    FAIL_EXIT1 ("rseq not registered within destructor");
> +  xpthread_key_delete (rseq_test_key);
> +}
> +
> +#else /* RSEQ_SIG */
> +static int
> +do_rseq_test (void)
> +{
> +  FAIL_UNSUPPORTED ("glibc does not define RSEQ_SIG, skipping test");
> +  return 0;
> +}
> +#endif /* RSEQ_SIG */
> +
> +static int
> +do_test (void)
> +{
> +  return do_rseq_test ();
> +}
> +
> +#include <support/test-driver.c>
> diff --git a/sysdeps/unix/sysv/linux/tst-rseq.c b/sysdeps/unix/sysv/linux/tst-rseq.c
> new file mode 100644
> index 0000000000..926376b6a5
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/tst-rseq.c
> @@ -0,0 +1,64 @@
> +/* Restartable Sequences single-threaded tests.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* These tests validate that rseq is registered from main in an executable
> +   not linked against libpthread.  */
> +
> +#include <support/check.h>
> +#include <stdio.h>
> +#include <sys/rseq.h>
> +#include <unistd.h>
> +
> +#ifdef RSEQ_SIG
> +# include <errno.h>
> +# include <error.h>
> +# include <stdlib.h>
> +# include <string.h>
> +# include <syscall.h>
> +# include "tst-rseq.h"
> +
> +/* Verify that rseq_thread_registered holds for the calling thread; the
> +   test exits if it does not.  */
> +static void
> +do_rseq_main_test (void)
> +{
> +  TEST_VERIFY_EXIT (rseq_thread_registered ());
> +}
> +
> +/* Exit as unsupported when rseq_available reports false; otherwise run
> +   do_rseq_main_test.  */
> +static void
> +do_rseq_test (void)
> +{
> +  if (!rseq_available ())
> +    FAIL_UNSUPPORTED ("kernel does not support rseq, skipping test");
> +
> +  do_rseq_main_test ();
> +}
> +#else /* RSEQ_SIG */
> +/* Variant used when RSEQ_SIG is not defined: report the test as
> +   unsupported.  */
> +static void
> +do_rseq_test (void)
> +{
> +  FAIL_UNSUPPORTED ("glibc does not define RSEQ_SIG, skipping test");
> +}
> +#endif /* RSEQ_SIG */
> +
> +/* Test entry point: run do_rseq_test and report success.  Failures are
> +   signalled from inside do_rseq_test, which has no return value.  */
> +static int
> +do_test (void)
> +{
> +  const int success = 0;
> +
> +  do_rseq_test ();
> +  return success;
> +}

Should the test possibly include a simple critical section? Maybe a while (1)
loop that either times out or hits the abort handler?
Timeout -> error.
Abort handler -> test passed.
> +
> +#include <support/test-driver.c>
> diff --git a/sysdeps/unix/sysv/linux/tst-rseq.h b/sysdeps/unix/sysv/linux/tst-rseq.h
> new file mode 100644
> index 0000000000..a476c316fc
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/tst-rseq.h
> @@ -0,0 +1,57 @@
> +/* Restartable Sequences tests header.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <errno.h>
> +#include <error.h>
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <string.h>
> +#include <support/check.h>
> +#include <syscall.h>
> +#include <sys/rseq.h>
> +#include <tls.h>
> +
> +/* Return true if the calling thread's rseq area holds a valid CPU
> +   number, i.e. neither RSEQ_CPU_ID_UNINITIALIZED (-1) nor
> +   RSEQ_CPU_ID_REGISTRATION_FAILED (-2).  */
> +static inline bool
> +rseq_thread_registered (void)
> +{
> +  /* cpu_id is declared as an unsigned 32-bit field, so comparing it
> +     against zero directly is always true.  Read it into a signed
> +     32-bit variable so the negative marker values are detected.  */
> +  int32_t cpu_id = THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id);
> +
> +  return cpu_id >= 0;
> +}
> +
> +/* Invoke the rseq system call through syscall with the given
> +   arguments and return its result converted to int.  */
> +static inline int
> +sys_rseq (struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig)
> +{
> +  long int ret = syscall (__NR_rseq, rseq_abi, rseq_len, flags, sig);
> +
> +  return ret;
> +}
> +
> +/* Probe whether the running kernel implements the rseq system call.
> +   The probe passes a NULL area and a zero length, so the call is
> +   expected to fail: ENOSYS yields false, EINVAL yields true.  A call
> +   that does not return -1, or any other errno value, terminates the
> +   test through FAIL_EXIT1.  */
> +static inline bool
> +rseq_available (void)
> +{
> +  int rc = sys_rseq (NULL, 0, 0, 0);
> +
> +  if (rc != -1)
> +    FAIL_EXIT1 ("Unexpected rseq return value %d", rc);
> +
> +  switch (errno)
> +    {
> +    case ENOSYS:
> +      return false;
> +    case EINVAL:
> +      /* rseq is implemented, but detected an invalid rseq_len parameter.  */
> +      return true;
> +    default:
> +      FAIL_EXIT1 ("Unexpected rseq error %s", strerror (errno));
> +    }
> +}
> diff --git a/sysdeps/unix/sysv/linux/x86/bits/rseq.h b/sysdeps/unix/sysv/linux/x86/bits/rseq.h
> new file mode 100644
> index 0000000000..9fc909e7c8
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/x86/bits/rseq.h
> @@ -0,0 +1,30 @@
> +/* Restartable Sequences Linux x86 architecture header.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _SYS_RSEQ_H
> +# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
> +#endif
> +
> +/* RSEQ_SIG is a signature required before each abort handler code.
> +
> +   RSEQ_SIG is used with the following reserved undefined instructions, which
> +   trap in user-space:
> +
> +   x86-32:    0f b9 3d 53 30 05 53      ud1    0x53053053,%edi
> +   x86-64:    0f b9 3d 53 30 05 53      ud1    0x53053053(%rip),%edi  */
> +
> +#define RSEQ_SIG        0x53053053
> --
> 2.33.1
>
>
Florian Weimer Dec. 9, 2021, 7:42 a.m. UTC | #6
* Siddhesh Poyarekar:

> On 12/8/21 23:38, Florian Weimer wrote:
>> * Siddhesh Poyarekar:
>> 
>>>> +#ifdef RSEQ_SIG
>>>> +static inline void
>>>> +rseq_register_current_thread (struct pthread *self)
>>>> +{
>>>> +  int ret = INTERNAL_SYSCALL_CALL (rseq,
>>>> +                                   &self->rseq_area, sizeof (self->rseq_area),
>>>> +                                   0, RSEQ_SIG);
>>>> +  if (INTERNAL_SYSCALL_ERROR_P (ret))
>>>> +    THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
>>>
>>> Why can't we just leave it as the kernel did when it failed the
>>> syscall?
>> The kernel definitely won't write anything if the failure is ENOSYS.
>> I
>> don't expect the kernel to write something for the other failures,
>> either.
>
> OK, I interpreted from the outdated manpage patch[1] that the
> kernel ensures that uninitialized cpu_id will be read as -1.  I read
> the rseq implementation in the kernel and saw that there are a number
> of error paths where the kernel simply returns without touching the
> user memory.   I suppose what they meant by "uninitialized" in the
> manpage is actually "reset after unregister", which is odd.
>
> In any case, what I meant to eventually get at (sorry I wasn't
> specific; I wrote both patch reviews together and didn't realize
> they'd be read as separate emails!) is that RSEQ_CPU_ID_UNINITIALIZED
> seemed enough for all use cases and RSEQ_CPU_ID_REGISTRATION_FAILED
> seemed unnecessary.

Yes, but the constant is (also) defined in the UAPI headers, so its
value is fixed.  And RSEQ_CPU_ID_REGISTRATION_FAILED (that is, -2)
is closer to the behavior we want to trigger in application (that there
is nothing to register because we already tried and failed).

Thanks,
Florian
Siddhesh Poyarekar Dec. 9, 2021, 8:01 a.m. UTC | #7
On 12/9/21 13:12, Florian Weimer wrote:
> * Siddhesh Poyarekar:
> 
>> On 12/8/21 23:38, Florian Weimer wrote:
>>> * Siddhesh Poyarekar:
>>>
>>>>> +#ifdef RSEQ_SIG
>>>>> +static inline void
>>>>> +rseq_register_current_thread (struct pthread *self)
>>>>> +{
>>>>> +  int ret = INTERNAL_SYSCALL_CALL (rseq,
>>>>> +                                   &self->rseq_area, sizeof (self->rseq_area),
>>>>> +                                   0, RSEQ_SIG);
>>>>> +  if (INTERNAL_SYSCALL_ERROR_P (ret))
>>>>> +    THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
>>>>
>>>> Why can't we just leave it as the kernel did when it failed the
>>>> syscall?
>>> The kernel definitely won't write anything if the failure is ENOSYS.
>>> I
>>> don't expect the kernel to write something for the other failures,
>>> either.
>>
>> OK, I interpreted from the outdated manpage patch[1] that the
>> kernel ensures that uninitialized cpu_id will be read as -1.  I read
>> the rseq implementation in the kernel and saw that there are a number
>> of error paths where the kernel simply returns without touching the
>> user memory.   I suppose what they meant by "uninitialized" in the
>> manpage is actually "reset after unregister", which is odd.
>>
>> In any case, what I meant to eventually get at (sorry I wasn't
>> specific; I wrote both patch reviews together and didn't realize
>> they'd be read as separate emails!) is that RSEQ_CPU_ID_UNINITIALIZED
>> seemed enough for all use cases and RSEQ_CPU_ID_REGISTRATION_FAILED
>> seemed unnecessary.
> 
> Yes, but the constant is (also) defined in the UAPI headers, so its
> value is fixed.  And RSEQ_CPU_ID_REGISTRATION_FAILED (that is, -2)
> is closer to the behavior we want to trigger in application (that there
> is nothing to register because we already tried and failed).

OK, I see it in the headers, sorry.  Once again I assumed only 
RSEQ_CPU_ID_UNINITIALIZED was defined because the man page didn't 
specify it :/

It's redundant IMO, but that's a Linux API problem.  No objections from 
me then.

Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
diff mbox series

Patch

diff --git a/nptl/descr.h b/nptl/descr.h
index af2a6ab87a..92db305913 100644
--- a/nptl/descr.h
+++ b/nptl/descr.h
@@ -34,6 +34,7 @@ 
 #include <bits/types/res_state.h>
 #include <kernel-features.h>
 #include <tls-internal-struct.h>
+#include <sys/rseq.h>
 
 #ifndef TCB_ALIGNMENT
 # define TCB_ALIGNMENT 32
@@ -406,6 +407,9 @@  struct pthread
   /* Used on strsignal.  */
   struct tls_internal_t tls_state;
 
+  /* rseq area registered with the kernel.  */
+  struct rseq rseq_area;
+
   /* This member must be last.  */
   char end_padding[];
 
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index bad9eeb52f..ea0d79341e 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -32,6 +32,7 @@ 
 #include <default-sched.h>
 #include <futex-internal.h>
 #include <tls-setup.h>
+#include <rseq-internal.h>
 #include "libioP.h"
 #include <sys/single_threaded.h>
 #include <version.h>
@@ -366,6 +367,9 @@  start_thread (void *arg)
   /* Initialize pointers to locale data.  */
   __ctype_init ();
 
+  /* Register rseq TLS to the kernel.  */
+  rseq_register_current_thread (pd);
+
 #ifndef __ASSUME_SET_ROBUST_LIST
   if (__nptl_set_robust_list_avail)
 #endif
@@ -571,6 +575,15 @@  out:
      process is really dead since 'clone' got passed the CLONE_CHILD_CLEARTID
      flag.  The 'tid' field in the TCB will be set to zero.
 
+     rseq TLS is still registered at this point.  Rely on implicit
+     unregistration performed by the kernel on thread teardown.  This is not a
+     problem because the rseq TLS lives on the stack, and the stack outlives
+     the thread.  If TCB allocation is ever changed, additional steps may be
+     required, such as performing explicit rseq unregistration before
+     reclaiming the rseq TLS area memory.  It is NOT sufficient to block
+     signals because the kernel may write to the rseq area even without
+     signals.
+
      The exit code is zero since in case all threads exit by calling
      'pthread_exit' the exit status must be 0 (zero).  */
   while (1)
diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
index ca494dd3a5..fedb876fdb 100644
--- a/sysdeps/nptl/dl-tls_init_tp.c
+++ b/sysdeps/nptl/dl-tls_init_tp.c
@@ -21,6 +21,7 @@ 
 #include <list.h>
 #include <pthreadP.h>
 #include <tls.h>
+#include <rseq-internal.h>
 
 #ifndef __ASSUME_SET_ROBUST_LIST
 bool __nptl_set_robust_list_avail;
@@ -57,11 +58,12 @@  __tls_pre_init_tp (void)
 void
 __tls_init_tp (void)
 {
+  struct pthread *pd = THREAD_SELF;
+
   /* Set up thread stack list management.  */
-  list_add (&THREAD_SELF->list, &GL (dl_stack_user));
+  list_add (&pd->list, &GL (dl_stack_user));
 
    /* Early initialization of the TCB.   */
-   struct pthread *pd = THREAD_SELF;
    pd->tid = INTERNAL_SYSCALL_CALL (set_tid_address, &pd->tid);
    THREAD_SETMEM (pd, specific[0], &pd->specific_1stblock[0]);
    THREAD_SETMEM (pd, user_stack, true);
@@ -90,6 +92,8 @@  __tls_init_tp (void)
       }
   }
 
+  rseq_register_current_thread (pd);
+
   /* Set initial thread's stack block from 0 up to __libc_stack_end.
      It will be bigger than it actually is, but for unwind.c/pt-longjmp.c
      purposes this is good enough.  */
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index 29c6c78f98..eb0f5fc021 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -131,7 +131,10 @@  ifeq ($(have-GLIBC_2.27)$(build-shared),yesyes)
 tests += tst-ofdlocks-compat
 endif
 
-tests-internal += tst-sigcontext-get_pc
+tests-internal += \
+  tst-rseq \
+  tst-sigcontext-get_pc \
+  # tests-internal
 
 tests-time64 += \
   tst-adjtimex-time64 \
@@ -357,4 +360,8 @@  endif
 
 ifeq ($(subdir),nptl)
 tests += tst-align-clone tst-getpid1
+
+# tst-rseq-nptl is an internal test because it requires a definition of
+# __NR_rseq from the internal system call list.
+tests-internal += tst-rseq-nptl
 endif
diff --git a/sysdeps/unix/sysv/linux/aarch64/bits/rseq.h b/sysdeps/unix/sysv/linux/aarch64/bits/rseq.h
new file mode 100644
index 0000000000..9ba92725c7
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/aarch64/bits/rseq.h
@@ -0,0 +1,43 @@ 
+/* Restartable Sequences Linux aarch64 architecture header.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYS_RSEQ_H
+# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
+#endif
+
+/* RSEQ_SIG is a signature required before each abort handler code.
+
+   It is a 32-bit value that maps to actual architecture code compiled
+   into applications and libraries.  It needs to be defined for each
+   architecture.  When choosing this value, it needs to be taken into
+   account that generating invalid instructions may have ill effects on
+   tools like objdump, and may also have impact on the CPU speculative
+   execution efficiency in some cases.
+
+   aarch64 -mbig-endian generates mixed endianness code vs data:
+   little-endian code and big-endian data.  Ensure the RSEQ_SIG signature
+   matches code endianness.  */
+
+#define RSEQ_SIG_CODE  0xd428bc00  /* BRK #0x45E0.  */
+
+#ifdef __AARCH64EB__
+# define RSEQ_SIG_DATA 0x00bc28d4  /* BRK #0x45E0.  */
+#else
+# define RSEQ_SIG_DATA RSEQ_SIG_CODE
+#endif
+
+#define RSEQ_SIG       RSEQ_SIG_DATA
diff --git a/sysdeps/unix/sysv/linux/arm/bits/rseq.h b/sysdeps/unix/sysv/linux/arm/bits/rseq.h
new file mode 100644
index 0000000000..0542b26f6a
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/arm/bits/rseq.h
@@ -0,0 +1,83 @@ 
+/* Restartable Sequences Linux arm architecture header.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYS_RSEQ_H
+# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
+#endif
+
+/*
+   RSEQ_SIG is a signature required before each abort handler code.
+
+   It is a 32-bit value that maps to actual architecture code compiled
+   into applications and libraries.  It needs to be defined for each
+   architecture.  When choosing this value, it needs to be taken into
+   account that generating invalid instructions may have ill effects on
+   tools like objdump, and may also have impact on the CPU speculative
+   execution efficiency in some cases.
+
+   - ARM little endian
+
+   RSEQ_SIG uses the udf A32 instruction with an uncommon immediate operand
+   value 0x5de3.  This traps if user-space reaches this instruction by mistake,
+   and the uncommon operand ensures the kernel does not move the instruction
+   pointer to attacker-controlled code on rseq abort.
+
+   The instruction pattern in the A32 instruction set is:
+
+   e7f5def3    udf    #24035    ; 0x5de3
+
+   This translates to the following instruction pattern in the T16 instruction
+   set:
+
+   little endian:
+   def3        udf    #243      ; 0xf3
+   e7f5        b.n    <7f5>
+
+   - ARMv6+ big endian (BE8):
+
+   ARMv6+ -mbig-endian generates mixed endianness code vs data: little-endian
+   code and big-endian data.  The data value of the signature needs to have its
+   byte order reversed to generate the trap instruction:
+
+   Data: 0xf3def5e7
+
+   Translates to this A32 instruction pattern:
+
+   e7f5def3    udf    #24035    ; 0x5de3
+
+   Translates to this T16 instruction pattern:
+
+   def3        udf    #243      ; 0xf3
+   e7f5        b.n    <7f5>
+
+   - Prior to ARMv6 big endian (BE32):
+
+   Prior to ARMv6, -mbig-endian generates big-endian code and data
+   (which match), so the endianness of the data representation of the
+   signature should not be reversed.  However, the choice between BE32
+   and BE8 is done by the linker, so we cannot know whether code and
+   data endianness will be mixed before the linker is invoked.  So rather
+   than try to play tricks with the linker, the rseq signature is simply
+   data (not a trap instruction) prior to ARMv6 on big endian.  This is
+   why the signature is expressed as data (.word) rather than as
+   instruction (.inst) in assembler.  */
+
+#ifdef __ARMEB__
+# define RSEQ_SIG    0xf3def5e7      /* udf    #24035    ; 0x5de3 (ARMv6+) */
+#else
+# define RSEQ_SIG    0xe7f5def3      /* udf    #24035    ; 0x5de3 */
+#endif
diff --git a/sysdeps/unix/sysv/linux/bits/rseq.h b/sysdeps/unix/sysv/linux/bits/rseq.h
new file mode 100644
index 0000000000..46cf5d1c74
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/bits/rseq.h
@@ -0,0 +1,29 @@ 
+/* Restartable Sequences architecture header.  Stub version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYS_RSEQ_H
+# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
+#endif
+
+/* RSEQ_SIG is a signature required before each abort handler code.
+
+   It is a 32-bit value that maps to actual architecture code compiled
+   into applications and libraries.  It needs to be defined for each
+   architecture.  When choosing this value, it needs to be taken into
+   account that generating invalid instructions may have ill effects on
+   tools like objdump, and may also have impact on the CPU speculative
+   execution efficiency in some cases.  */
diff --git a/sysdeps/unix/sysv/linux/mips/bits/rseq.h b/sysdeps/unix/sysv/linux/mips/bits/rseq.h
new file mode 100644
index 0000000000..a9defee568
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/mips/bits/rseq.h
@@ -0,0 +1,62 @@ 
+/* Restartable Sequences Linux mips architecture header.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYS_RSEQ_H
+# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
+#endif
+
+/* RSEQ_SIG is a signature required before each abort handler code.
+
+   It is a 32-bit value that maps to actual architecture code compiled
+   into applications and libraries.  It needs to be defined for each
+   architecture.  When choosing this value, it needs to be taken into
+   account that generating invalid instructions may have ill effects on
+   tools like objdump, and may also have impact on the CPU speculative
+   execution efficiency in some cases.
+
+   RSEQ_SIG uses the break instruction.  The instruction pattern is:
+
+   On MIPS:
+        0350000d        break     0x350
+
+   On nanoMIPS:
+        00100350        break     0x350
+
+   On microMIPS:
+        0000d407        break     0x350
+
+   For nanoMIPS32 and microMIPS, the instruction stream is encoded as
+   16-bit halfwords, so the signature halfwords need to be swapped
+   accordingly for little-endian.  */
+
+#if defined (__nanomips__)
+# ifdef __MIPSEL__
+#  define RSEQ_SIG      0x03500010
+# else
+#  define RSEQ_SIG      0x00100350
+# endif
+#elif defined (__mips_micromips)
+# ifdef __MIPSEL__
+#  define RSEQ_SIG      0xd4070000
+# else
+#  define RSEQ_SIG      0x0000d407
+# endif
+#elif defined (__mips__)
+# define RSEQ_SIG       0x0350000d
+#else
+/* Unknown MIPS architecture.  */
+#endif
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/rseq.h b/sysdeps/unix/sysv/linux/powerpc/bits/rseq.h
new file mode 100644
index 0000000000..05b3cf7b8f
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/rseq.h
@@ -0,0 +1,37 @@ 
+/* Restartable Sequences Linux powerpc architecture header.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYS_RSEQ_H
+# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
+#endif
+
+/* RSEQ_SIG is a signature required before each abort handler code.
+
+   It is a 32-bit value that maps to actual architecture code compiled
+   into applications and libraries.  It needs to be defined for each
+   architecture.  When choosing this value, it needs to be taken into
+   account that generating invalid instructions may have ill effects on
+   tools like objdump, and may also have impact on the CPU speculative
+   execution efficiency in some cases.
+
+   RSEQ_SIG uses the following trap instruction:
+
+   powerpc-be:    0f e5 00 0b           twui   r5,11
+   powerpc64-le:  0b 00 e5 0f           twui   r5,11
+   powerpc64-be:  0f e5 00 0b           twui   r5,11  */
+
+#define RSEQ_SIG        0x0fe5000b
diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h
new file mode 100644
index 0000000000..909f547825
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/rseq-internal.h
@@ -0,0 +1,45 @@ 
+/* Restartable Sequences internal API.  Linux implementation.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef RSEQ_INTERNAL_H
+#define RSEQ_INTERNAL_H
+
+#include <sysdep.h>
+#include <errno.h>
+#include <kernel-features.h>
+#include <stdio.h>
+#include <sys/rseq.h>
+
+#ifdef RSEQ_SIG
+/* Issue the rseq system call for SELF's rseq_area, using RSEQ_SIG as
+   the signature.  If the call fails, store
+   RSEQ_CPU_ID_REGISTRATION_FAILED in rseq_area.cpu_id.  The outcome is
+   not returned to the caller.  */
+static inline void
+rseq_register_current_thread (struct pthread *self)
+{
+  int ret = INTERNAL_SYSCALL_CALL (rseq,
+                                   &self->rseq_area, sizeof (self->rseq_area),
+                                   0, RSEQ_SIG);
+
+  if (!INTERNAL_SYSCALL_ERROR_P (ret))
+    return;
+
+  /* Record the failure in the area itself.  */
+  THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
+}
+#else /* RSEQ_SIG */
+/* Variant used when RSEQ_SIG is not defined: no system call is made and
+   SELF's rseq_area.cpu_id is set to RSEQ_CPU_ID_REGISTRATION_FAILED.  */
+static inline void
+rseq_register_current_thread (struct pthread *self)
+{
+  THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
+}
+#endif /* RSEQ_SIG */
+
+#endif /* rseq-internal.h */
diff --git a/sysdeps/unix/sysv/linux/s390/bits/rseq.h b/sysdeps/unix/sysv/linux/s390/bits/rseq.h
new file mode 100644
index 0000000000..3030e38f40
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/s390/bits/rseq.h
@@ -0,0 +1,37 @@ 
+/* Restartable Sequences Linux s390 architecture header.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYS_RSEQ_H
+# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
+#endif
+
+/* RSEQ_SIG is a signature required before each abort handler code.
+
+   It is a 32-bit value that maps to actual architecture code compiled
+   into applications and libraries.  It needs to be defined for each
+   architecture.  When choosing this value, it needs to be taken into
+   account that generating invalid instructions may have ill effects on
+   tools like objdump, and may also have impact on the CPU speculative
+   execution efficiency in some cases.
+
+   RSEQ_SIG uses the trap4 instruction.  As Linux does not make use of the
+   access-register mode nor the linkage stack this instruction will always
+   cause a special-operation exception (the trap-enabled bit in the DUCT
+   is and will stay 0).  The instruction pattern is
+       b2 ff 0f ff        trap4   4095(%r0)  */
+
+#define RSEQ_SIG        0xB2FF0FFF
diff --git a/sysdeps/unix/sysv/linux/sys/rseq.h b/sysdeps/unix/sysv/linux/sys/rseq.h
new file mode 100644
index 0000000000..c8edff50d4
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/sys/rseq.h
@@ -0,0 +1,174 @@ 
+/* Restartable Sequences exported symbols.  Linux header.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYS_RSEQ_H
+#define _SYS_RSEQ_H	1
+
+/* Architecture-specific rseq signature.  */
+#include <bits/rseq.h>
+
+#include <stdint.h>
+#include <sys/cdefs.h>
+#include <bits/endian.h>
+
+#ifdef __has_include
+# if __has_include ("linux/rseq.h")
+#  define __GLIBC_HAVE_KERNEL_RSEQ
+# endif
+#else
+# include <linux/version.h>
+# if LINUX_VERSION_CODE >= KERNEL_VERSION (4, 18, 0)
+#  define __GLIBC_HAVE_KERNEL_RSEQ
+# endif
+#endif
+
+#ifdef __GLIBC_HAVE_KERNEL_RSEQ
+/* We use the structures declarations from the kernel headers.  */
+# include <linux/rseq.h>
+#else /* __GLIBC_HAVE_KERNEL_RSEQ */
+/* We use a copy of the include/uapi/linux/rseq.h kernel header.  */
+
+enum rseq_cpu_id_state
+  {
+    RSEQ_CPU_ID_UNINITIALIZED = -1,
+    RSEQ_CPU_ID_REGISTRATION_FAILED = -2,
+  };
+
+enum rseq_flags
+  {
+    RSEQ_FLAG_UNREGISTER = (1 << 0),
+  };
+
+enum rseq_cs_flags_bit
+  {
+    RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0,
+    RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1,
+    RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2,
+  };
+
+enum rseq_cs_flags
+  {
+    RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT =
+      (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
+    RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL =
+      (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
+    RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE =
+      (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
+  };
+
+/* struct rseq_cs is aligned on 32 bytes to ensure it is always
+   contained within a single cache-line.  It is usually declared as
+   link-time constant data.  */
+struct rseq_cs
+  {
+    /* Version of this structure.  */
+    uint32_t version;
+    /* enum rseq_cs_flags.  */
+    uint32_t flags;
+    uint64_t start_ip;
+    /* Offset from start_ip.  */
+    uint64_t post_commit_offset;
+    uint64_t abort_ip;
+  } __attribute__ ((__aligned__ (32)));
+
+/* struct rseq is aligned on 32 bytes to ensure it is always
+   contained within a single cache-line.
+
+   A single struct rseq per thread is allowed.  */
+struct rseq
+  {
+    /* Restartable sequences cpu_id_start field.  Updated by the
+       kernel.  Read by user-space with single-copy atomicity
+       semantics.  This field should only be read by the thread which
+       registered this data structure.  Aligned on 32-bit.  Always
+       contains a value in the range of possible CPUs, although the
+       value may not be the actual current CPU (e.g. if rseq is not
+       initialized).  This CPU number value should always be compared
+       against the value of the cpu_id field before performing a rseq
+       commit or returning a value read from a data structure indexed
+       using the cpu_id_start value.  */
+    uint32_t cpu_id_start;
+    /* Restartable sequences cpu_id field.  Updated by the kernel.
+       Read by user-space with single-copy atomicity semantics.  This
+       field should only be read by the thread which registered this
+       data structure.  Aligned on 32-bit.  Values
+       RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED
+       have a special semantic: the former means "rseq uninitialized",
+       and latter means "rseq initialization failed".  This value is
+       meant to be read within rseq critical sections and compared
+       with the cpu_id_start value previously read, before performing
+       the commit instruction, or read and compared with the
+       cpu_id_start value before returning a value loaded from a data
+       structure indexed using the cpu_id_start value.  */
+    uint32_t cpu_id;
+    /* Restartable sequences rseq_cs field.
+
+       Contains NULL when no critical section is active for the current
+       thread, or holds a pointer to the currently active struct rseq_cs.
+
+       Updated by user-space, which sets the address of the currently
+       active rseq_cs at the beginning of assembly instruction sequence
+       block, and set to NULL by the kernel when it restarts an assembly
+       instruction sequence block, as well as when the kernel detects that
+       it is preempting or delivering a signal outside of the range
+       targeted by the rseq_cs.  Also needs to be set to NULL by user-space
+       before reclaiming memory that contains the targeted struct rseq_cs.
+
+       Read and set by the kernel.  Set by user-space with single-copy
+       atomicity semantics.  This field should only be updated by the
+       thread which registered this data structure.  Aligned on 64-bit.  */
+    union
+      {
+        uint64_t ptr64;
+# ifdef __LP64__
+        uint64_t ptr;
+# else /* __LP64__ */
+        struct
+          {
+#if __BYTE_ORDER == __BIG_ENDIAN
+            uint32_t padding; /* Initialized to zero.  */
+            uint32_t ptr32;
+#  else /* LITTLE */
+            uint32_t ptr32;
+            uint32_t padding; /* Initialized to zero.  */
+#  endif /* ENDIAN */
+          } ptr;
+# endif /* __LP64__ */
+      } rseq_cs;
+
+    /* Restartable sequences flags field.
+
+       This field should only be updated by the thread which
+       registered this data structure.  Read by the kernel.
+       Mainly used for single-stepping through rseq critical sections
+       with debuggers.
+
+       - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
+           Inhibit instruction sequence block restart on preemption
+           for this thread.
+       - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
+           Inhibit instruction sequence block restart on signal
+           delivery for this thread.
+       - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
+           Inhibit instruction sequence block restart on migration for
+           this thread.  */
+    uint32_t flags;
+  } __attribute__ ((__aligned__ (32)));
+
+#endif /* __GLIBC_HAVE_KERNEL_RSEQ */
+
+#endif /* sys/rseq.h */
diff --git a/sysdeps/unix/sysv/linux/tst-rseq-nptl.c b/sysdeps/unix/sysv/linux/tst-rseq-nptl.c
new file mode 100644
index 0000000000..d31d94445c
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-rseq-nptl.c
@@ -0,0 +1,260 @@ 
+/* Restartable Sequences NPTL test.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* These tests validate that rseq is registered from various execution
+   contexts (main thread, destructor, other threads, other threads created
+   from destructor, forked process (without exec), pthread_atfork handlers,
+   pthread setspecific destructors, signal handlers, atexit handlers).
+
+   See the Linux kernel selftests for extensive rseq stress-tests.  */
+
+#include <stdio.h>
+#include <support/check.h>
+#include <support/xthread.h>
+#include <sys/rseq.h>
+#include <unistd.h>
+
+#ifdef RSEQ_SIG
+# include <array_length.h>
+# include <errno.h>
+# include <error.h>
+# include <pthread.h>
+# include <signal.h>
+# include <stdlib.h>
+# include <string.h>
+# include <support/namespace.h>
+# include <support/xsignal.h>
+# include <syscall.h>
+# include <sys/types.h>
+# include <sys/wait.h>
+# include "tst-rseq.h"
+
+/* Thread-specific data key.  It is created in do_rseq_main_test with
+   rseq_key_destructor as its destructor.  */
+static pthread_key_t rseq_test_key;
+
+/* pthread_atfork prepare handler.  Records a test failure if rseq is
+   not registered for the calling thread.  */
+static void
+atfork_prepare (void)
+{
+  if (rseq_thread_registered ())
+    return;
+
+  printf ("error: rseq not registered in pthread atfork prepare\n");
+  support_record_failure ();
+}
+
+/* pthread_atfork parent handler.  Records a test failure if rseq is
+   not registered for the calling thread.  */
+static void
+atfork_parent (void)
+{
+  if (rseq_thread_registered ())
+    return;
+
+  printf ("error: rseq not registered in pthread atfork parent\n");
+  support_record_failure ();
+}
+
+/* pthread_atfork child handler.  Records a test failure if rseq is
+   not registered for the calling thread.  */
+static void
+atfork_child (void)
+{
+  if (rseq_thread_registered ())
+    return;
+
+  printf ("error: rseq not registered in pthread atfork child\n");
+  support_record_failure ();
+}
+
+/* Destructor registered for rseq_test_key.  Terminates the test with a
+   failure if rseq is not registered for the calling thread.  ARG is
+   not used.  */
+static void
+rseq_key_destructor (void *arg)
+{
+  bool registered = rseq_thread_registered ();
+
+  /* Cannot use deferred failure reporting after main returns.  */
+  if (!registered)
+    FAIL_EXIT1 ("rseq not registered in pthread key destructor");
+}
+
+/* Handler registered with atexit.  Terminates the test with a failure
+   if rseq is not registered for the calling thread.  */
+static void
+atexit_handler (void)
+{
+  if (rseq_thread_registered ())
+    return;
+
+  /* Cannot use deferred failure reporting after main returns.  */
+  FAIL_EXIT1 ("rseq not registered in atexit handler");
+}
+
+/* Object whose address is stored as the value for rseq_test_key.
+   Used to avoid -Werror=stringop-overread warning with
+   pthread_setspecific and GCC 11.  */
+static char one = 1;
+
+/* Install the atexit handler, the thread-specific data key and the
+   atfork handlers, raise SIGUSR1, store a value for the key, and
+   finally verify that rseq is registered for the calling thread.  */
+static void
+do_rseq_main_test (void)
+{
+  TEST_COMPARE (atexit (atexit_handler), 0);
+  rseq_test_key = xpthread_key_create (rseq_key_destructor);
+  TEST_COMPARE (pthread_atfork (atfork_prepare, atfork_parent, atfork_child), 0);
+  xraise (SIGUSR1);
+  /* The stored value is the address of a file-scope object, hence
+     non-null.  */
+  TEST_COMPARE (pthread_setspecific (rseq_test_key, &one), 0);
+  TEST_VERIFY_EXIT (rseq_thread_registered ());
+}
+
+/* Synchronization objects shared between test_cancel_thread and
+   do_rseq_threads_test.  */
+static pthread_barrier_t cancel_thread_barrier;
+static pthread_cond_t cancel_thread_cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t cancel_thread_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Cancellation cleanup handler installed by test_cancel_thread.
+   Records a test failure if rseq is not registered while the thread is
+   being cancelled, then releases cancel_thread_mutex.  ARG is not
+   used.  */
+static void
+cancel_routine (void *arg)
+{
+  if (!rseq_thread_registered ())
+    {
+      printf ("error: rseq not registered in cancel routine\n");
+      support_record_failure ();
+    }
+  /* A thread cancelled inside pthread_cond_wait owns the mutex again
+     when its cleanup handlers run.  Release it here so that the thread
+     running test_cancel_thread in a later round can lock it.  */
+  xpthread_mutex_unlock (&cancel_thread_mutex);
+}
+
+/* Block the calling thread until it is cancelled, with cancel_routine
+   installed as cleanup handler.  The barrier tells the thread that
+   issues the cancellation that the handler is in place.  */
+static void
+test_cancel_thread (void)
+{
+  /* pthread_cond_wait must be called with the mutex locked by the
+     calling thread.  */
+  xpthread_mutex_lock (&cancel_thread_mutex);
+  pthread_cleanup_push (cancel_routine, NULL);
+  (void) xpthread_barrier_wait (&cancel_thread_barrier);
+  /* Wait forever until cancellation.  Nothing in this test signals the
+     condition variable, so a return from the wait can only be a
+     spurious wakeup: wait again.  */
+  for (;;)
+    xpthread_cond_wait (&cancel_thread_cond, &cancel_thread_mutex);
+  pthread_cleanup_pop (0);
+}
+
+/* Start routine of the test threads.  ARG carries the thread index
+   converted to a pointer.  Every thread raises SIGUSR1; the thread with
+   index 0 then enters test_cancel_thread.  Returns NULL if rseq is
+   registered for the thread, (void *) 1 otherwise.  */
+static void *
+thread_function (void *arg)
+{
+  const int idx = (int) (intptr_t) arg;
+
+  xraise (SIGUSR1);
+  if (idx == 0)
+    test_cancel_thread ();
+  TEST_COMPARE (pthread_setspecific (rseq_test_key, &one), 0);
+
+  if (rseq_thread_registered ())
+    return NULL;
+  return (void *) 1l;
+}
+
+/* Signal handler installed for SIGUSR1 by setup_signals.  Records a
+   test failure if rseq is not registered for the thread running the
+   handler.  SIG is not used.  */
+static void
+sighandler (int sig)
+{
+  if (rseq_thread_registered ())
+    return;
+
+  printf ("error: rseq not registered in signal handler\n");
+  support_record_failure ();
+}
+
+/* Install sighandler as the SIGUSR1 handler.  SIGUSR1 is part of the
+   handler's signal mask and no flags are set.  */
+static void
+setup_signals (void)
+{
+  struct sigaction action;
+
+  action.sa_handler = sighandler;
+  action.sa_flags = 0;
+  sigemptyset (&action.sa_mask);
+  sigaddset (&action.sa_mask, SIGUSR1);
+
+  xsigaction (SIGUSR1, &action, NULL);
+}
+
+/* Create NR_THREADS threads running thread_function, cancel the first
+   one once it has reached the barrier, and join all of them.
+
+   The first thread is expected to yield a non-null join result; all
+   other threads are expected to return NULL.  Returns 1 if any thread
+   deviates from that, 0 otherwise.  */
+static int
+do_rseq_threads_test (int nr_threads)
+{
+  pthread_t threads[nr_threads];
+  int status = 0;
+
+  /* Two participants: this thread and the thread with index 0.  */
+  xpthread_barrier_init (&cancel_thread_barrier, NULL, 2);
+
+  for (int n = 0; n < nr_threads; ++n)
+    threads[n] = xpthread_create (NULL, thread_function,
+                                  (void *) (intptr_t) n);
+
+  (void) xpthread_barrier_wait (&cancel_thread_barrier);
+  xpthread_cancel (threads[0]);
+
+  for (int n = 0; n < nr_threads; ++n)
+    {
+      void *retval = xpthread_join (threads[n]);
+
+      if (n != 0)
+        {
+          if (retval != NULL)
+            {
+              printf ("error: join %d successful, but child failed\n", n);
+              status = 1;
+            }
+        }
+      else if (retval == NULL)
+        {
+          printf ("error: join %d successful, child did not fail as expected\n", n);
+          status = 1;
+        }
+    }
+
+  xpthread_barrier_destroy (&cancel_thread_barrier);
+
+  return status;
+}
+
+/* Callback passed to support_isolate_in_subprocess by
+   do_rseq_fork_test: runs the main-thread checks.  CLOSURE is not
+   used.  */
+static void
+subprocess_callback (void *closure)
+{
+  do_rseq_main_test ();
+}
+
+/* Run subprocess_callback through support_isolate_in_subprocess.  This
+   is the "forked process (without exec)" case listed at the top of
+   this file.  */
+static void
+do_rseq_fork_test (void)
+{
+  support_isolate_in_subprocess (subprocess_callback, NULL);
+}
+
+/* Run all checks: signal handler, main thread, several rounds with
+   different numbers of threads, and a subprocess.  The test is
+   reported as unsupported if the kernel lacks rseq.  Returns 1 if a
+   thread round reported a failure, 0 otherwise.  */
+static int
+do_rseq_test (void)
+{
+  int thread_counts[] = { 1, 2, 6, 5, 4, 3, 50 };
+  int failed = 0;
+
+  if (!rseq_available ())
+    FAIL_UNSUPPORTED ("kernel does not support rseq, skipping test");
+
+  setup_signals ();
+  xraise (SIGUSR1);
+  do_rseq_main_test ();
+
+  for (size_t k = 0; k < array_length (thread_counts); k++)
+    failed |= do_rseq_threads_test (thread_counts[k]) != 0;
+
+  do_rseq_fork_test ();
+  return failed;
+}
+
+/* Run the complete do_rseq_test sequence from a function marked with
+   the destructor attribute, then delete the thread-specific data
+   key.  */
+static void __attribute__ ((destructor))
+do_rseq_destructor_test (void)
+{
+  /* Cannot use deferred failure reporting after main returns.  */
+  if (do_rseq_test ())
+    FAIL_EXIT1 ("rseq not registered within destructor");
+  xpthread_key_delete (rseq_test_key);
+}
+
+#else /* RSEQ_SIG */
+/* Variant used when <sys/rseq.h> does not define RSEQ_SIG: report the
+   test as unsupported.  */
+static int
+do_rseq_test (void)
+{
+  FAIL_UNSUPPORTED ("glibc does not define RSEQ_SIG, skipping test");
+  return 0;
+}
+#endif /* RSEQ_SIG */
+
+/* Test entry point; presumably invoked by <support/test-driver.c>,
+   which is included below.  Returns the result of do_rseq_test.  */
+static int
+do_test (void)
+{
+  return do_rseq_test ();
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/tst-rseq.c b/sysdeps/unix/sysv/linux/tst-rseq.c
new file mode 100644
index 0000000000..926376b6a5
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-rseq.c
@@ -0,0 +1,64 @@ 
+/* Restartable Sequences single-threaded tests.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* These tests validate that rseq is registered from main in an executable
+   not linked against libpthread.  */
+
+#include <support/check.h>
+#include <stdio.h>
+#include <sys/rseq.h>
+#include <unistd.h>
+
+#ifdef RSEQ_SIG
+# include <errno.h>
+# include <error.h>
+# include <stdlib.h>
+# include <string.h>
+# include <syscall.h>
+# include "tst-rseq.h"
+
+/* Verify that rseq is registered for the calling thread; the test
+   exits immediately if it is not.  */
+static void
+do_rseq_main_test (void)
+{
+  TEST_VERIFY_EXIT (rseq_thread_registered ());
+}
+
+/* Report the test as unsupported if the kernel lacks rseq; otherwise
+   run the main-thread registration check.  */
+static void
+do_rseq_test (void)
+{
+  if (!rseq_available ())
+    FAIL_UNSUPPORTED ("kernel does not support rseq, skipping test");
+
+  do_rseq_main_test ();
+}
+#else /* RSEQ_SIG */
+/* Variant used when <sys/rseq.h> does not define RSEQ_SIG: report the
+   test as unsupported.  */
+static void
+do_rseq_test (void)
+{
+  FAIL_UNSUPPORTED ("glibc does not define RSEQ_SIG, skipping test");
+}
+#endif /* RSEQ_SIG */
+
+/* Test entry point; presumably invoked by <support/test-driver.c>,
+   which is included below.  Runs do_rseq_test and returns 0.  */
+static int
+do_test (void)
+{
+  do_rseq_test ();
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/tst-rseq.h b/sysdeps/unix/sysv/linux/tst-rseq.h
new file mode 100644
index 0000000000..a476c316fc
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/tst-rseq.h
@@ -0,0 +1,57 @@ 
+/* Restartable Sequences tests header.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <error.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <support/check.h>
+#include <syscall.h>
+#include <sys/rseq.h>
+#include <tls.h>
+
+/* Return true if rseq is registered for the calling thread, which is
+   the case when the cpu_id field of the thread's rseq area holds a
+   non-negative value.
+
+   The value is converted to int32_t before the comparison.  Without
+   the conversion, ">= 0" would hold for every value if cpu_id is
+   declared with an unsigned type.  NOTE(review): cpu_id is believed to
+   be an unsigned 32-bit field, as in the kernel ABI; confirm against
+   <sys/rseq.h>.  */
+static inline bool
+rseq_thread_registered (void)
+{
+  int32_t cpu_id = THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id);
+
+  return cpu_id >= 0;
+}
+
+/* Invoke the rseq system call directly through syscall, passing
+   RSEQ_ABI, RSEQ_LEN, FLAGS and SIG unchanged.  Returns the result of
+   syscall converted to int.  */
+static inline int
+sys_rseq (struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig)
+{
+  return syscall (__NR_rseq, rseq_abi, rseq_len, flags, sig);
+}
+
+/* Probe for kernel rseq support by issuing the system call with a null
+   area and zero length.  Returns false if the call fails with ENOSYS
+   and true if it fails with EINVAL.  Any other outcome terminates the
+   test with a failure.  */
+static inline bool
+rseq_available (void)
+{
+  int rc = sys_rseq (NULL, 0, 0, 0);
+
+  if (rc != -1)
+    FAIL_EXIT1 ("Unexpected rseq return value %d", rc);
+
+  if (errno == ENOSYS)
+    return false;
+
+  /* rseq is implemented, but detected an invalid rseq_len parameter.  */
+  if (errno == EINVAL)
+    return true;
+
+  FAIL_EXIT1 ("Unexpected rseq error %s", strerror (errno));
+}
diff --git a/sysdeps/unix/sysv/linux/x86/bits/rseq.h b/sysdeps/unix/sysv/linux/x86/bits/rseq.h
new file mode 100644
index 0000000000..9fc909e7c8
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/bits/rseq.h
@@ -0,0 +1,30 @@ 
+/* Restartable Sequences Linux x86 architecture header.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _SYS_RSEQ_H
+# error "Never use <bits/rseq.h> directly; include <sys/rseq.h> instead."
+#endif
+
+/* RSEQ_SIG is a signature required before each abort handler code.
+
+   RSEQ_SIG is used with the following reserved undefined instructions, which
+   trap in user-space:
+
+   x86-32:    0f b9 3d 53 30 05 53      ud1    0x53053053,%edi
+   x86-64:    0f b9 3d 53 30 05 53      ud1    0x53053053(%rip),%edi
+
+   In both encodings the last four bytes (53 30 05 53) are the value of
+   RSEQ_SIG in little-endian byte order.  */
+
+#define RSEQ_SIG        0x53053053