[v11] nptl: fix potential merge of __rseq_* relro symbols

Message ID 20240703163538.1279426-1-mjeanson@efficios.com
State Committed
Commit 2b92982e2369d292560793bee8e730f695f48ff3
Headers
Series [v11] nptl: fix potential merge of __rseq_* relro symbols |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-arm warning Patch is already merged
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 warning Patch is already merged

Commit Message

Michael Jeanson July 3, 2024, 4:35 p.m. UTC
  While working on a patch to add support for the extensible rseq ABI, we
came across an issue where a new 'const' variable would be merged with
the existing '__rseq_size' variable. We tracked this to the use of
'-fmerge-all-constants' which allows the compiler to merge identical
constant variables. This means that all 'const' variables in a compile
unit that are of the same size and are initialized to the same value can
be merged.

In this specific case, on 32 bit systems 'unsigned int' and 'ptrdiff_t'
are both 4 bytes and initialized to 0 which should trigger the merge.
However for reasons we haven't delved into when the attribute 'section
(".data.rel.ro")' is added to the mix, only variables of the same exact
types are merged. As far as we know this behavior is not specified
anywhere and could change with a new compiler version, hence this patch.

Move the definitions of these variables into an assembler file and add
hidden writable aliases for internal use. This has the added bonus of
removing the asm workaround to set the values on rseq registration.

Tested on Debian 12 with GCC 12.2.

Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: DJ Delorie <dj@redhat.com>
---
Changes sinve v10:
- Remove the *_ALIGN macros and use literals for *_SIZE
- Expand the comments to talk about copy relocation
- Add attribute_hidden to externs
- Resend as a single patch
Changes sinve v8:
- Remove superfluous attributes on externs
---
 elf/Makefile                  |  1 +
 elf/dl-rseq-symbols.S         | 64 +++++++++++++++++++++++++++++++++++
 sysdeps/nptl/dl-tls_init_tp.c | 14 ++++----
 3 files changed, 71 insertions(+), 8 deletions(-)
 create mode 100644 elf/dl-rseq-symbols.S
  

Comments

Florian Weimer July 3, 2024, 7:39 p.m. UTC | #1
* Michael Jeanson:

> While working on a patch to add support for the extensible rseq ABI, we
> came across an issue where a new 'const' variable would be merged with
> the existing '__rseq_size' variable. We tracked this to the use of
> '-fmerge-all-constants' which allows the compiler to merge identical
> constant variables. This means that all 'const' variables in a compile
> unit that are of the same size and are initialized to the same value can
> be merged.
>
> In this specific case, on 32 bit systems 'unsigned int' and 'ptrdiff_t'
> are both 4 bytes and initialized to 0 which should trigger the merge.
> However for reasons we haven't delved into when the attribute 'section
> (".data.rel.ro")' is added to the mix, only variables of the same exact
> types are merged. As far as we know this behavior is not specified
> anywhere and could change with a new compiler version, hence this patch.
>
> Move the definitions of these variables into an assembler file and add
> hidden writable aliases for internal use. This has the added bonus of
> removing the asm workaround to set the values on rseq registration.
>
> Tested on Debian 12 with GCC 12.2.
>
> Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
> Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Florian Weimer <fweimer@redhat.com>
> Cc: DJ Delorie <dj@redhat.com>

This version looks good to me.

Reviewed-by: Florian Weimer <fweimer@redhat.com>

I'll push it for you.

Thanks,
Florian
  
Adhemerval Zanella Netto July 4, 2024, 1:11 p.m. UTC | #2
On 03/07/24 13:35, Michael Jeanson wrote:
> While working on a patch to add support for the extensible rseq ABI, we
> came across an issue where a new 'const' variable would be merged with
> the existing '__rseq_size' variable. We tracked this to the use of
> '-fmerge-all-constants' which allows the compiler to merge identical
> constant variables. This means that all 'const' variables in a compile
> unit that are of the same size and are initialized to the same value can
> be merged.
> 
> In this specific case, on 32 bit systems 'unsigned int' and 'ptrdiff_t'
> are both 4 bytes and initialized to 0 which should trigger the merge.
> However for reasons we haven't delved into when the attribute 'section
> (".data.rel.ro")' is added to the mix, only variables of the same exact
> types are merged. As far as we know this behavior is not specified
> anywhere and could change with a new compiler version, hence this patch.
> 
> Move the definitions of these variables into an assembler file and add
> hidden writable aliases for internal use. This has the added bonus of
> removing the asm workaround to set the values on rseq registration.
> 
> Tested on Debian 12 with GCC 12.2.
> 
> Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
> Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Florian Weimer <fweimer@redhat.com>
> Cc: DJ Delorie <dj@redhat.com>
> ---
> Changes sinve v10:
> - Remove the *_ALIGN macros and use literals for *_SIZE
> - Expand the comments to talk about copy relocation
> - Add attribute_hidden to externs
> - Resend as a single patch
> Changes sinve v8:
> - Remove superfluous attributes on externs
> ---
>  elf/Makefile                  |  1 +
>  elf/dl-rseq-symbols.S         | 64 +++++++++++++++++++++++++++++++++++
>  sysdeps/nptl/dl-tls_init_tp.c | 14 ++++----
>  3 files changed, 71 insertions(+), 8 deletions(-)
>  create mode 100644 elf/dl-rseq-symbols.S
> 
> diff --git a/elf/Makefile b/elf/Makefile
> index a3475f3fb5..147f1d3437 100644
> --- a/elf/Makefile
> +++ b/elf/Makefile
> @@ -73,6 +73,7 @@ dl-routines = \
>    dl-origin \
>    dl-printf \
>    dl-reloc \
> +  dl-rseq-symbols \
>    dl-runtime \
>    dl-scope \
>    dl-setup_hash \
> diff --git a/elf/dl-rseq-symbols.S b/elf/dl-rseq-symbols.S
> new file mode 100644
> index 0000000000..b4bba06a99
> --- /dev/null
> +++ b/elf/dl-rseq-symbols.S
> @@ -0,0 +1,64 @@
> +/* Define symbols used by rseq.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +
> +#if __WORDSIZE == 64
> +#define RSEQ_OFFSET_SIZE	8
> +#else
> +#define RSEQ_OFFSET_SIZE	4
> +#endif

This has broken Hurd:

dl-rseq-symbols.S:21:5: error: "__WORDSIZE" is not defined, evaluates to 0 [-Werror=undef]
   21 | #if __WORDSIZE == 64
      |     ^~~~~~~~~~
cc1: some warnings being treated as errors

Although the fix would be simple (just include wordsize.h), the RSEQ support also
does not make sense to be in generic API anyway.  I am checking this patch to make
Linux only:

diff --git a/elf/Makefile b/elf/Makefile
index 147f1d3437..a3475f3fb5 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -73,7 +73,6 @@ dl-routines = \
   dl-origin \
   dl-printf \
   dl-reloc \
-  dl-rseq-symbols \
   dl-runtime \
   dl-scope \
   dl-setup_hash \
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index ae66590e91..097b5a26fc 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -616,6 +616,10 @@ tests += \
 endif
 
 ifeq ($(subdir),elf)
+dl-routines += \
+  dl-rseq-symbols \
+  # dl-routines
+
 sysdep-rtld-routines += \
   dl-brk \
   dl-getcwd \
diff --git a/elf/dl-rseq-symbols.S b/sysdeps/unix/sysv/linux/dl-rseq-symbols.S
similarity index 100%
rename from elf/dl-rseq-symbols.S
rename to sysdeps/unix/sysv/linux/dl-rseq-symbols.S
  
Florian Weimer July 4, 2024, 1:24 p.m. UTC | #3
* Adhemerval Zanella Netto:

> Although the fix would be simple (just include wordsize.h), the RSEQ
> support also does not make sense to be in generic API anyway.  I am
> checking this patch to make Linux only:

Makes sense, sorry for missing it.

Thanks,
Florian
  
Michael Jeanson July 4, 2024, 3:27 p.m. UTC | #4
On 2024-07-04 09:11, Adhemerval Zanella Netto wrote:
> 
> This has broken Hurd:
> 
> dl-rseq-symbols.S:21:5: error: "__WORDSIZE" is not defined, evaluates to 0 [-Werror=undef]
>     21 | #if __WORDSIZE == 64
>        |     ^~~~~~~~~~
> cc1: some warnings being treated as errors
> 
> Although the fix would be simple (just include wordsize.h), the RSEQ support also
> does not make sense to be in generic API anyway.  I am checking this patch to make
> Linux only:
> 
> diff --git a/elf/Makefile b/elf/Makefile
> index 147f1d3437..a3475f3fb5 100644
> --- a/elf/Makefile
> +++ b/elf/Makefile
> @@ -73,7 +73,6 @@ dl-routines = \
>     dl-origin \
>     dl-printf \
>     dl-reloc \
> -  dl-rseq-symbols \
>     dl-runtime \
>     dl-scope \
>     dl-setup_hash \
> diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
> index ae66590e91..097b5a26fc 100644
> --- a/sysdeps/unix/sysv/linux/Makefile
> +++ b/sysdeps/unix/sysv/linux/Makefile
> @@ -616,6 +616,10 @@ tests += \
>   endif
>   
>   ifeq ($(subdir),elf)
> +dl-routines += \
> +  dl-rseq-symbols \
> +  # dl-routines
> +
>   sysdep-rtld-routines += \
>     dl-brk \
>     dl-getcwd \
> diff --git a/elf/dl-rseq-symbols.S b/sysdeps/unix/sysv/linux/dl-rseq-symbols.S
> similarity index 100%
> rename from elf/dl-rseq-symbols.S
> rename to sysdeps/unix/sysv/linux/dl-rseq-symbols.S

Thanks for fixing this, I was wondering if there was a document somewhere 
describing at a high level how the glibc build system works? I am used to 
'regular' autotools projects and the glibc stuff seems quite different.

Florian pointed me to build-many-glibcs.py to build test my patches against 
Hurd which I'm looking into at the moment.

Michael
  
Adhemerval Zanella Netto July 8, 2024, 7:19 p.m. UTC | #5
On 04/07/24 12:27, Michael Jeanson wrote:
> On 2024-07-04 09:11, Adhemerval Zanella Netto wrote:
>>
>> This has broken Hurd:
>>
>> dl-rseq-symbols.S:21:5: error: "__WORDSIZE" is not defined, evaluates to 0 [-Werror=undef]
>>     21 | #if __WORDSIZE == 64
>>        |     ^~~~~~~~~~
>> cc1: some warnings being treated as errors
>>
>> Although the fix would be simple (just include wordsize.h), the RSEQ support also
>> does not make sense to be in generic API anyway.  I am checking this patch to make
>> Linux only:
>>
>> diff --git a/elf/Makefile b/elf/Makefile
>> index 147f1d3437..a3475f3fb5 100644
>> --- a/elf/Makefile
>> +++ b/elf/Makefile
>> @@ -73,7 +73,6 @@ dl-routines = \
>>     dl-origin \
>>     dl-printf \
>>     dl-reloc \
>> -  dl-rseq-symbols \
>>     dl-runtime \
>>     dl-scope \
>>     dl-setup_hash \
>> diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
>> index ae66590e91..097b5a26fc 100644
>> --- a/sysdeps/unix/sysv/linux/Makefile
>> +++ b/sysdeps/unix/sysv/linux/Makefile
>> @@ -616,6 +616,10 @@ tests += \
>>   endif
>>     ifeq ($(subdir),elf)
>> +dl-routines += \
>> +  dl-rseq-symbols \
>> +  # dl-routines
>> +
>>   sysdep-rtld-routines += \
>>     dl-brk \
>>     dl-getcwd \
>> diff --git a/elf/dl-rseq-symbols.S b/sysdeps/unix/sysv/linux/dl-rseq-symbols.S
>> similarity index 100%
>> rename from elf/dl-rseq-symbols.S
>> rename to sysdeps/unix/sysv/linux/dl-rseq-symbols.S
> 
> Thanks for fixing this, I was wondering if there was a document somewhere describing at a high level how the glibc build system works? I am used to 'regular' autotools projects and the glibc stuff seems quite different.
> 
> Florian pointed me to build-many-glibcs.py to build test my patches against Hurd which I'm looking into at the moment.

The build-many-glibcs.py should bootstrap a Hurd toolchain so you can check
if everything is ok:

$ build-many-glibcs.py /path/to/workdir checkout
$ build-many-glibcs.py /path/to/workdir host-libraries
$ build-many-glibcs.py /path/to/workdir compilers i686-gnu x86_64-gnu

On /path/to/workdir/src/glibc you can add any modification and thus trigger
a build with:

$ build-many-glibcs.py /path/to/workdir glibcs i686-gnu x86_64-gnu

And on /path/to/workdir/logs/glibcs you will have the logs.  You might need
to adjust the PATH if you want to build manually (and add the --keep all
option as well).
  

Patch

diff --git a/elf/Makefile b/elf/Makefile
index a3475f3fb5..147f1d3437 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -73,6 +73,7 @@  dl-routines = \
   dl-origin \
   dl-printf \
   dl-reloc \
+  dl-rseq-symbols \
   dl-runtime \
   dl-scope \
   dl-setup_hash \
diff --git a/elf/dl-rseq-symbols.S b/elf/dl-rseq-symbols.S
new file mode 100644
index 0000000000..b4bba06a99
--- /dev/null
+++ b/elf/dl-rseq-symbols.S
@@ -0,0 +1,64 @@ 
+/* Define symbols used by rseq.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#if __WORDSIZE == 64
+#define RSEQ_OFFSET_SIZE	8
+#else
+#define RSEQ_OFFSET_SIZE	4
+#endif
+
+/* Some targets define a macro to denote the zero register.  */
+#undef zero
+
+/* Define 2 symbols: '__rseq_size' is public const and '_rseq_size' (an
+   alias of '__rseq_size') is hidden and writable for internal use by the
+   dynamic linker which will initialize the value both symbols point to
+   before copy relocations take place. */
+
+	.globl	__rseq_size
+	.type	__rseq_size, %object
+	.size	__rseq_size, 4
+	.hidden _rseq_size
+	.globl	_rseq_size
+	.type	_rseq_size, %object
+	.size	_rseq_size, 4
+	.section .data.rel.ro
+	.balign 4
+__rseq_size:
+_rseq_size:
+	.zero	4
+
+/* Define 2 symbols: '__rseq_offset' is public const and '_rseq_offset' (an
+   alias of '__rseq_offset') is hidden and writable for internal use by the
+   dynamic linker which will initialize the value both symbols point to
+   before copy relocations take place. */
+
+	.globl	__rseq_offset
+	.type	__rseq_offset, %object
+	.size	__rseq_offset, RSEQ_OFFSET_SIZE
+	.hidden _rseq_offset
+	.globl	_rseq_offset
+	.type	_rseq_offset, %object
+	.size	_rseq_offset, RSEQ_OFFSET_SIZE
+	.section .data.rel.ro
+	.balign RSEQ_OFFSET_SIZE
+__rseq_offset:
+_rseq_offset:
+	.zero	RSEQ_OFFSET_SIZE
diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
index 092c274f36..7eb35fb133 100644
--- a/sysdeps/nptl/dl-tls_init_tp.c
+++ b/sysdeps/nptl/dl-tls_init_tp.c
@@ -45,8 +45,10 @@  rtld_mutex_dummy (pthread_mutex_t *lock)
 #endif
 
 const unsigned int __rseq_flags;
-const unsigned int __rseq_size attribute_relro;
-const ptrdiff_t __rseq_offset attribute_relro;
+
+/* The variables are in .data.relro but are not yet write-protected.  */
+extern unsigned int _rseq_size attribute_hidden;
+extern ptrdiff_t _rseq_offset attribute_hidden;
 
 void
 __tls_pre_init_tp (void)
@@ -105,10 +107,7 @@  __tls_init_tp (void)
     do_rseq = TUNABLE_GET (rseq, int, NULL);
     if (rseq_register_current_thread (pd, do_rseq))
       {
-        /* We need a writable view of the variables.  They are in
-           .data.relro and are not yet write-protected.  */
-        extern unsigned int size __asm__ ("__rseq_size");
-        size = sizeof (pd->rseq_area);
+        _rseq_size = sizeof (pd->rseq_area);
       }
 
 #ifdef RSEQ_SIG
@@ -117,8 +116,7 @@  __tls_init_tp (void)
        all targets support __thread_pointer, so set __rseq_offset only
        if the rseq registration may have happened because RSEQ_SIG is
        defined.  */
-    extern ptrdiff_t offset __asm__ ("__rseq_offset");
-    offset = (char *) &pd->rseq_area - (char *) __thread_pointer ();
+    _rseq_offset = (char *) &pd->rseq_area - (char *) __thread_pointer ();
 #endif
   }