[v7,13/23] aarch64: Enable GCS in dynamic linked exe
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
Commit Message
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Use the dynamic linker start code to enable GCS in the dynamic linked
case after _dl_start returns and before _dl_start_user which marks
the point after which user code may run.
Like in the static linked case this ensures that GCS is enabled on a
top level stack frame.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
---
sysdeps/aarch64/Makefile | 4 +++-
sysdeps/aarch64/dl-start.S | 25 +++++++++++++++++++++++--
sysdeps/aarch64/rtld-global-offsets.sym | 5 +++++
3 files changed, 31 insertions(+), 3 deletions(-)
Comments
On 03/01/25 12:41, Yury Khrustalev wrote:
> From: Szabolcs Nagy <szabolcs.nagy@arm.com>
>
> Use the dynamic linker start code to enable GCS in the dynamic linked
> case after _dl_start returns and before _dl_start_user which marks
> the point after which user code may run.
>
> Like in the static linked case this ensures that GCS is enabled on a
> top level stack frame.
>
> Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
> ---
> sysdeps/aarch64/Makefile | 4 +++-
> sysdeps/aarch64/dl-start.S | 25 +++++++++++++++++++++++--
> sysdeps/aarch64/rtld-global-offsets.sym | 5 +++++
> 3 files changed, 31 insertions(+), 3 deletions(-)
>
> diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile
> index 141d7d9cc2..ca8b96f550 100644
> --- a/sysdeps/aarch64/Makefile
> +++ b/sysdeps/aarch64/Makefile
> @@ -35,7 +35,9 @@ endif
> ifeq ($(subdir),elf)
> sysdep-rtld-routines += dl-start
> sysdep-dl-routines += tlsdesc dl-tlsdesc
> -gen-as-const-headers += dl-link.sym
> +gen-as-const-headers += \
> + dl-link.sym \
> + rtld-global-offsets.sym
>
> tests-internal += tst-ifunc-arg-1 tst-ifunc-arg-2
>
> diff --git a/sysdeps/aarch64/dl-start.S b/sysdeps/aarch64/dl-start.S
> index 3b8eff0acd..3f9faf9d57 100644
> --- a/sysdeps/aarch64/dl-start.S
> +++ b/sysdeps/aarch64/dl-start.S
> @@ -18,6 +18,7 @@
> <https://www.gnu.org/licenses/>. */
>
> #include <sysdep.h>
> +#include <rtld-global-offsets.h>
>
> ENTRY (_start)
> /* Create an initial frame with 0 LR and FP */
> @@ -25,11 +26,32 @@ ENTRY (_start)
> mov x29, #0
> mov x30, #0
>
> + /* Load and relocate all library dependencies. */
> mov x0, sp
> PTR_ARG (0)
> bl _dl_start
> /* Returns user entry point in x0. */
> mov PTR_REG (21), PTR_REG (0)
> +
> + /* Use GL(dl_aarch64_gcs) to set the shadow stack status. */
> + adrp x16, _rtld_local
> + add PTR_REG (16), PTR_REG (16), :lo12:_rtld_local
> + ldr x1, [x16, GL_DL_AARCH64_GCS_OFFSET]
> + cbz x1, L(skip_gcs_enable)
> +
> + /* Enable GCS before user code runs. Note that IFUNC resolvers and
> + LD_AUDIT hooks may run before, but should not create threads. */
> +#define PR_SET_SHADOW_STACK_STATUS 75
> +#define PR_SHADOW_STACK_ENABLE (1UL << 0)
> + mov x0, PR_SET_SHADOW_STACK_STATUS
> + mov x1, PR_SHADOW_STACK_ENABLE
> + mov x2, 0
> + mov x3, 0
> + mov x4, 0
> + mov x8, #SYS_ify(prctl)
> + svc 0x0
Should it handle errors, for the case prctl is filtered or blocked?
> +L(skip_gcs_enable):
> +
> .globl _dl_start_user
> .type _dl_start_user, %function
> _dl_start_user:
> @@ -40,8 +62,7 @@ _dl_start_user:
> /* Compute envp. */
> add PTR_REG (3), PTR_REG (2), PTR_REG (1), lsl PTR_LOG_SIZE
> add PTR_REG (3), PTR_REG (3), PTR_SIZE
> - adrp x16, _rtld_local
> - add PTR_REG (16), PTR_REG (16), :lo12:_rtld_local
> + /* Run the init functions of the loaded modules. */
> ldr PTR_REG (0), [x16]
> bl _dl_init
> /* Load the finalizer function. */
> diff --git a/sysdeps/aarch64/rtld-global-offsets.sym b/sysdeps/aarch64/rtld-global-offsets.sym
> index 23cdaf7d9e..6c0690bb95 100644
> --- a/sysdeps/aarch64/rtld-global-offsets.sym
> +++ b/sysdeps/aarch64/rtld-global-offsets.sym
> @@ -3,8 +3,13 @@
> #include <ldsodefs.h>
>
> #define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name)
> +#define GL_offsetof(name) offsetof (struct rtld_global, _##name)
>
> -- Offsets of _rtld_global_ro in libc.so
>
> GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap)
> GLRO_DL_HWCAP2_OFFSET GLRO_offsetof (dl_hwcap2)
> +
> +-- Offsets of _rtld_global in libc.so
> +
> +GL_DL_AARCH64_GCS_OFFSET GL_offsetof (dl_aarch64_gcs)
On Tue, Jan 07, 2025 at 01:50:41PM -0300, Adhemerval Zanella Netto wrote:
>
> > + /* Enable GCS before user code runs. Note that IFUNC resolvers and
> > + LD_AUDIT hooks may run before, but should not create threads. */
> > +#define PR_SET_SHADOW_STACK_STATUS 75
> > +#define PR_SHADOW_STACK_ENABLE (1UL << 0)
> > + mov x0, PR_SET_SHADOW_STACK_STATUS
> > + mov x1, PR_SHADOW_STACK_ENABLE
> > + mov x2, 0
> > + mov x3, 0
> > + mov x4, 0
> > + mov x8, #SYS_ify(prctl)
> > + svc 0x0
>
> Should it handle errors, for the case prctl is filtered or blocked?
There is no need to handle errors here. If this syscall doesn't succeed, it
would be the same as if we didn't make it at all: GCS will remain disabled.
Having a check for error here would be an unnecessary overhead.
Thanks,
Yury
On 08/01/25 10:52, Yury Khrustalev wrote:
> On Tue, Jan 07, 2025 at 01:50:41PM -0300, Adhemerval Zanella Netto wrote:
>>
>>> + /* Enable GCS before user code runs. Note that IFUNC resolvers and
>>> + LD_AUDIT hooks may run before, but should not create threads. */
>>> +#define PR_SET_SHADOW_STACK_STATUS 75
>>> +#define PR_SHADOW_STACK_ENABLE (1UL << 0)
>>> + mov x0, PR_SET_SHADOW_STACK_STATUS
>>> + mov x1, PR_SHADOW_STACK_ENABLE
>>> + mov x2, 0
>>> + mov x3, 0
>>> + mov x4, 0
>>> + mov x8, #SYS_ify(prctl)
>>> + svc 0x0
>>
>> Should it handle errors, for the case prctl is filtered or blocked?
>
> There is no need to handle errors here. If this syscall doesn't succeed, it
> would be the same as if we didn't make it at all: GCS will remain disabled.
>
> Having a check for error here would be an unnecessary overhead.
But this is aaik not how other security hardening work, like BTI where
_dl_bti_check will _dl_fatal_printf if bti can not be enabled. Without a
check, GCS is taking as a hint, not as enforcement
On Wed, Jan 08, 2025 at 11:04:48AM -0300, Adhemerval Zanella Netto wrote:
>
>
> On 08/01/25 10:52, Yury Khrustalev wrote:
> > On Tue, Jan 07, 2025 at 01:50:41PM -0300, Adhemerval Zanella Netto wrote:
> >>
> >>> + /* Enable GCS before user code runs. Note that IFUNC resolvers and
> >>> + LD_AUDIT hooks may run before, but should not create threads. */
> >>> +#define PR_SET_SHADOW_STACK_STATUS 75
> >>> +#define PR_SHADOW_STACK_ENABLE (1UL << 0)
> >>> + mov x0, PR_SET_SHADOW_STACK_STATUS
> >>> + mov x1, PR_SHADOW_STACK_ENABLE
> >>> + mov x2, 0
> >>> + mov x3, 0
> >>> + mov x4, 0
> >>> + mov x8, #SYS_ify(prctl)
> >>> + svc 0x0
> >>
> >> Should it handle errors, for the case prctl is filtered or blocked?
> >
> > There is no need to handle errors here. If this syscall doesn't succeed, it
> > would be the same as if we didn't make it at all: GCS will remain disabled.
> >
> > Having a check for error here would be an unnecessary overhead.
>
> But this is aaik not how other security hardening work, like BTI where
> _dl_bti_check will _dl_fatal_printf if bti can not be enabled. Without a
> check, GCS is taking as a hint, not as enforcement
The _dl_bti_check and _dl_gcs_check functions do something different: they check
consistency of markings in the binaries and dependencies. Here we are trying to
enable GCS support (in case of BTI there is no need for this as this is handled
by kernel).
We could check the value returned from syscall and call a function that would
print error message and exit using _dl_fatal_printf.
I suppose the same should be done for static executables? We'd need to check
return value of the prctl syscall in aarch64_libc_setup_tls. Is it OK to call
_dl_fatal_printf in aarch64_libc_setup_tls?
Thanks,
Yury
On 08/01/25 13:54, Yury Khrustalev wrote:
> On Wed, Jan 08, 2025 at 11:04:48AM -0300, Adhemerval Zanella Netto wrote:
>>
>>
>> On 08/01/25 10:52, Yury Khrustalev wrote:
>>> On Tue, Jan 07, 2025 at 01:50:41PM -0300, Adhemerval Zanella Netto wrote:
>>>>
>>>>> + /* Enable GCS before user code runs. Note that IFUNC resolvers and
>>>>> + LD_AUDIT hooks may run before, but should not create threads. */
>>>>> +#define PR_SET_SHADOW_STACK_STATUS 75
>>>>> +#define PR_SHADOW_STACK_ENABLE (1UL << 0)
>>>>> + mov x0, PR_SET_SHADOW_STACK_STATUS
>>>>> + mov x1, PR_SHADOW_STACK_ENABLE
>>>>> + mov x2, 0
>>>>> + mov x3, 0
>>>>> + mov x4, 0
>>>>> + mov x8, #SYS_ify(prctl)
>>>>> + svc 0x0
>>>>
>>>> Should it handle errors, for the case prctl is filtered or blocked?
>>>
>>> There is no need to handle errors here. If this syscall doesn't succeed, it
>>> would be the same as if we didn't make it at all: GCS will remain disabled.
>>>
>>> Having a check for error here would be an unnecessary overhead.
>>
>> But this is aaik not how other security hardening work, like BTI where
>> _dl_bti_check will _dl_fatal_printf if bti can not be enabled. Without a
>> check, GCS is taking as a hint, not as enforcement
>
> The _dl_bti_check and _dl_gcs_check functions do something different: they check
> consistency of markings in the binaries and dependencies. Here we are trying to
> enable GCS support (in case of BTI there is no need for this as this is handled
> by kernel).
Yes, my point is kernel enforces BTI/PAC during process execution; while for GCS
this should be enforced by the program loader. I am not sure if this is really
a security issue, since to force a prctl failure here would require a syscall
interception; but it fells strange that a misconfigured environment (such as
old container runtime or alike) can disable GCS usage.
>
> We could check the value returned from syscall and call a function that would
> print error message and exit using _dl_fatal_printf.
>
> I suppose the same should be done for static executables? We'd need to check
> return value of the prctl syscall in aarch64_libc_setup_tls. Is it OK to call
> _dl_fatal_printf in aarch64_libc_setup_tls?
I think it should be, at least with a rapid test by issuing _dl_fatal_printf
unconditionally, static tests does print the message.
>
> Thanks,
> Yury
>
@@ -35,7 +35,9 @@ endif
ifeq ($(subdir),elf)
sysdep-rtld-routines += dl-start
sysdep-dl-routines += tlsdesc dl-tlsdesc
-gen-as-const-headers += dl-link.sym
+gen-as-const-headers += \
+ dl-link.sym \
+ rtld-global-offsets.sym
tests-internal += tst-ifunc-arg-1 tst-ifunc-arg-2
@@ -18,6 +18,7 @@
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#include <rtld-global-offsets.h>
ENTRY (_start)
/* Create an initial frame with 0 LR and FP */
@@ -25,11 +26,32 @@ ENTRY (_start)
mov x29, #0
mov x30, #0
+ /* Load and relocate all library dependencies. */
mov x0, sp
PTR_ARG (0)
bl _dl_start
/* Returns user entry point in x0. */
mov PTR_REG (21), PTR_REG (0)
+
+ /* Use GL(dl_aarch64_gcs) to set the shadow stack status. */
+ adrp x16, _rtld_local
+ add PTR_REG (16), PTR_REG (16), :lo12:_rtld_local
+ ldr x1, [x16, GL_DL_AARCH64_GCS_OFFSET]
+ cbz x1, L(skip_gcs_enable)
+
+ /* Enable GCS before user code runs. Note that IFUNC resolvers and
+ LD_AUDIT hooks may run before, but should not create threads. */
+#define PR_SET_SHADOW_STACK_STATUS 75
+#define PR_SHADOW_STACK_ENABLE (1UL << 0)
+ mov x0, PR_SET_SHADOW_STACK_STATUS
+ mov x1, PR_SHADOW_STACK_ENABLE
+ mov x2, 0
+ mov x3, 0
+ mov x4, 0
+ mov x8, #SYS_ify(prctl)
+ svc 0x0
+L(skip_gcs_enable):
+
.globl _dl_start_user
.type _dl_start_user, %function
_dl_start_user:
@@ -40,8 +62,7 @@ _dl_start_user:
/* Compute envp. */
add PTR_REG (3), PTR_REG (2), PTR_REG (1), lsl PTR_LOG_SIZE
add PTR_REG (3), PTR_REG (3), PTR_SIZE
- adrp x16, _rtld_local
- add PTR_REG (16), PTR_REG (16), :lo12:_rtld_local
+ /* Run the init functions of the loaded modules. */
ldr PTR_REG (0), [x16]
bl _dl_init
/* Load the finalizer function. */
@@ -3,8 +3,13 @@
#include <ldsodefs.h>
#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name)
+#define GL_offsetof(name) offsetof (struct rtld_global, _##name)
-- Offsets of _rtld_global_ro in libc.so
GLRO_DL_HWCAP_OFFSET GLRO_offsetof (dl_hwcap)
GLRO_DL_HWCAP2_OFFSET GLRO_offsetof (dl_hwcap2)
+
+-- Offsets of _rtld_global in libc.so
+
+GL_DL_AARCH64_GCS_OFFSET GL_offsetof (dl_aarch64_gcs)