[18/22] aarch64: libitm: Add GCS support
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
Commit Message
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Transaction begin and abort use setjmp/longjmp like operations that
need to be updated for GCS compatibility. We use similar logic to
libc setjmp/longjmp that support switching stack and thus switching
GCS (e.g. due to longjmp out of a makecontext stack), this is kept
even though it is likely not required for transaction aborts.
The gtm_jmpbuf is internal to libitm so we can change its layout
without breaking ABI.
libitm/ChangeLog:
* config/aarch64/sjlj.S: Add GCS support and mark GCS compatible.
* config/aarch64/target.h: Add gcs field to gtm_jmpbuf.
---
libitm/config/aarch64/sjlj.S | 60 ++++++++++++++++++++++++++++++++--
libitm/config/aarch64/target.h | 1 +
2 files changed, 58 insertions(+), 3 deletions(-)
Comments
Yury Khrustalev <yury.khrustalev@arm.com> writes:
> From: Szabolcs Nagy <szabolcs.nagy@arm.com>
>
> Transaction begin and abort use setjmp/longjmp like operations that
> need to be updated for GCS compatibility. We use similar logic to
> libc setjmp/longjmp that support switching stack and thus switching
> GCS (e.g. due to longjmp out of a makecontext stack), this is kept
> even though it is likely not required for transaction aborts.
>
> The gtm_jmpbuf is internal to libitm so we can change its layout
> without breaking ABI.
>
> libitm/ChangeLog:
>
> * config/aarch64/sjlj.S: Add GCS support and mark GCS compatible.
> * config/aarch64/target.h: Add gcs field to gtm_jmpbuf.
> ---
> libitm/config/aarch64/sjlj.S | 60 ++++++++++++++++++++++++++++++++--
> libitm/config/aarch64/target.h | 1 +
> 2 files changed, 58 insertions(+), 3 deletions(-)
>
> diff --git a/libitm/config/aarch64/sjlj.S b/libitm/config/aarch64/sjlj.S
> index aeffd4d1070..cf1d8af2c96 100644
> --- a/libitm/config/aarch64/sjlj.S
> +++ b/libitm/config/aarch64/sjlj.S
> @@ -29,6 +29,13 @@
> #define AUTIASP hint 29
> #define PACIBSP hint 27
> #define AUTIBSP hint 31
> +#define CHKFEAT_X16 hint 40
> +#define MRS_GCSPR(x) mrs x, s3_3_c2_c5_1
> +#define GCSPOPM(x) sysl x, #3, c7, c7, #1
> +#define GCSSS1(x) sys #3, c7, c7, #2, x
> +#define GCSSS2(x) sysl x, #3, c7, c7, #3
> +
> +#define L(name) .L##name
>
> #if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__GCC_HAVE_DWARF2_CFI_ASM)
> # define cfi_negate_ra_state .cfi_negate_ra_state
> @@ -80,7 +87,16 @@ _ITM_beginTransaction:
> stp d10, d11, [sp, 7*16]
> stp d12, d13, [sp, 8*16]
> stp d14, d15, [sp, 9*16]
> - str x1, [sp, 10*16]
> +
> + /* GCS support. */
> + mov x2, 0
> + mov x16, 1
> + CHKFEAT_X16
> + tbnz x16, 0, L(gcs_done_sj)
> + MRS_GCSPR (x2)
> + add x2, x2, 8 /* GCS after _ITM_beginTransaction returns. */
> +L(gcs_done_sj):
> + stp x2, x1, [sp, 10*16]
>
> /* Invoke GTM_begin_transaction with the struct we just built. */
> mov x1, sp
> @@ -117,7 +133,38 @@ GTM_longjmp:
> ldp d10, d11, [x1, 7*16]
> ldp d12, d13, [x1, 8*16]
> ldp d14, d15, [x1, 9*16]
> +
> + /* GCS support. */
> + mov x16, 1
> + CHKFEAT_X16
> + tbnz x16, 0, L(gcs_done_lj)
> + MRS_GCSPR (x7)
> ldr x3, [x1, 10*16]
> + mov x4, x3
> + /* x7: GCSPR now. x3, x4: target GCSPR. x5, x6: tmp regs. */
> +L(gcs_scan):
> + cmp x7, x4
> + b.eq L(gcs_pop)
> + sub x4, x4, 8
> + /* Check for a cap token. */
> + ldr x5, [x4]
> + and x6, x4, 0xfffffffffffff000
> + orr x6, x6, 1
> + cmp x5, x6
> + b.ne L(gcs_scan)
> +L(gcs_switch):
> + add x7, x4, 8
> + GCSSS1 (x4)
> + GCSSS2 (xzr)
Don't we still need to pop from the current stack up to the switch point,
in case something further up the call frame wants to switch back to it?
If so, don't we also need to handle multiple switches, and similarly
pop from intermediate stacks?
E.g. if we have stacks S1-S3 and functions f1-f4, and a call stack:
f1 initially uses S1, switches to S2, calls f2
f2 initially uses S2, switches to S3, calls f3
f3 initially uses S3, switches to S1, calls f4
f4 initially uses S1, triggers a longjmp to f2
then wouldn't the longjmp need unwind S1 to the switch point;
unwind S3 through f3's entry to the switch point; and then unwind
S2 in the way that the routine currently does? Or is that kind of
situation not supported?
Thanks,
Richard
> +L(gcs_pop):
> + cmp x7, x3
> + b.eq L(gcs_done_lj)
> + GCSPOPM (xzr)
> + add x7, x7, 8
> + b L(gcs_pop)
> +L(gcs_done_lj):
> +
> + ldr x3, [x1, 10*16 + 8]
> ldp x29, x30, [x1]
> cfi_def_cfa(x1, 0)
> CFI_PAC_TOGGLE
> @@ -132,6 +179,7 @@ GTM_longjmp:
> #define FEATURE_1_AND 0xc0000000
> #define FEATURE_1_BTI 1
> #define FEATURE_1_PAC 2
> +#define FEATURE_1_GCS 4
>
> /* Supported features based on the code generation options. */
> #if defined(__ARM_FEATURE_BTI_DEFAULT)
> @@ -146,6 +194,12 @@ GTM_longjmp:
> # define PAC_FLAG 0
> #endif
>
> +#if __ARM_FEATURE_GCS_DEFAULT
> +# define GCS_FLAG FEATURE_1_GCS
> +#else
> +# define GCS_FLAG 0
> +#endif
> +
> /* Add a NT_GNU_PROPERTY_TYPE_0 note. */
> #define GNU_PROPERTY(type, value) \
> .section .note.gnu.property, "a"; \
> @@ -163,7 +217,7 @@ GTM_longjmp:
> .section .note.GNU-stack, "", %progbits
>
> /* Add GNU property note if built with branch protection. */
> -# if (BTI_FLAG|PAC_FLAG) != 0
> -GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
> +# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0
> +GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG)
> # endif
> #endif
> diff --git a/libitm/config/aarch64/target.h b/libitm/config/aarch64/target.h
> index 3d99197bfab..a1f39b4bf7a 100644
> --- a/libitm/config/aarch64/target.h
> +++ b/libitm/config/aarch64/target.h
> @@ -30,6 +30,7 @@ typedef struct gtm_jmpbuf
> unsigned long long pc; /* x30 */
> unsigned long long gr[10]; /* x19-x28 */
> unsigned long long vr[8]; /* d8-d15 */
> + void *gcs; /* GCSPR_EL0 */
> void *cfa;
> } gtm_jmpbuf;
On Thu, Oct 24, 2024 at 05:53:45PM +0100, Richard Sandiford wrote:
> Yury Khrustalev <yury.khrustalev@arm.com> writes:
> > From: Szabolcs Nagy <szabolcs.nagy@arm.com>
> >
> Don't we still need to pop from the current stack up to the switch point,
> in case something further up the call frame wants to switch back to it?
>
> If so, don't we also need to handle multiple switches, and similarly
> pop from intermediate stacks?
>
> E.g. if we have stacks S1-S3 and functions f1-f4, and a call stack:
>
> f1 initially uses S1, switches to S2, calls f2
> f2 initially uses S2, switches to S3, calls f3
> f3 initially uses S3, switches to S1, calls f4
> f4 initially uses S1, triggers a longjmp to f2
>
> then wouldn't the longjmp need unwind S1 to the switch point;
> unwind S3 through f3's entry to the switch point; and then unwind
> S2 in the way that the routine currently does? Or is that kind of
> situation not supported?
This is a valid question and it might require some time to find a good
answer and change implementation accordingly. In the v2 of this patch
series [1] I have removed the addition of the GCS support in libitm.
When configured and built with branch protection that implies GCS (e.g
"standard") libitm will be linked without GCS marker.
Thanks,
Yury
[1] https://inbox.sourceware.org/gcc-patches/20241031132323.948159-1-yury.khrustalev@arm.com/
@@ -29,6 +29,13 @@
#define AUTIASP hint 29
#define PACIBSP hint 27
#define AUTIBSP hint 31
+#define CHKFEAT_X16 hint 40
+#define MRS_GCSPR(x) mrs x, s3_3_c2_c5_1
+#define GCSPOPM(x) sysl x, #3, c7, c7, #1
+#define GCSSS1(x) sys #3, c7, c7, #2, x
+#define GCSSS2(x) sysl x, #3, c7, c7, #3
+
+#define L(name) .L##name
#if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__GCC_HAVE_DWARF2_CFI_ASM)
# define cfi_negate_ra_state .cfi_negate_ra_state
@@ -80,7 +87,16 @@ _ITM_beginTransaction:
stp d10, d11, [sp, 7*16]
stp d12, d13, [sp, 8*16]
stp d14, d15, [sp, 9*16]
- str x1, [sp, 10*16]
+
+ /* GCS support. */
+ mov x2, 0
+ mov x16, 1
+ CHKFEAT_X16
+ tbnz x16, 0, L(gcs_done_sj)
+ MRS_GCSPR (x2)
+ add x2, x2, 8 /* GCS after _ITM_beginTransaction returns. */
+L(gcs_done_sj):
+ stp x2, x1, [sp, 10*16]
/* Invoke GTM_begin_transaction with the struct we just built. */
mov x1, sp
@@ -117,7 +133,38 @@ GTM_longjmp:
ldp d10, d11, [x1, 7*16]
ldp d12, d13, [x1, 8*16]
ldp d14, d15, [x1, 9*16]
+
+ /* GCS support. */
+ mov x16, 1
+ CHKFEAT_X16
+ tbnz x16, 0, L(gcs_done_lj)
+ MRS_GCSPR (x7)
ldr x3, [x1, 10*16]
+ mov x4, x3
+ /* x7: GCSPR now. x3, x4: target GCSPR. x5, x6: tmp regs. */
+L(gcs_scan):
+ cmp x7, x4
+ b.eq L(gcs_pop)
+ sub x4, x4, 8
+ /* Check for a cap token. */
+ ldr x5, [x4]
+ and x6, x4, 0xfffffffffffff000
+ orr x6, x6, 1
+ cmp x5, x6
+ b.ne L(gcs_scan)
+L(gcs_switch):
+ add x7, x4, 8
+ GCSSS1 (x4)
+ GCSSS2 (xzr)
+L(gcs_pop):
+ cmp x7, x3
+ b.eq L(gcs_done_lj)
+ GCSPOPM (xzr)
+ add x7, x7, 8
+ b L(gcs_pop)
+L(gcs_done_lj):
+
+ ldr x3, [x1, 10*16 + 8]
ldp x29, x30, [x1]
cfi_def_cfa(x1, 0)
CFI_PAC_TOGGLE
@@ -132,6 +179,7 @@ GTM_longjmp:
#define FEATURE_1_AND 0xc0000000
#define FEATURE_1_BTI 1
#define FEATURE_1_PAC 2
+#define FEATURE_1_GCS 4
/* Supported features based on the code generation options. */
#if defined(__ARM_FEATURE_BTI_DEFAULT)
@@ -146,6 +194,12 @@ GTM_longjmp:
# define PAC_FLAG 0
#endif
+#if __ARM_FEATURE_GCS_DEFAULT
+# define GCS_FLAG FEATURE_1_GCS
+#else
+# define GCS_FLAG 0
+#endif
+
/* Add a NT_GNU_PROPERTY_TYPE_0 note. */
#define GNU_PROPERTY(type, value) \
.section .note.gnu.property, "a"; \
@@ -163,7 +217,7 @@ GTM_longjmp:
.section .note.GNU-stack, "", %progbits
/* Add GNU property note if built with branch protection. */
-# if (BTI_FLAG|PAC_FLAG) != 0
-GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
+# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0
+GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG)
# endif
#endif
@@ -30,6 +30,7 @@ typedef struct gtm_jmpbuf
unsigned long long pc; /* x30 */
unsigned long long gr[10]; /* x19-x28 */
unsigned long long vr[8]; /* d8-d15 */
+ void *gcs; /* GCSPR_EL0 */
void *cfa;
} gtm_jmpbuf;