[18/22] aarch64: libitm: Add GCS support

Message ID 20241023110528.487830-19-yury.khrustalev@arm.com
State New
Headers
Series aarch64: Add support for Guarded Control Stack extension |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed

Commit Message

Yury Khrustalev Oct. 23, 2024, 11:05 a.m. UTC
  From: Szabolcs Nagy <szabolcs.nagy@arm.com>

Transaction begin and abort use setjmp/longjmp like operations that
need to be updated for GCS compatibility. We use similar logic to
libc setjmp/longjmp that support switching stack and thus switching
GCS (e.g. due to longjmp out of a makecontext stack), this is kept
even though it is likely not required for transaction aborts.

The gtm_jmpbuf is internal to libitm so we can change its layout
without breaking ABI.

libitm/ChangeLog:

	* config/aarch64/sjlj.S: Add GCS support and mark GCS compatible.
	* config/aarch64/target.h: Add gcs field to gtm_jmpbuf.
---
 libitm/config/aarch64/sjlj.S   | 60 ++++++++++++++++++++++++++++++++--
 libitm/config/aarch64/target.h |  1 +
 2 files changed, 58 insertions(+), 3 deletions(-)
  

Comments

Richard Sandiford Oct. 24, 2024, 4:53 p.m. UTC | #1
Yury Khrustalev <yury.khrustalev@arm.com> writes:
> From: Szabolcs Nagy <szabolcs.nagy@arm.com>
>
> Transaction begin and abort use setjmp/longjmp like operations that
> need to be updated for GCS compatibility. We use similar logic to
> libc setjmp/longjmp that support switching stack and thus switching
> GCS (e.g. due to longjmp out of a makecontext stack), this is kept
> even though it is likely not required for transaction aborts.
>
> The gtm_jmpbuf is internal to libitm so we can change its layout
> without breaking ABI.
>
> libitm/ChangeLog:
>
> 	* config/aarch64/sjlj.S: Add GCS support and mark GCS compatible.
> 	* config/aarch64/target.h: Add gcs field to gtm_jmpbuf.
> ---
>  libitm/config/aarch64/sjlj.S   | 60 ++++++++++++++++++++++++++++++++--
>  libitm/config/aarch64/target.h |  1 +
>  2 files changed, 58 insertions(+), 3 deletions(-)
>
> diff --git a/libitm/config/aarch64/sjlj.S b/libitm/config/aarch64/sjlj.S
> index aeffd4d1070..cf1d8af2c96 100644
> --- a/libitm/config/aarch64/sjlj.S
> +++ b/libitm/config/aarch64/sjlj.S
> @@ -29,6 +29,13 @@
>  #define AUTIASP	hint	29
>  #define PACIBSP	hint	27
>  #define AUTIBSP	hint	31
> +#define CHKFEAT_X16	hint	40
> +#define MRS_GCSPR(x)	mrs	x, s3_3_c2_c5_1
> +#define GCSPOPM(x)	sysl	x, #3, c7, c7, #1
> +#define GCSSS1(x)	sys	#3, c7, c7, #2, x
> +#define GCSSS2(x)	sysl	x, #3, c7, c7, #3
> +
> +#define L(name) .L##name
>  
>  #if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__GCC_HAVE_DWARF2_CFI_ASM)
>  # define cfi_negate_ra_state .cfi_negate_ra_state
> @@ -80,7 +87,16 @@ _ITM_beginTransaction:
>  	stp	d10, d11, [sp, 7*16]
>  	stp	d12, d13, [sp, 8*16]
>  	stp	d14, d15, [sp, 9*16]
> -	str	x1, [sp, 10*16]
> +
> +	/* GCS support.  */
> +	mov	x2, 0
> +	mov	x16, 1
> +	CHKFEAT_X16
> +	tbnz	x16, 0, L(gcs_done_sj)
> +	MRS_GCSPR (x2)
> +	add	x2, x2, 8 /* GCS after _ITM_beginTransaction returns.  */
> +L(gcs_done_sj):
> +	stp	x2, x1, [sp, 10*16]
>  
>  	/* Invoke GTM_begin_transaction with the struct we just built.  */
>  	mov	x1, sp
> @@ -117,7 +133,38 @@ GTM_longjmp:
>  	ldp	d10, d11, [x1, 7*16]
>  	ldp	d12, d13, [x1, 8*16]
>  	ldp	d14, d15, [x1, 9*16]
> +
> +	/* GCS support.  */
> +	mov	x16, 1
> +	CHKFEAT_X16
> +	tbnz	x16, 0, L(gcs_done_lj)
> +	MRS_GCSPR (x7)
>  	ldr	x3, [x1, 10*16]
> +	mov	x4, x3
> +	/* x7: GCSPR now.  x3, x4: target GCSPR.  x5, x6: tmp regs.  */
> +L(gcs_scan):
> +	cmp	x7, x4
> +	b.eq	L(gcs_pop)
> +	sub	x4, x4, 8
> +	/* Check for a cap token.  */
> +	ldr	x5, [x4]
> +	and	x6, x4, 0xfffffffffffff000
> +	orr	x6, x6, 1
> +	cmp	x5, x6
> +	b.ne	L(gcs_scan)
> +L(gcs_switch):
> +	add	x7, x4, 8
> +	GCSSS1 (x4)
> +	GCSSS2 (xzr)

Don't we still need to pop from the current stack up to the switch point,
in case something further up the call frame wants to switch back to it?

If so, don't we also need to handle multiple switches, and similarly
pop from intermediate stacks?

E.g. if we have stacks S1-S3 and functions f1-f4, and a call stack:

  f1 initially uses S1, switches to S2, calls f2
  f2 initially uses S2, switches to S3, calls f3
  f3 initially uses S3, switches to S1, calls f4
  f4 initially uses S1, triggers a longjmp to f2

then wouldn't the longjmp need unwind S1 to the switch point;
unwind S3 through f3's entry to the switch point; and then unwind
S2 in the way that the routine currently does?  Or is that kind of
situation not supported?

Thanks,
Richard

> +L(gcs_pop):
> +	cmp	x7, x3
> +	b.eq	L(gcs_done_lj)
> +	GCSPOPM (xzr)
> +	add	x7, x7, 8
> +	b	L(gcs_pop)
> +L(gcs_done_lj):
> +
> +	ldr	x3, [x1, 10*16 + 8]
>  	ldp	x29, x30, [x1]
>  	cfi_def_cfa(x1, 0)
>  	CFI_PAC_TOGGLE
> @@ -132,6 +179,7 @@ GTM_longjmp:
>  #define FEATURE_1_AND 0xc0000000
>  #define FEATURE_1_BTI 1
>  #define FEATURE_1_PAC 2
> +#define FEATURE_1_GCS 4
>  
>  /* Supported features based on the code generation options.  */
>  #if defined(__ARM_FEATURE_BTI_DEFAULT)
> @@ -146,6 +194,12 @@ GTM_longjmp:
>  # define PAC_FLAG 0
>  #endif
>  
> +#if __ARM_FEATURE_GCS_DEFAULT
> +# define GCS_FLAG FEATURE_1_GCS
> +#else
> +# define GCS_FLAG 0
> +#endif
> +
>  /* Add a NT_GNU_PROPERTY_TYPE_0 note.  */
>  #define GNU_PROPERTY(type, value)	\
>    .section .note.gnu.property, "a";	\
> @@ -163,7 +217,7 @@ GTM_longjmp:
>  .section .note.GNU-stack, "", %progbits
>  
>  /* Add GNU property note if built with branch protection.  */
> -# if (BTI_FLAG|PAC_FLAG) != 0
> -GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
> +# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0
> +GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG)
>  # endif
>  #endif
> diff --git a/libitm/config/aarch64/target.h b/libitm/config/aarch64/target.h
> index 3d99197bfab..a1f39b4bf7a 100644
> --- a/libitm/config/aarch64/target.h
> +++ b/libitm/config/aarch64/target.h
> @@ -30,6 +30,7 @@ typedef struct gtm_jmpbuf
>    unsigned long long pc;	/* x30 */
>    unsigned long long gr[10];	/* x19-x28 */
>    unsigned long long vr[8];	/* d8-d15 */
> +  void *gcs;			/* GCSPR_EL0 */
>    void *cfa;
>  } gtm_jmpbuf;
  
Yury Khrustalev Oct. 31, 2024, 1:42 p.m. UTC | #2
On Thu, Oct 24, 2024 at 05:53:45PM +0100, Richard Sandiford wrote:
> Yury Khrustalev <yury.khrustalev@arm.com> writes:
> > From: Szabolcs Nagy <szabolcs.nagy@arm.com>
> >
> Don't we still need to pop from the current stack up to the switch point,
> in case something further up the call frame wants to switch back to it?
> 
> If so, don't we also need to handle multiple switches, and similarly
> pop from intermediate stacks?
> 
> E.g. if we have stacks S1-S3 and functions f1-f4, and a call stack:
> 
>   f1 initially uses S1, switches to S2, calls f2
>   f2 initially uses S2, switches to S3, calls f3
>   f3 initially uses S3, switches to S1, calls f4
>   f4 initially uses S1, triggers a longjmp to f2
> 
> then wouldn't the longjmp need unwind S1 to the switch point;
> unwind S3 through f3's entry to the switch point; and then unwind
> S2 in the way that the routine currently does?  Or is that kind of
> situation not supported?
 
This is a valid question and it might require some time to find a good
answer and change implementation accordingly. In the v2 of this patch
series [1] I have removed the addition of the GCS support in libitm.

When configured and built with branch protection that implies GCS (e.g
"standard") libitm will be linked without GCS marker.

Thanks,
Yury

[1] https://inbox.sourceware.org/gcc-patches/20241031132323.948159-1-yury.khrustalev@arm.com/
  

Patch

diff --git a/libitm/config/aarch64/sjlj.S b/libitm/config/aarch64/sjlj.S
index aeffd4d1070..cf1d8af2c96 100644
--- a/libitm/config/aarch64/sjlj.S
+++ b/libitm/config/aarch64/sjlj.S
@@ -29,6 +29,13 @@ 
 #define AUTIASP	hint	29
 #define PACIBSP	hint	27
 #define AUTIBSP	hint	31
+#define CHKFEAT_X16	hint	40
+#define MRS_GCSPR(x)	mrs	x, s3_3_c2_c5_1
+#define GCSPOPM(x)	sysl	x, #3, c7, c7, #1
+#define GCSSS1(x)	sys	#3, c7, c7, #2, x
+#define GCSSS2(x)	sysl	x, #3, c7, c7, #3
+
+#define L(name) .L##name
 
 #if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__GCC_HAVE_DWARF2_CFI_ASM)
 # define cfi_negate_ra_state .cfi_negate_ra_state
@@ -80,7 +87,16 @@  _ITM_beginTransaction:
 	stp	d10, d11, [sp, 7*16]
 	stp	d12, d13, [sp, 8*16]
 	stp	d14, d15, [sp, 9*16]
-	str	x1, [sp, 10*16]
+
+	/* GCS support.  */
+	mov	x2, 0
+	mov	x16, 1
+	CHKFEAT_X16
+	tbnz	x16, 0, L(gcs_done_sj)
+	MRS_GCSPR (x2)
+	add	x2, x2, 8 /* GCS after _ITM_beginTransaction returns.  */
+L(gcs_done_sj):
+	stp	x2, x1, [sp, 10*16]
 
 	/* Invoke GTM_begin_transaction with the struct we just built.  */
 	mov	x1, sp
@@ -117,7 +133,38 @@  GTM_longjmp:
 	ldp	d10, d11, [x1, 7*16]
 	ldp	d12, d13, [x1, 8*16]
 	ldp	d14, d15, [x1, 9*16]
+
+	/* GCS support.  */
+	mov	x16, 1
+	CHKFEAT_X16
+	tbnz	x16, 0, L(gcs_done_lj)
+	MRS_GCSPR (x7)
 	ldr	x3, [x1, 10*16]
+	mov	x4, x3
+	/* x7: GCSPR now.  x3, x4: target GCSPR.  x5, x6: tmp regs.  */
+L(gcs_scan):
+	cmp	x7, x4
+	b.eq	L(gcs_pop)
+	sub	x4, x4, 8
+	/* Check for a cap token.  */
+	ldr	x5, [x4]
+	and	x6, x4, 0xfffffffffffff000
+	orr	x6, x6, 1
+	cmp	x5, x6
+	b.ne	L(gcs_scan)
+L(gcs_switch):
+	add	x7, x4, 8
+	GCSSS1 (x4)
+	GCSSS2 (xzr)
+L(gcs_pop):
+	cmp	x7, x3
+	b.eq	L(gcs_done_lj)
+	GCSPOPM (xzr)
+	add	x7, x7, 8
+	b	L(gcs_pop)
+L(gcs_done_lj):
+
+	ldr	x3, [x1, 10*16 + 8]
 	ldp	x29, x30, [x1]
 	cfi_def_cfa(x1, 0)
 	CFI_PAC_TOGGLE
@@ -132,6 +179,7 @@  GTM_longjmp:
 #define FEATURE_1_AND 0xc0000000
 #define FEATURE_1_BTI 1
 #define FEATURE_1_PAC 2
+#define FEATURE_1_GCS 4
 
 /* Supported features based on the code generation options.  */
 #if defined(__ARM_FEATURE_BTI_DEFAULT)
@@ -146,6 +194,12 @@  GTM_longjmp:
 # define PAC_FLAG 0
 #endif
 
+#if __ARM_FEATURE_GCS_DEFAULT
+# define GCS_FLAG FEATURE_1_GCS
+#else
+# define GCS_FLAG 0
+#endif
+
 /* Add a NT_GNU_PROPERTY_TYPE_0 note.  */
 #define GNU_PROPERTY(type, value)	\
   .section .note.gnu.property, "a";	\
@@ -163,7 +217,7 @@  GTM_longjmp:
 .section .note.GNU-stack, "", %progbits
 
 /* Add GNU property note if built with branch protection.  */
-# if (BTI_FLAG|PAC_FLAG) != 0
-GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
+# if (BTI_FLAG|PAC_FLAG|GCS_FLAG) != 0
+GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG|GCS_FLAG)
 # endif
 #endif
diff --git a/libitm/config/aarch64/target.h b/libitm/config/aarch64/target.h
index 3d99197bfab..a1f39b4bf7a 100644
--- a/libitm/config/aarch64/target.h
+++ b/libitm/config/aarch64/target.h
@@ -30,6 +30,7 @@  typedef struct gtm_jmpbuf
   unsigned long long pc;	/* x30 */
   unsigned long long gr[10];	/* x19-x28 */
   unsigned long long vr[8];	/* d8-d15 */
+  void *gcs;			/* GCSPR_EL0 */
   void *cfa;
 } gtm_jmpbuf;