[4/6] aarch64: Improve sysdep-cancel.h
Commit Message
From: Richard Henderson <rth@redhat.com>
Use a constant frame size, rather than pushing/popping for every saved
register. Use stp, ldp, cbz. Share code with the _nocancel path.
* sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h (PSEUDO):
Use ENTRY for _nocancel entry point. Reuse pieces of _nocancel
entry point for implementing the cancel path. Simplify cancel
path frame setup. Use cbz instead of cmp+bne for singlethread path.
(DOCARGS_2, UNDOCARGS_2): Use stp/ldp.
(DOCARGS_4, UNDOCARGS_4, DOCARGS_6, UNDOCARGS_6): Likewise.
(SINGLE_THREAD_P) [ASM]: Take a register number in which to
return the result.
---
.../unix/sysv/linux/aarch64/nptl/sysdep-cancel.h | 186 +++++++--------------
1 file changed, 64 insertions(+), 122 deletions(-)
Comments
On 20 May 2014 21:56, Richard Henderson <rth@twiddle.net> wrote:
> From: Richard Henderson <rth@redhat.com>
>
> Use a constant frame size, rather than pushing/popping for every saved
> register. Use stp, ldp, cbz. Share code with the _nocancel path.
>
> * sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h (PSEUDO):
> Use ENTRY for _nocancel entry point. Reuse pieces of _nocancel
> entry point for implementing the cancel path. Simplify cancel
> path frame setup. Use cbz instead of cmp+bne for singlethread path.
> (DOCARGS_2, UNDOCARGS_2): Use stp/ldp.
> (DOCARGS_4, UNDOCARGS_4, DOCARGS_6, UNDOCARGS_6): Likewise.
> (SINGLE_THREAD_P) [ASM]: Take a register number in which to
> return the result.
> ---
> .../unix/sysv/linux/aarch64/nptl/sysdep-cancel.h | 186 +++++++--------------
> 1 file changed, 64 insertions(+), 122 deletions(-)
>
> diff --git a/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h b/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h
> index e3b4b56..5cf3fd5 100644
> --- a/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h
> +++ b/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h
> @@ -26,121 +26,66 @@
>
> # undef PSEUDO
> # define PSEUDO(name, syscall_name, args) \
> - .section ".text"; \
> - .type __##syscall_name##_nocancel,%function; \
> - .globl __##syscall_name##_nocancel; \
> - __##syscall_name##_nocancel: \
> - cfi_startproc; \
> - DO_CALL (syscall_name, args); \
> - cmn x0, 4095; \
> - b.cs .Lsyscall_error; \
> - PSEUDO_RET; \
> - cfi_endproc; \
> - .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \
> - ENTRY (name); \
> - SINGLE_THREAD_P; \
> - bne .Lpseudo_cancel; \
> - DO_CALL (syscall_name, 0); \
> - cmn x0, 4095; \
> - b.cs .Lsyscall_error; \
> - PSEUDO_RET; \
> - .Lpseudo_cancel: \
> - DOCARGS_##args; /* save syscall args etc. around CENABLE. */ \
> - CENABLE; \
> - mov x16, x0; /* put mask in safe place. */ \
> - UNDOCARGS_##args; /* restore syscall args. */ \
> - mov x8, SYS_ify (syscall_name); /* do the call. */ \
> - svc 0; \
> - str x0, [sp, -16]!; /* save syscall return value. */ \
> - cfi_adjust_cfa_offset (16); \
> - mov x0, x16; /* get mask back. */ \
> - CDISABLE; \
> - ldr x0, [sp], 16; \
> - cfi_adjust_cfa_offset (-16); \
> - ldr x30, [sp], 16; \
> - cfi_adjust_cfa_offset (-16); \
> - cfi_restore (x30); \
> - UNDOARGS_##args; \
> - cmn x0, 4095; \
> - b.cs .Lsyscall_error;
> -
> -# define DOCARGS_0 \
> - str x30, [sp, -16]!; \
> - cfi_adjust_cfa_offset (16); \
> - cfi_rel_offset (x30, 0)
> -
> + .section ".text"; \
> +ENTRY (__##syscall_name##_nocancel); \
> +.Lpseudo_nocancel: \
> + DO_CALL (syscall_name, args); \
> +.Lpseudo_ret: \
> + cmn x0, 4095; \
> + b.cs .Lsyscall_error; \
> + .subsection 2; \
> + .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \
> +ENTRY (name); \
> + SINGLE_THREAD_P(16); \
> + cbz w16, .Lpseudo_nocancel; \
> + /* Setup common stack frame no matter the number of args. */ \
> + stp x19, x30, [sp, -64]!; \
> + cfi_adjust_cfa_offset (64); \
> + cfi_rel_offset (x19, 0); \
> + cfi_rel_offset (x30, 8); \
> + DOCARGS_##args; /* save syscall args around CENABLE. */ \
> + CENABLE; \
> + mov x19, x0; /* save mask around syscall. */ \
> + UNDOCARGS_##args; /* restore syscall args. */ \
> + DO_CALL (syscall_name, args); \
> + str x0, [sp, 16]; /* save syscall return value. */ \
> + mov x0, x19; /* pass mask to CDISABLE. */ \
> + CDISABLE; \
> + ldr x0, [sp, 16]; \
> + ldp x19, x30, [sp], 64; \
> + cfi_adjust_cfa_offset (-64); \
> + cfi_restore (x19); \
> + cfi_restore (x30); \
> + b .Lpseudo_ret; \
> + cfi_endproc; \
> + .size name, .-name; \
> + .previous
> +
> +# undef PSEUDO_END
> +# define PSEUDO_END(name) \
> + SYSCALL_ERROR_HANDLER; \
> + cfi_endproc
> +
> +# define DOCARGS_0
> # define UNDOCARGS_0
>
> -# define DOCARGS_1 \
> - DOCARGS_0; \
> - str x0, [sp, -16]!; \
> - cfi_adjust_cfa_offset (16); \
> - cfi_rel_offset (x0, 0)
> -
> -# define UNDOCARGS_1 \
> - ldr x0, [sp], 16; \
> - cfi_restore (x0); \
> - cfi_adjust_cfa_offset (-16); \
> -
> -# define DOCARGS_2 \
> - DOCARGS_1; \
> - str x1, [sp, -16]!; \
> - cfi_adjust_cfa_offset (16); \
> - cfi_rel_offset (x1, 0)
> -
> -# define UNDOCARGS_2 \
> - ldr x1, [sp], 16; \
> - cfi_restore (x1); \
> - cfi_adjust_cfa_offset (-16); \
> - UNDOCARGS_1
> -
> -# define DOCARGS_3 \
> - DOCARGS_2; \
> - str x2, [sp, -16]!; \
> - cfi_adjust_cfa_offset (16); \
> - cfi_rel_offset (x2, 0)
> -
> -# define UNDOCARGS_3 \
> - ldr x2, [sp], 16; \
> - cfi_restore (x2); \
> - cfi_adjust_cfa_offset (-16); \
> - UNDOCARGS_2
> -
> -# define DOCARGS_4 \
> - DOCARGS_3; \
> - str x3, [sp, -16]!; \
> - cfi_adjust_cfa_offset (16); \
> - cfi_rel_offset (x3, 0)
> -
> -# define UNDOCARGS_4 \
> - ldr x3, [sp], 16; \
> - cfi_restore (x3); \
> - cfi_adjust_cfa_offset (-16); \
> - UNDOCARGS_3
> -
> -# define DOCARGS_5 \
> - DOCARGS_4; \
> - str x4, [sp, -16]!; \
> - cfi_adjust_cfa_offset (16); \
> - cfi_rel_offset (x4, 0)
> -
> -# define UNDOCARGS_5 \
> - ldr x4, [sp], 16; \
> - cfi_restore (x4); \
> - cfi_adjust_cfa_offset (-16); \
> - UNDOCARGS_4
> -
> -# define DOCARGS_6 \
> - DOCARGS_5; \
> - str x5, [sp, -16]!; \
> - cfi_adjust_cfa_offset (16); \
> - cfi_rel_offset (x5, 0)
> -
> -# define UNDOCARGS_6 \
> - ldr x5, [sp], 16; \
> - cfi_restore (x5); \
> - cfi_adjust_cfa_offset (-16); \
> - UNDOCARGS_5
> +# define DOCARGS_1 str x0, [sp, 16]
> +# define UNDOCARGS_1 ldr x0, [sp, 16]
> +
> +# define DOCARGS_2 stp x0, x1, [sp, 16]
> +# define UNDOCARGS_2 ldp x0, x1, [sp, 16]
> +
> +# define DOCARGS_3 DOCARGS_2; str x2, [sp, 32]
> +# define UNDOCARGS_3 UNDOCARGS_2; ldr x2, [sp, 32]
> +
> +# define DOCARGS_4 DOCARGS_2; stp x2, x3, [sp, 32]
> +# define UNDOCARGS_4 UNDOCARGS_2; ldp x2, x3, [sp, 32]
> +
> +# define DOCARGS_5 DOCARGS_4; str x4, [sp, 48]
> +# define UNDOCARGS_5 UNDOCARGS_4; ldr x4, [sp, 48]
> +
> +# define DOCARGS_6 DOCARGS_4; stp x4, x5, [sp, 48]
> +# define UNDOCARGS_6 UNDOCARGS_4; ldp x4, x5, [sp, 48]
>
> # ifdef IS_IN_libpthread
> # define CENABLE bl __pthread_enable_asynccancel
> @@ -162,10 +107,9 @@
> extern int __local_multiple_threads attribute_hidden;
> # define SINGLE_THREAD_P __builtin_expect (__local_multiple_threads == 0, 1)
> # else
> -# define SINGLE_THREAD_P \
> - adrp x16, __local_multiple_threads; \
> - ldr w16, [x16, #:lo12:__local_multiple_threads]; \
> - cmp w16, 0;
> +# define SINGLE_THREAD_P(R) \
> + adrp x##R, __local_multiple_threads; \
> + ldr w##R, [x##R, #:lo12:__local_multiple_threads]
> # endif
> # else
> /* There is no __local_multiple_threads for librt, so use the TCB. */
> @@ -174,20 +118,18 @@ extern int __local_multiple_threads attribute_hidden;
> __builtin_expect (THREAD_GETMEM (THREAD_SELF, \
> header.multiple_threads) == 0, 1)
> # else
> -# define SINGLE_THREAD_P \
> +# define SINGLE_THREAD_P(R) \
> stp x0, x30, [sp, -16]!; \
> cfi_adjust_cfa_offset (16); \
> cfi_rel_offset (x0, 0); \
> cfi_rel_offset (x30, 8); \
> bl __read_tp; \
> sub x0, x0, PTHREAD_SIZEOF; \
> - ldr w16, [x0, PTHREAD_MULTIPLE_THREADS_OFFSET]; \
> + ldr w##R, [x0, PTHREAD_MULTIPLE_THREADS_OFFSET]; \
> ldp x0, x30, [sp], 16; \
> cfi_restore (x0); \
> cfi_restore (x30); \
> - cfi_adjust_cfa_offset (-16); \
> - cmp w16, 0
> -# define SINGLE_THREAD_P_PIC(x) SINGLE_THREAD_P
This macro gets removed, which I think probably deserves a mention in
the ChangeLog.
It looks like it can be removed from the ARM port too...
On 05/20/2014 01:56 PM, Richard Henderson wrote:
> - mov x16, x0; /* put mask in safe place. */ \
> - UNDOCARGS_##args; /* restore syscall args. */ \
> - mov x8, SYS_ify (syscall_name); /* do the call. */ \
> - svc 0; \
> - str x0, [sp, -16]!; /* save syscall return value. */ \
> - cfi_adjust_cfa_offset (16); \
> - mov x0, x16; /* get mask back. */ \
Oh, I should have mentioned that there's either a bug or an inconsistency here.
We're saving a value in x16 around the syscall. Except that on the C side, for
the inline syscalls, we mark x16 as clobbered.
My patch takes the position that we want to assume "normal-ish" calling
conventions for the kernel, and so puts the value in the normal call-saved
register x19.
If there is in fact a set of calling-convention call-clobbered registers that
are not clobbered by syscalls, then we should consider adjusting the inline
syscalls to match.
r~
On 21 May 2014 16:10, Richard Henderson <rth@twiddle.net> wrote:
> On 05/20/2014 01:56 PM, Richard Henderson wrote:
>> - mov x16, x0; /* put mask in safe place. */ \
>> - UNDOCARGS_##args; /* restore syscall args. */ \
>> - mov x8, SYS_ify (syscall_name); /* do the call. */ \
>> - svc 0; \
>> - str x0, [sp, -16]!; /* save syscall return value. */ \
>> - cfi_adjust_cfa_offset (16); \
>> - mov x0, x16; /* get mask back. */ \
>
> Oh, I should have mentioned, that there's either a bug or inconsistency here.
>
> We're saving a value in x16 around the syscall. Except that on the C side, for
> the inline syscalls, we mark x16 as clobbered.
>
> My patch assumes that we want to assume "normal-ish" calling conventions for
> the kernel and puts the value in the normal call saved register x19.
>
> If there are in fact a set of calling-convention call-clobbered registers that
> are not clobbered by syscalls, then we should consider adjusting the inline
> syscalls to match.
When this was discussed previously, it turned out that everything apart
from the return value is preserved across a syscall:
https://sourceware.org/ml/libc-alpha/2014-03/msg00552.html
I guess it would be better to avoid clobbering so many registers in
the inline case.
@@ -26,121 +26,66 @@
# undef PSEUDO
# define PSEUDO(name, syscall_name, args) \
- .section ".text"; \
- .type __##syscall_name##_nocancel,%function; \
- .globl __##syscall_name##_nocancel; \
- __##syscall_name##_nocancel: \
- cfi_startproc; \
- DO_CALL (syscall_name, args); \
- cmn x0, 4095; \
- b.cs .Lsyscall_error; \
- PSEUDO_RET; \
- cfi_endproc; \
- .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \
- ENTRY (name); \
- SINGLE_THREAD_P; \
- bne .Lpseudo_cancel; \
- DO_CALL (syscall_name, 0); \
- cmn x0, 4095; \
- b.cs .Lsyscall_error; \
- PSEUDO_RET; \
- .Lpseudo_cancel: \
- DOCARGS_##args; /* save syscall args etc. around CENABLE. */ \
- CENABLE; \
- mov x16, x0; /* put mask in safe place. */ \
- UNDOCARGS_##args; /* restore syscall args. */ \
- mov x8, SYS_ify (syscall_name); /* do the call. */ \
- svc 0; \
- str x0, [sp, -16]!; /* save syscall return value. */ \
- cfi_adjust_cfa_offset (16); \
- mov x0, x16; /* get mask back. */ \
- CDISABLE; \
- ldr x0, [sp], 16; \
- cfi_adjust_cfa_offset (-16); \
- ldr x30, [sp], 16; \
- cfi_adjust_cfa_offset (-16); \
- cfi_restore (x30); \
- UNDOARGS_##args; \
- cmn x0, 4095; \
- b.cs .Lsyscall_error;
-
-# define DOCARGS_0 \
- str x30, [sp, -16]!; \
- cfi_adjust_cfa_offset (16); \
- cfi_rel_offset (x30, 0)
-
+ .section ".text"; \
+ENTRY (__##syscall_name##_nocancel); \
+.Lpseudo_nocancel: \
+ DO_CALL (syscall_name, args); \
+.Lpseudo_ret: \
+ cmn x0, 4095; \
+ b.cs .Lsyscall_error; \
+ .subsection 2; \
+ .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \
+ENTRY (name); \
+ SINGLE_THREAD_P(16); \
+ cbz w16, .Lpseudo_nocancel; \
+ /* Setup common stack frame no matter the number of args. */ \
+ stp x19, x30, [sp, -64]!; \
+ cfi_adjust_cfa_offset (64); \
+ cfi_rel_offset (x19, 0); \
+ cfi_rel_offset (x30, 8); \
+ DOCARGS_##args; /* save syscall args around CENABLE. */ \
+ CENABLE; \
+ mov x19, x0; /* save mask around syscall. */ \
+ UNDOCARGS_##args; /* restore syscall args. */ \
+ DO_CALL (syscall_name, args); \
+ str x0, [sp, 16]; /* save syscall return value. */ \
+ mov x0, x19; /* pass mask to CDISABLE. */ \
+ CDISABLE; \
+ ldr x0, [sp, 16]; \
+ ldp x19, x30, [sp], 64; \
+ cfi_adjust_cfa_offset (-64); \
+ cfi_restore (x19); \
+ cfi_restore (x30); \
+ b .Lpseudo_ret; \
+ cfi_endproc; \
+ .size name, .-name; \
+ .previous
+
+# undef PSEUDO_END
+# define PSEUDO_END(name) \
+ SYSCALL_ERROR_HANDLER; \
+ cfi_endproc
+
+# define DOCARGS_0
# define UNDOCARGS_0
-# define DOCARGS_1 \
- DOCARGS_0; \
- str x0, [sp, -16]!; \
- cfi_adjust_cfa_offset (16); \
- cfi_rel_offset (x0, 0)
-
-# define UNDOCARGS_1 \
- ldr x0, [sp], 16; \
- cfi_restore (x0); \
- cfi_adjust_cfa_offset (-16); \
-
-# define DOCARGS_2 \
- DOCARGS_1; \
- str x1, [sp, -16]!; \
- cfi_adjust_cfa_offset (16); \
- cfi_rel_offset (x1, 0)
-
-# define UNDOCARGS_2 \
- ldr x1, [sp], 16; \
- cfi_restore (x1); \
- cfi_adjust_cfa_offset (-16); \
- UNDOCARGS_1
-
-# define DOCARGS_3 \
- DOCARGS_2; \
- str x2, [sp, -16]!; \
- cfi_adjust_cfa_offset (16); \
- cfi_rel_offset (x2, 0)
-
-# define UNDOCARGS_3 \
- ldr x2, [sp], 16; \
- cfi_restore (x2); \
- cfi_adjust_cfa_offset (-16); \
- UNDOCARGS_2
-
-# define DOCARGS_4 \
- DOCARGS_3; \
- str x3, [sp, -16]!; \
- cfi_adjust_cfa_offset (16); \
- cfi_rel_offset (x3, 0)
-
-# define UNDOCARGS_4 \
- ldr x3, [sp], 16; \
- cfi_restore (x3); \
- cfi_adjust_cfa_offset (-16); \
- UNDOCARGS_3
-
-# define DOCARGS_5 \
- DOCARGS_4; \
- str x4, [sp, -16]!; \
- cfi_adjust_cfa_offset (16); \
- cfi_rel_offset (x4, 0)
-
-# define UNDOCARGS_5 \
- ldr x4, [sp], 16; \
- cfi_restore (x4); \
- cfi_adjust_cfa_offset (-16); \
- UNDOCARGS_4
-
-# define DOCARGS_6 \
- DOCARGS_5; \
- str x5, [sp, -16]!; \
- cfi_adjust_cfa_offset (16); \
- cfi_rel_offset (x5, 0)
-
-# define UNDOCARGS_6 \
- ldr x5, [sp], 16; \
- cfi_restore (x5); \
- cfi_adjust_cfa_offset (-16); \
- UNDOCARGS_5
+# define DOCARGS_1 str x0, [sp, 16]
+# define UNDOCARGS_1 ldr x0, [sp, 16]
+
+# define DOCARGS_2 stp x0, x1, [sp, 16]
+# define UNDOCARGS_2 ldp x0, x1, [sp, 16]
+
+# define DOCARGS_3 DOCARGS_2; str x2, [sp, 32]
+# define UNDOCARGS_3 UNDOCARGS_2; ldr x2, [sp, 32]
+
+# define DOCARGS_4 DOCARGS_2; stp x2, x3, [sp, 32]
+# define UNDOCARGS_4 UNDOCARGS_2; ldp x2, x3, [sp, 32]
+
+# define DOCARGS_5 DOCARGS_4; str x4, [sp, 48]
+# define UNDOCARGS_5 UNDOCARGS_4; ldr x4, [sp, 48]
+
+# define DOCARGS_6 DOCARGS_4; stp x4, x5, [sp, 48]
+# define UNDOCARGS_6 UNDOCARGS_4; ldp x4, x5, [sp, 48]
# ifdef IS_IN_libpthread
# define CENABLE bl __pthread_enable_asynccancel
@@ -162,10 +107,9 @@
extern int __local_multiple_threads attribute_hidden;
# define SINGLE_THREAD_P __builtin_expect (__local_multiple_threads == 0, 1)
# else
-# define SINGLE_THREAD_P \
- adrp x16, __local_multiple_threads; \
- ldr w16, [x16, #:lo12:__local_multiple_threads]; \
- cmp w16, 0;
+# define SINGLE_THREAD_P(R) \
+ adrp x##R, __local_multiple_threads; \
+ ldr w##R, [x##R, #:lo12:__local_multiple_threads]
# endif
# else
/* There is no __local_multiple_threads for librt, so use the TCB. */
@@ -174,20 +118,18 @@ extern int __local_multiple_threads attribute_hidden;
__builtin_expect (THREAD_GETMEM (THREAD_SELF, \
header.multiple_threads) == 0, 1)
# else
-# define SINGLE_THREAD_P \
+# define SINGLE_THREAD_P(R) \
stp x0, x30, [sp, -16]!; \
cfi_adjust_cfa_offset (16); \
cfi_rel_offset (x0, 0); \
cfi_rel_offset (x30, 8); \
bl __read_tp; \
sub x0, x0, PTHREAD_SIZEOF; \
- ldr w16, [x0, PTHREAD_MULTIPLE_THREADS_OFFSET]; \
+ ldr w##R, [x0, PTHREAD_MULTIPLE_THREADS_OFFSET]; \
ldp x0, x30, [sp], 16; \
cfi_restore (x0); \
cfi_restore (x30); \
- cfi_adjust_cfa_offset (-16); \
- cmp w16, 0
-# define SINGLE_THREAD_P_PIC(x) SINGLE_THREAD_P
+ cfi_adjust_cfa_offset (-16)
# endif
# endif