Fix runtime linker auditing on aarch64 V2

Message ID 20200923160448.2321909-1-woodard@redhat.com
State Superseded
Delegated to: Szabolcs Nagy
Headers
Series Fix runtime linker auditing on aarch64 V2 |

Commit Message

Ben Woodard Sept. 23, 2020, 4:04 p.m. UTC
  The dynamic linker's auditing was not working on aarch64. There were two
distinct problems:
  * _dl_runtime_resolve was not preserving x8 the indirect result location
    register.
  * The NEON Q registers pushed onto the stack by _dl_runtime_resolve
    were twice the size of D registers extracted from the stack frame by
    _dl_runtime_profile.

To fix this
  * The La_aarch64_regs structure was expanded to include x8 and the full
    sized NEON V registers that are required to be preserved by the ABI.
  * _dl_runtime_profile needed to extract registers saved by
    _dl_runtime_resolve and put them into the new correctly sized
    La_aarch64_regs structure.
  * The return value structure La_aarch64_retval also didn't have the correctly
    sized NEON V registers.

As a couple of additional cleanups
  * The names of the NEON registers saved within the La_aarch64_regs and the
    La_aarch_retval structures referred to the old D registers which were
    doubles. Now the registers are quads and are called V for vector registers.
    So the name of the field in the structure and the names of the offsets
    within that structure were renamed to use the more modern names.
  * The ABI specification says that r0-r7 register as well as the NEON v0-v7
    registers can be used to return values from a function. Therefore, I addded
    those to the La_aarch64_retval structure so that it also correctly matches
    the ABI.

Differences between V2 and V1 of the patch:
  * V registers are now typed "long double" rather than "__uint128_t" an
    internal only type.
  * x8 is no longer preserved in the La_aarch64_retval
  * Comment describing the stack frame in _dl_runtime_profile had incorrect
    offsets after the size of structures changed.
---
 sysdeps/aarch64/bits/link.h     | 16 +++---
 sysdeps/aarch64/dl-link.sym     |  4 +-
 sysdeps/aarch64/dl-trampoline.S | 88 ++++++++++++++++++++-------------
 3 files changed, 64 insertions(+), 44 deletions(-)
  

Patch

diff --git a/sysdeps/aarch64/bits/link.h b/sysdeps/aarch64/bits/link.h
index 0c54e6ea7b..948a61b295 100644
--- a/sysdeps/aarch64/bits/link.h
+++ b/sysdeps/aarch64/bits/link.h
@@ -23,19 +23,19 @@ 
 /* Registers for entry into PLT on AArch64.  */
 typedef struct La_aarch64_regs
 {
-  uint64_t lr_xreg[8];
-  uint64_t lr_dreg[8];
-  uint64_t lr_sp;
-  uint64_t lr_lr;
+  uint64_t    lr_xreg[9];
+  long double lr_vreg[8];
+  uint64_t    lr_sp;
+  uint64_t    lr_lr;
 } La_aarch64_regs;
 
 /* Return values for calls from PLT on AArch64.  */
 typedef struct La_aarch64_retval
 {
-  /* Up to two integer registers can be used for a return value.  */
-  uint64_t lrv_xreg[2];
-  /* Up to four D registers can be used for a return value.  */
-  uint64_t lrv_dreg[4];
+  /* Up to eight integer registers can be used for a return value.  */
+  uint64_t    lrv_xreg[8];
+  /* Up to eight V registers can be used for a return value.  */
+  long double lrv_vreg[8];
 
 } La_aarch64_retval;
 __BEGIN_DECLS
diff --git a/sysdeps/aarch64/dl-link.sym b/sysdeps/aarch64/dl-link.sym
index d67d28b40c..70d153a1d5 100644
--- a/sysdeps/aarch64/dl-link.sym
+++ b/sysdeps/aarch64/dl-link.sym
@@ -7,9 +7,9 @@  DL_SIZEOF_RG		sizeof(struct La_aarch64_regs)
 DL_SIZEOF_RV		sizeof(struct La_aarch64_retval)
 
 DL_OFFSET_RG_X0		offsetof(struct La_aarch64_regs, lr_xreg)
-DL_OFFSET_RG_D0		offsetof(struct La_aarch64_regs, lr_dreg)
+DL_OFFSET_RG_V0		offsetof(struct La_aarch64_regs, lr_vreg)
 DL_OFFSET_RG_SP		offsetof(struct La_aarch64_regs, lr_sp)
 DL_OFFSET_RG_LR		offsetof(struct La_aarch64_regs, lr_lr)
 
 DL_OFFSET_RV_X0		offsetof(struct La_aarch64_retval, lrv_xreg)
-DL_OFFSET_RV_D0		offsetof(struct La_aarch64_retval, lrv_dreg)
+DL_OFFSET_RV_V0		offsetof(struct La_aarch64_retval, lrv_vreg)
diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S
index 794876fffa..22363c71a1 100644
--- a/sysdeps/aarch64/dl-trampoline.S
+++ b/sysdeps/aarch64/dl-trampoline.S
@@ -46,6 +46,8 @@  _dl_runtime_resolve:
 	cfi_rel_offset (lr, 8)
 
 	/* Save arguments.  */
+	/* Note: Saving x9 is not required by the ABI but the assember requires
+	   the immediate values of operand 3 to be a multiple of 16 */
 	stp	x8, x9, [sp, #-(80+8*16)]!
 	cfi_adjust_cfa_offset (80+8*16)
 	cfi_rel_offset (x8, 0)
@@ -142,13 +144,14 @@  _dl_runtime_profile:
 	   Stack frame layout:
 	   [sp,   #...] lr
 	   [sp,   #...] &PLTGOT[n]
-	   [sp,    #96] La_aarch64_regs
-	   [sp,    #48] La_aarch64_retval
-	   [sp,    #40] frame size return from pltenter
-	   [sp,    #32] dl_profile_call saved x1
-	   [sp,    #24] dl_profile_call saved x0
-	   [sp,    #16] t1
-	   [sp,     #0] x29, lr   <- x29
+	   alignment padding 8 bytes
+	   La_aarch64_regs
+	   La_aarch64_retval
+	   frame size return from pltenter
+	   dl_profile_call saved x1
+	   dl_profile_call saved x0
+	   t1
+	   x29, lr   <- x29
 	 */
 
 # define OFFSET_T1		16
@@ -183,19 +186,22 @@  _dl_runtime_profile:
 	stp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
 	cfi_rel_offset (x6, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 0)
 	cfi_rel_offset (x7, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 8)
-
-	stp	d0, d1, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
-	cfi_rel_offset (d0, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0)
-	cfi_rel_offset (d1, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0 + 8)
-	stp	d2, d3, [X29, #OFFSET_RG+ DL_OFFSET_RG_D0 + 16*1]
-	cfi_rel_offset (d2, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 0)
-	cfi_rel_offset (d3, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 8)
-	stp	d4, d5, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
-	cfi_rel_offset (d4, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 0)
-	cfi_rel_offset (d5, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 8)
-	stp	d6, d7, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
-	cfi_rel_offset (d6, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 0)
-	cfi_rel_offset (d7, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 8)
+	str	x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4 + 0]
+	cfi_rel_offset (x8, OFFSET_RG + DL_OFFSET_RG_X0 + 16*4 + 0)
+	/* Note 8 bytes of padding is in the stack frame for alignment */
+
+	stp	q0, q1, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0]
+	cfi_rel_offset (q0, OFFSET_RG + DL_OFFSET_RG_V0 + 32*0)
+	cfi_rel_offset (q1, OFFSET_RG + DL_OFFSET_RG_V0 + 32*0 + 16)
+	stp	q2, q3, [X29, #OFFSET_RG+ DL_OFFSET_RG_V0 + 32*1]
+	cfi_rel_offset (q2, OFFSET_RG + DL_OFFSET_RG_V0 + 32*1 + 0)
+	cfi_rel_offset (q3, OFFSET_RG + DL_OFFSET_RG_V0 + 32*1 + 16)
+	stp	q4, q5, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2]
+	cfi_rel_offset (q4, OFFSET_RG + DL_OFFSET_RG_V0 + 32*2 + 0)
+	cfi_rel_offset (q5, OFFSET_RG + DL_OFFSET_RG_V0 + 32*2 + 16)
+	stp	q6, q7, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3]
+	cfi_rel_offset (q6, OFFSET_RG + DL_OFFSET_RG_V0 + 32*3 + 0)
+	cfi_rel_offset (q7, OFFSET_RG + DL_OFFSET_RG_V0 + 32*3 + 16)
 
 	add     x0, x29, #SF_SIZE + 16
 	ldr	x1, [x29, #OFFSET_LR]
@@ -234,10 +240,10 @@  _dl_runtime_profile:
 	ldp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
 	ldp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
 	ldp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
-	ldp	d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
-	ldp	d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
-	ldp	d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
-	ldp	d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+	ldp	q0, q1, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0]
+	ldp	q2, q3, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*1]
+	ldp	q4, q5, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2]
+	ldp	q6, q7, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3]
 
 	cfi_def_cfa_register (sp)
 	ldp	x29, x30, [x29, #0]
@@ -280,14 +286,21 @@  _dl_runtime_profile:
 	ldp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
 	ldp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
 	ldp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
-	ldp	d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
-	ldp	d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
-	ldp	d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
-	ldp	d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+	ldr	x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4]
+	ldp	q0, q1, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0]
+	ldp	q2, q3, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*1]
+	ldp	q4, q5, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2]
+	ldp	q6, q7, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3]
 	blr	ip0
-	stp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
-	stp	d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
-	stp	d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
+	stp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*0]
+	stp	x2, x3, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*1]
+	stp	x4, x5, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*2]
+	stp	x6, x7, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*3]
+	str	x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4]
+	stp	q0, q1, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*0]
+	stp	q2, q3, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*1]
+	stp	q4, q5, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*2]
+	stp	q6, q7, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*3]
 
 	/* Setup call to pltexit  */
 	ldp	x0, x1, [x29, #OFFSET_SAVED_CALL_X0]
@@ -295,9 +308,16 @@  _dl_runtime_profile:
 	add	x3, x29, #OFFSET_RV
 	bl	_dl_call_pltexit
 
-	ldp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
-	ldp	d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
-	ldp	d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
+	ldp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*0]
+	ldp	x2, x3, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*1]
+	ldp	x4, x5, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*2]
+	ldp	x6, x7, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*3]
+	ldr	x8, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*4]
+	ldp	q0, q1, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*0]
+	ldp	q2, q3, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*1]
+	ldp	q4, q5, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*2]
+	ldp	q6, q7, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*3]
+
 	/* LR from within La_aarch64_reg */
 	ldr	lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR]
 	cfi_restore(lr)