x86-64: Properly align La_x86_64_retval to VEC_SIZE [BZ #22715]
Commit Message
_dl_runtime_profile calls _dl_call_pltexit, passing a pointer to
La_x86_64_retval which is allocated on stack. The lrv_vector0
field in La_x86_64_retval must be aligned to the size of a vector register.
When allocating stack space for La_x86_64_retval, we need to make sure
that the address of La_x86_64_retval + LRV_VECTOR0_OFFSET is aligned to
VEC_SIZE. This patch checks the alignment of the lrv_vector0 field
and pads the stack space if needed.
Tested with x32 and x86-64 on SSE4, AVX and AVX512 machines.
OK for master?
H.J.
---
[BZ #22715]
* sysdeps/x86_64/dl-trampoline.h (_dl_runtime_profile): Properly
align La_x86_64_retval to VEC_SIZE.
---
sysdeps/x86_64/dl-trampoline.h | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
Comments
On Mon, Jan 15, 2018 at 8:18 PM, H.J. Lu <hongjiu.lu@intel.com> wrote:
> _dl_runtime_profile calls _dl_call_pltexit, passing a pointer to
> La_x86_64_retval which is allocated on stack. The lrv_vector0
> field in La_x86_64_retval must be aligned to size of vector register.
> When allocating stack space for La_x86_64_retval, we need to make sure
> that the address of La_x86_64_retval + RV_VECTOR0_OFFSET is aligned to
> VEC_SIZE. This patch checks the alignment of the lrv_vector0 field
> and pads the stack space if needed.
>
> Tested with x32 and x86-64 on SSE4, AVX and AVX512 machines.
>
> OK for master?
>
> H.J.
> ---
> [BZ #22715]
> * sysdeps/x86_64/dl-trampoline.h (_dl_runtime_profile): Properly
> align La_x86_64_retval to VEC_SIZE.
> ---
> sysdeps/x86_64/dl-trampoline.h | 12 ++++++++++--
> 1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
> index 15edf49674..298cfb3d99 100644
> --- a/sysdeps/x86_64/dl-trampoline.h
> +++ b/sysdeps/x86_64/dl-trampoline.h
> @@ -440,8 +440,16 @@ _dl_runtime_profile:
> # ifdef RESTORE_AVX
> /* sizeof(La_x86_64_retval). Need extra space for 2 SSE
> registers to detect if xmm0/xmm1 registers are changed
> - by audit module. */
> - sub $(LRV_SIZE + XMM_SIZE*2), %RSP_LP
> + by audit module. Since rsp is aligned to VEC_SIZE, we
> + need to make sure that the address of La_x86_64_retval +
> + LRV_VECTOR0_OFFSET is aligned to VEC_SIZE. */
> +# define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
> +# define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
> +# if LRV_MISALIGNED == 0
> + sub $LRV_SPACE, %RSP_LP
> +# else
> + sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
> +# endif
> # else
> sub $LRV_SIZE, %RSP_LP # sizeof(La_x86_64_retval)
> # endif
> --
> 2.14.3
>
I am checking it in.
@@ -440,8 +440,16 @@ _dl_runtime_profile:
# ifdef RESTORE_AVX
/* sizeof(La_x86_64_retval). Need extra space for 2 SSE
registers to detect if xmm0/xmm1 registers are changed
- by audit module. */
- sub $(LRV_SIZE + XMM_SIZE*2), %RSP_LP
+ by audit module. Since rsp is aligned to VEC_SIZE, we
+ need to make sure that the address of La_x86_64_retval +
+ LRV_VECTOR0_OFFSET is aligned to VEC_SIZE. */
+# define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
+# define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
+# if LRV_MISALIGNED == 0
+ sub $LRV_SPACE, %RSP_LP
+# else
+ sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
+# endif
# else
sub $LRV_SIZE, %RSP_LP # sizeof(La_x86_64_retval)
# endif