[1/N,MPX,x86_64] Intel MPX support in glibc for x86_64

Message ID CAMe9rOo=Vu_KSitiRp6k09kjm+QT2XgOw7Wkfz_6cFPTuneDHg@mail.gmail.com
State Committed
Headers

Commit Message

H.J. Lu April 1, 2014, 7:43 p.m. UTC
  On Tue, Apr 1, 2014 at 11:29 AM, Roland McGrath <roland@hack.frob.com> wrote:
> Can you use some macros for the stack offset constants?  There are too many
> magic numbers and too much undescribed arithmetic in that code already.
> That should be able to consolidate the #ifdef __ILP32__ into one spot.
>

This patch defines REGISTER_SAVE_AREA to specify the area on the stack
used to save/restore the registers used in parameter passing.  I didn't add
stack offset macros for individual registers.  I can submit a separate patch
if that is desired.

Any feedback on this patch?

Thanks.
  

Patch

From 88c6ca0bef20fa9fb71355fac3da526a138dd724 Mon Sep 17 00:00:00 2001
From: Igor Zamyatin <igor.zamyatin@intel.com>
Date: Tue, 1 Apr 2014 10:16:04 -0700
Subject: [PATCH] Save/restore bound registers in _dl_runtime_resolve

This patch saves and restores bound registers in symbol lookup for x86-64:

1. Branches without BND prefix clear bound registers.
2. x86-64 passes bounds in bound registers as specified in the MPX psABI
extension on the hjl/mpx/master branch at

https://github.com/hjl-tools/x86-64-psABI
https://groups.google.com/forum/#!topic/x86-64-abi/KFsB0XTgWYc

Binutils has been updated to create an alternate PLT to add BND prefix
when branching to ld.so.

	* config.h.in (HAVE_MPX_SUPPORT): New #undef.
	* sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT.
	* sysdeps/x86_64/configure: Regenerated.
	* sysdeps/x86_64/dl-trampoline.S (REGISTER_SAVE_AREA): New
	macro.
	(_dl_runtime_resolve): Use it.  Save and restore Intel MPX
	bound registers when calling _dl_fixup.
---
 ChangeLog                      | 11 +++++++++
 config.h.in                    |  3 +++
 sysdeps/x86_64/configure       | 27 ++++++++++++++++++++++
 sysdeps/x86_64/configure.ac    | 15 +++++++++++++
 sysdeps/x86_64/dl-trampoline.S | 51 +++++++++++++++++++++++++++++++++++++-----
 5 files changed, 101 insertions(+), 6 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index bfb3083..b5b2266 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@ 
+2014-04-01  Igor Zamyatin  <igor.zamyatin@intel.com>
+	    H.J. Lu  <hongjiu.lu@intel.com>
+
+	* config.h.in (HAVE_MPX_SUPPORT): New #undef.
+	* sysdeps/x86_64/configure.ac: Set HAVE_MPX_SUPPORT.
+	* sysdeps/x86_64/configure: Regenerated.
+	* sysdeps/x86_64/dl-trampoline.S (REGISTER_SAVE_AREA): New
+	macro.
+	(_dl_runtime_resolve): Use it.  Save and restore Intel MPX
+	bound registers when calling _dl_fixup.
+
 2014-04-01  Will Newton  <will.newton@linaro.org>
 
 	* benchtests/Makefile (CFLAGS-bench-ffs.c): Add
diff --git a/config.h.in b/config.h.in
index 3fc34bd..b6e3623 100644
--- a/config.h.in
+++ b/config.h.in
@@ -104,6 +104,9 @@ 
 /* Define if assembler supports AVX512.  */
 #undef  HAVE_AVX512_ASM_SUPPORT
 
+/* Define if assembler supports Intel MPX.  */
+#undef  HAVE_MPX_SUPPORT
+
 /* Define if gcc supports FMA4.  */
 #undef	HAVE_FMA4_SUPPORT
 
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
index c1c88c8..45d868d 100644
--- a/sysdeps/x86_64/configure
+++ b/sysdeps/x86_64/configure
@@ -222,6 +222,33 @@  $as_echo "$libc_cv_cc_novzeroupper" >&6; }
 config_vars="$config_vars
 config-cflags-novzeroupper = $libc_cv_cc_novzeroupper"
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for Intel MPX support" >&5
+$as_echo_n "checking for Intel MPX support... " >&6; }
+if ${libc_cv_asm_mpx+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.s <<\EOF
+        bndmov %bnd0,(%rsp)
+EOF
+if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+  libc_cv_asm_mpx=yes
+else
+  libc_cv_asm_mpx=no
+fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_mpx" >&5
+$as_echo "$libc_cv_asm_mpx" >&6; }
+if test $libc_cv_asm_mpx = yes; then
+  $as_echo "#define HAVE_MPX_SUPPORT 1" >>confdefs.h
+
+fi
+
 $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
 
 # work around problem with autoconf and empty lines at the end of files
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
index d34f9a8..9138f63 100644
--- a/sysdeps/x86_64/configure.ac
+++ b/sysdeps/x86_64/configure.ac
@@ -75,6 +75,21 @@  LIBC_TRY_CC_OPTION([-mno-vzeroupper],
 ])
 LIBC_CONFIG_VAR([config-cflags-novzeroupper], [$libc_cv_cc_novzeroupper])
 
+dnl Check whether the assembler accepts Intel MPX instructions (bndmov).
+AC_CACHE_CHECK(for Intel MPX support, libc_cv_asm_mpx, [dnl
+cat > conftest.s <<\EOF
+        bndmov %bnd0,(%rsp)
+EOF
+if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
+  libc_cv_asm_mpx=yes
+else
+  libc_cv_asm_mpx=no
+fi
+rm -f conftest*])
+if test $libc_cv_asm_mpx = yes; then
+  AC_DEFINE(HAVE_MPX_SUPPORT)
+fi
+
 dnl It is always possible to access static and hidden symbols in an
 dnl position independent way.
 AC_DEFINE(PI_STATIC_AND_HIDDEN)
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 77c4d0f..8761939 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -24,6 +24,16 @@ 
 # error RTLD_SAVESPACE_SSE must be aligned to 32 bytes
 #endif
 
+/* Size in bytes of the stack area used to save and restore registers
+   used for parameter passing when calling _dl_fixup.  */
+#ifdef __ILP32__
+/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX (7 * 8 bytes).  */
+# define REGISTER_SAVE_AREA (8 * 7)
+#else
+/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX (7 * 8 bytes) as
+   well as BND0, BND1, BND2, BND3 (4 * 16 bytes).  */
+# define REGISTER_SAVE_AREA (8 * 7 + 16 * 4)
+#endif
 	.text
 	.globl _dl_runtime_resolve
 	.type _dl_runtime_resolve, @function
@@ -31,8 +41,8 @@ 
 	cfi_startproc
 _dl_runtime_resolve:
 	cfi_adjust_cfa_offset(16) # Incorporate PLT
-	subq $56,%rsp
-	cfi_adjust_cfa_offset(56)
+	subq $REGISTER_SAVE_AREA,%rsp
+	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
 	movq %rax,(%rsp)	# Preserve registers otherwise clobbered.
 	movq %rcx, 8(%rsp)
 	movq %rdx, 16(%rsp)
@@ -40,10 +50,38 @@  _dl_runtime_resolve:
 	movq %rdi, 32(%rsp)
 	movq %r8, 40(%rsp)
 	movq %r9, 48(%rsp)
-	movq 64(%rsp), %rsi	# Copy args pushed by PLT in register.
-	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_index
+#ifndef __ILP32__
+# ifdef HAVE_MPX_SUPPORT
+	bndmov %bnd0, 56(%rsp)  # We also have to preserve bound registers.
+	bndmov %bnd1, 72(%rsp)  # These are nops if Intel MPX isn't available
+	bndmov %bnd2, 88(%rsp)  # or disabled.
+	bndmov %bnd3, 104(%rsp)
+# else
+	.byte 0x66,0x0f,0x1b,0x44,0x24,0x38	# bndmov %bnd0, 56(%rsp)
+	.byte 0x66,0x0f,0x1b,0x4c,0x24,0x48	# bndmov %bnd1, 72(%rsp)
+	.byte 0x66,0x0f,0x1b,0x54,0x24,0x58	# bndmov %bnd2, 88(%rsp)
+	.byte 0x66,0x0f,0x1b,0x5c,0x24,0x68	# bndmov %bnd3, 104(%rsp)
+# endif
+#endif
+	# Copy args pushed by PLT in register.
+	# %rdi: link_map, %rsi: reloc_index
+	movq (REGISTER_SAVE_AREA + 8)(%rsp), %rsi
+	movq REGISTER_SAVE_AREA(%rsp), %rdi
 	call _dl_fixup		# Call resolver.
 	movq %rax, %r11		# Save return value
+#ifndef __ILP32__
+# ifdef HAVE_MPX_SUPPORT
+	bndmov 104(%rsp), %bnd3  # Restore bound registers back.
+	bndmov 88(%rsp), %bnd2   # These are nops if Intel MPX isn't available
+	bndmov 72(%rsp), %bnd1   # or disabled.
+	bndmov 56(%rsp), %bnd0
+# else
+	.byte 0x66,0x0f,0x1a,0x5c,0x24,0x68	# bndmov 104(%rsp), %bnd3
+	.byte 0x66,0x0f,0x1a,0x54,0x24,0x58	# bndmov 88(%rsp), %bnd2
+	.byte 0x66,0x0f,0x1a,0x4c,0x24,0x48	# bndmov 72(%rsp), %bnd1
+	.byte 0x66,0x0f,0x1a,0x44,0x24,0x38	# bndmov 56(%rsp), %bnd0
+# endif
+#endif
 	movq 48(%rsp), %r9	# Get register content back.
 	movq 40(%rsp), %r8
 	movq 32(%rsp), %rdi
@@ -51,8 +89,9 @@  _dl_runtime_resolve:
 	movq 16(%rsp), %rdx
 	movq 8(%rsp), %rcx
 	movq (%rsp), %rax
-	addq $72, %rsp		# Adjust stack(PLT did 2 pushes)
-	cfi_adjust_cfa_offset(-72)
+	# Adjust stack(PLT did 2 pushes)
+	addq $(REGISTER_SAVE_AREA + 16), %rsp
+	cfi_adjust_cfa_offset(-(REGISTER_SAVE_AREA + 16))
 	jmp *%r11		# Jump to function address.
 	cfi_endproc
 	.size _dl_runtime_resolve, .-_dl_runtime_resolve
-- 
1.8.5.3