Add Prefer_MAP_32BIT_EXEC for Silvermont

Message ID 20151211143706.GA7868@intel.com
State New, archived
Headers

Commit Message

Lu, Hongjiu Dec. 11, 2015, 2:37 p.m. UTC
  According to the Silvermont software optimization guide, for 64-bit
applications, branch prediction performance can be negatively impacted
when the target of a branch is more than 4GB away from the branch.  Set
the Prefer_MAP_32BIT_EXEC bit for Silvermont so that mmap will try to
map executable pages with MAP_32BIT first.  Also enable Silvermont
optimizations for Knights Landing.

Prefer_MAP_32BIT_EXEC reduces the number of bits available for address
space layout randomization (ASLR).  Prefer_MAP_32BIT_EXEC can be turned
off by setting the environment variable LD_DISABLE_PREFER_MAP_32BIT_EXEC.

On Fedora 23, this patch speeds up GCC 5 testsuite by 3% on Silvermont.

Tested on ia32, x32 and x86-64.

H.J.
---
	* sysdeps/unix/sysv/linux/x86_64/64/mmap.c: New file.
	* sysdeps/x86/cpu-features.c (get_prefer_map_32bit_exec): New
	function.
	(init_cpu_features): Set the Prefer_MAP_32BIT_EXEC bit for
	Silvermont.  Enable Silvermont optimizations for Knights Landing.
	* sysdeps/x86/cpu-features.h (bit_Prefer_MAP_32BIT_EXEC): New.
	(index_Prefer_MAP_32BIT_EXEC): Likewise.
---
 sysdeps/unix/sysv/linux/x86_64/64/mmap.c | 49 ++++++++++++++++++++++++++++++++
 sysdeps/x86/cpu-features.c               | 44 ++++++++++++++++++++++++++--
 sysdeps/x86/cpu-features.h               |  3 ++
 3 files changed, 94 insertions(+), 2 deletions(-)
 create mode 100644 sysdeps/unix/sysv/linux/x86_64/64/mmap.c
  

Comments

Joseph Myers Dec. 11, 2015, 3:39 p.m. UTC | #1
On Fri, 11 Dec 2015, H.J. Lu wrote:

> +++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
> @@ -0,0 +1,49 @@
> +/* Copyright (C) 2015 Free Software Foundation, Inc.

All new files should have a descriptive first line before the copyright 
notice.
  

Patch

diff --git a/sysdeps/unix/sysv/linux/x86_64/64/mmap.c b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
new file mode 100644
index 0000000..57de010
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
@@ -0,0 +1,49 @@ 
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <sys/syscall.h>
+#include <sysdep.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <cpu-features.h>
+
+/* If ADDR is NULL, PROT has PROT_EXEC and Prefer_MAP_32BIT_EXEC is set,
+   try the mapping with MAP_32BIT first; otherwise, or if that attempt
+   fails, issue the mmap system call with FLAGS unchanged.  */
+__ptr_t
+__mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)
+{
+  if (addr == NULL && (prot & PROT_EXEC) != 0
+      && HAS_ARCH_FEATURE (Prefer_MAP_32BIT_EXEC))
+    {
+      /* Do not clobber ADDR: the fallback below must still see NULL.  */
+      __ptr_t ret = (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot,
+					      flags | MAP_32BIT, fd, offset);
+      if (ret != MAP_FAILED)
+	return ret;
+    }
+  return (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot, flags,
+				   fd, offset);
+}
+
+weak_alias (__mmap, mmap)
+weak_alias (__mmap, mmap64)
+weak_alias (__mmap, __mmap64)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index fba3ef0..6a132f7 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -39,6 +39,33 @@  get_common_indeces (struct cpu_features *cpu_features,
     }
 }
 
+/* Prefer_MAP_32BIT_EXEC reduces bits available for address space layout
+   randomization (ASLR).  Setting the environment variable
+   LD_DISABLE_PREFER_MAP_32BIT_EXEC turns Prefer_MAP_32BIT_EXEC off.  */
+
+static inline unsigned int
+get_prefer_map_32bit_exec (void)
+{
+  /* Return bit_Prefer_MAP_32BIT_EXEC, or 0 if the preference is off.  */
+#if defined __LP64__ && IS_IN (rtld)
+  extern char **__environ attribute_hidden;
+  /* An array, so the static object itself holds no address.  */
+  static const char disable[] = "LD_DISABLE_PREFER_MAP_32BIT_EXEC=";
+  for (char **current = __environ; *current != NULL; ++current)
+    {
+      /* Match through the '=', so any value (even empty) disables.  */
+      for (size_t i = 0; disable[i] == (*current)[i]; i++)
+	{
+	  if (disable[i] == '=')
+	    return 0;
+	}
+    }
+  return bit_Prefer_MAP_32BIT_EXEC;
+#else
+  return 0;
+#endif
+}
+
 static inline void
 init_cpu_features (struct cpu_features *cpu_features)
 {
@@ -78,22 +105,35 @@  init_cpu_features (struct cpu_features *cpu_features)
 	      cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF;
 	      break;
 
+	    case 0x57:
+	      /* Knights Landing.  Enable Silvermont optimizations.  */
+
 	    case 0x37:
 	    case 0x4a:
 	    case 0x4d:
 	    case 0x5a:
 	    case 0x5d:
-	      /* Unaligned load versions are faster than SSSE3
-		 on Silvermont.  */
+	      /* Unaligned load versions are faster than SSSE3 on
+		 Silvermont.  For 64-bit applications, branch
+		 prediction performance can be negatively impacted
+		 when the target of a branch is more than 4GB away
+		 from the branch.  Set the Prefer_MAP_32BIT_EXEC bit
+		 so that mmap will try to map executable pages with
+		 MAP_32BIT first.  NB: MAP_32BIT will map to lower
+		 2GB, not lower 4GB, address.  */
 #if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
 # error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
 #endif
+#if index_Fast_Unaligned_Load != index_Prefer_MAP_32BIT_EXEC
+# error index_Fast_Unaligned_Load != index_Prefer_MAP_32BIT_EXEC
+#endif
 #if index_Fast_Unaligned_Load != index_Slow_SSE4_2
 # error index_Fast_Unaligned_Load != index_Slow_SSE4_2
 #endif
 	      cpu_features->feature[index_Fast_Unaligned_Load]
 		|= (bit_Fast_Unaligned_Load
 		    | bit_Prefer_PMINUB_for_stringop
+		    | get_prefer_map_32bit_exec ()
 		    | bit_Slow_SSE4_2);
 	      break;
 
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 80edbee..93bee69 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -33,6 +33,7 @@ 
 #define bit_AVX512DQ_Usable		(1 << 13)
 #define bit_I586			(1 << 14)
 #define bit_I686			(1 << 15)
+#define bit_Prefer_MAP_32BIT_EXEC	(1 << 16)
 
 /* CPUID Feature flags.  */
 
@@ -97,6 +98,7 @@ 
 # define index_AVX512DQ_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_I586			FEATURE_INDEX_1*FEATURE_SIZE
 # define index_I686			FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Prefer_MAP_32BIT_EXEC	FEATURE_INDEX_1*FEATURE_SIZE
 
 # if defined (_LIBC) && !IS_IN (nonlib)
 #  ifdef __x86_64__
@@ -248,6 +250,7 @@  extern const struct cpu_features *__get_cpu_features (void)
 # define index_AVX512DQ_Usable		FEATURE_INDEX_1
 # define index_I586			FEATURE_INDEX_1
 # define index_I686			FEATURE_INDEX_1
+# define index_Prefer_MAP_32BIT_EXEC	FEATURE_INDEX_1
 
 #endif	/* !__ASSEMBLER__ */