Add Prefer_MAP_32BIT_EXEC for Silvermont
Commit Message
According to the Silvermont software optimization guide, for 64-bit
applications, branch prediction performance can be negatively impacted
when the target of a branch is more than 4GB away from the branch. Set
the Prefer_MAP_32BIT_EXEC bit for Silvermont so that mmap will try to
map executable pages with MAP_32BIT first. Also enable Silvermont
optimizations for Knights Landing.
Prefer_MAP_32BIT_EXEC reduces the bits available for address space layout
randomization (ASLR). The preference can be turned off by setting the
environment variable LD_DISABLE_PREFER_MAP_32BIT_EXEC.
On Fedora 23, this patch speeds up GCC 5 testsuite by 3% on Silvermont.
Tested on ia32, x32 and x86-64.
H.J.
---
* sysdeps/unix/sysv/linux/x86_64/64/mmap.c: New file.
* sysdeps/x86/cpu-features.c (get_prefer_map_32bit_exec): New
function.
(init_cpu_features): Set the Prefer_MAP_32BIT_EXEC bit for
Silvermont. Enable Silvermont optimizations for Knights Landing.
* sysdeps/x86/cpu-features.h (bit_Prefer_MAP_32BIT_EXEC): New.
(index_Prefer_MAP_32BIT_EXEC): Likewise.
---
sysdeps/unix/sysv/linux/x86_64/64/mmap.c | 49 ++++++++++++++++++++++++++++++++
sysdeps/x86/cpu-features.c | 44 ++++++++++++++++++++++++++--
sysdeps/x86/cpu-features.h | 3 ++
3 files changed, 94 insertions(+), 2 deletions(-)
create mode 100644 sysdeps/unix/sysv/linux/x86_64/64/mmap.c
Comments
On Fri, 11 Dec 2015, H.J. Lu wrote:
> +++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
> @@ -0,0 +1,49 @@
> +/* Copyright (C) 2015 Free Software Foundation, Inc.
All new files should have a descriptive first line before the copyright
notice.
new file mode 100644
@@ -0,0 +1,49 @@
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation; either version 2.1 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <sys/syscall.h>
+#include <sysdep.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <cpu-features.h>
+
+__ptr_t
+__mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)
+{
+  /* If the Prefer_MAP_32BIT_EXEC bit is set, try to map executable pages
+     with MAP_32BIT first.  The result is kept in RET: on failure ADDR
+     must still be NULL for the fallback, not MAP_FAILED.  */
+  if (addr == NULL
+      && (prot & PROT_EXEC) != 0
+      && HAS_ARCH_FEATURE (Prefer_MAP_32BIT_EXEC))
+    {
+      __ptr_t ret = (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot,
+                                              flags | MAP_32BIT, fd, offset);
+      if (ret != MAP_FAILED)
+        return ret;
+    }
+  return (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot, flags,
+                                   fd, offset);
+}
+
+weak_alias (__mmap, mmap)
+weak_alias (__mmap, mmap64)
+weak_alias (__mmap, __mmap64)
@@ -39,6 +39,33 @@ get_common_indeces (struct cpu_features *cpu_features,
}
}
+/* Return bit_Prefer_MAP_32BIT_EXEC, or 0 if it should not be set.
+   Prefer_MAP_32BIT_EXEC reduces bits available for address space layout
+   randomization (ASLR); it is turned off when the environment variable
+   LD_DISABLE_PREFER_MAP_32BIT_EXEC is set, even to an empty value.  */
+
+static inline unsigned int
+get_prefer_map_32bit_exec (void)
+{
+#if defined __LP64__ && IS_IN (rtld)
+  /* An array rather than a pointer, so no pointer object is relocated.  */
+  static const char disable[] = "LD_DISABLE_PREFER_MAP_32BIT_EXEC=";
+  extern char **__environ attribute_hidden;
+  for (char **current = __environ; *current != NULL; ++current)
+    {
+      /* Match the variable name up to and including the '='.  */
+      for (size_t i = 0; disable[i] == (*current)[i]; i++)
+        {
+          if (disable[i] == '=')
+            return 0;
+        }
+    }
+  return bit_Prefer_MAP_32BIT_EXEC;
+#else
+  return 0;
+#endif
+}
+
static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
@@ -78,22 +105,35 @@ init_cpu_features (struct cpu_features *cpu_features)
cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF;
break;
+ case 0x57:
+ /* Knights Landing. Enable Silvermont optimizations. */
+
case 0x37:
case 0x4a:
case 0x4d:
case 0x5a:
case 0x5d:
- /* Unaligned load versions are faster than SSSE3
- on Silvermont. */
+ /* Unaligned load versions are faster than SSSE3 on
+ Silvermont. For 64-bit applications, branch
+ prediction performance can be negatively impacted
+ when the target of a branch is more than 4GB away
+ from the branch. Set the Prefer_MAP_32BIT_EXEC bit
+ so that mmap will try to map executable pages with
+ MAP_32BIT first. NB: MAP_32BIT will map to lower
+ 2GB, not lower 4GB, address. */
#if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
# error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
#endif
+#if index_Fast_Unaligned_Load != index_Prefer_MAP_32BIT_EXEC
+# error index_Fast_Unaligned_Load != index_Prefer_MAP_32BIT_EXEC
+#endif
#if index_Fast_Unaligned_Load != index_Slow_SSE4_2
# error index_Fast_Unaligned_Load != index_Slow_SSE4_2
#endif
cpu_features->feature[index_Fast_Unaligned_Load]
|= (bit_Fast_Unaligned_Load
| bit_Prefer_PMINUB_for_stringop
+ | get_prefer_map_32bit_exec ()
| bit_Slow_SSE4_2);
break;
@@ -33,6 +33,7 @@
#define bit_AVX512DQ_Usable (1 << 13)
#define bit_I586 (1 << 14)
#define bit_I686 (1 << 15)
+#define bit_Prefer_MAP_32BIT_EXEC (1 << 16)
/* CPUID Feature flags. */
@@ -97,6 +98,7 @@
# define index_AVX512DQ_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_I586 FEATURE_INDEX_1*FEATURE_SIZE
# define index_I686 FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1*FEATURE_SIZE
# if defined (_LIBC) && !IS_IN (nonlib)
# ifdef __x86_64__
@@ -248,6 +250,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_AVX512DQ_Usable FEATURE_INDEX_1
# define index_I586 FEATURE_INDEX_1
# define index_I686 FEATURE_INDEX_1
+# define index_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1
#endif /* !__ASSEMBLER__ */