Patchwork Add Prefer_MAP_32BIT_EXEC for Silvermont

login
register
mail settings
Submitter H.J. Lu
Date Dec. 11, 2015, 3:59 p.m.
Message ID <CAMe9rOqbqyFw3CMa35vwOEefdFq1xK2Q9hX8GXoGMKVZ-A2y0g@mail.gmail.com>
Download mbox | patch
Permalink /patch/9981/
State New
Headers show

Comments

H.J. Lu - Dec. 11, 2015, 3:59 p.m.
On Fri, Dec 11, 2015 at 7:39 AM, Joseph Myers <joseph@codesourcery.com> wrote:
> On Fri, 11 Dec 2015, H.J. Lu wrote:
>
>> +++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
>> @@ -0,0 +1,49 @@
>> +/* Copyright (C) 2015 Free Software Foundation, Inc.
>
> All new files should have a descriptive first line before the copyright
> notice.
>

Here is the updated patch.
Adhemerval Zanella Netto - Dec. 11, 2015, 4:23 p.m.
On 11-12-2015 13:59, H.J. Lu wrote:
> On Fri, Dec 11, 2015 at 7:39 AM, Joseph Myers <joseph@codesourcery.com> wrote:
>> > On Fri, 11 Dec 2015, H.J. Lu wrote:
>> >
>>> >> +++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
>>> >> @@ -0,0 +1,49 @@
>>> >> +/* Copyright (C) 2015 Free Software Foundation, Inc.
>> >
>> > All new files should have a descriptive first line before the copyright
>> > notice.
>> >
> Here is the updated patch.

> 
> diff --git a/sysdeps/unix/sysv/linux/x86_64/64/mmap.c b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
> new file mode 100644
> index 0000000..c34f633
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c

> +
> +__ptr_t
> +__mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)
> +{
> +  /* If the Prefer_MAP_32BIT_EXEC bit is set, try to map executable pages
> +     with MAP_32BIT first.  */
> +  if (addr == NULL
> +      && (prot & PROT_EXEC) != 0
> +      && HAS_ARCH_FEATURE (Prefer_MAP_32BIT_EXEC))
> +    {
> +      addr = (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot,
> +				       flags | MAP_32BIT,
> +				       fd, offset);
> +      if (addr != MAP_FAILED)
> +	return addr;
> +    }
> +  return (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot, flags,
> +				   fd, offset);
> +}
> +

I would advise not add another syscall variant implementation, but rather 
work on make a generic implementation with adjustments made by each platform.
Something like

__ptr_t
__mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)
{
  flags = MMAP_ARCH_FLAGS (flags);
  return (__ptr_t) INLINE_SYSCALL( mmap, 6, addr, len, prot, flags, fd,  offset);
}

And then define MMAP_ARCH_FLAG for x86 to add the MAP_32BIT when required.

I am proposing it because I am reworking the cancellable syscalls implementations
to get rid of the sysdep-cancel assembly macros (which currently are not really
required) and this kind of code fragmentation only add complexity in the project.
H.J. Lu - Dec. 11, 2015, 4:40 p.m.
On Fri, Dec 11, 2015 at 8:23 AM, Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
>
> On 11-12-2015 13:59, H.J. Lu wrote:
>> On Fri, Dec 11, 2015 at 7:39 AM, Joseph Myers <joseph@codesourcery.com> wrote:
>>> > On Fri, 11 Dec 2015, H.J. Lu wrote:
>>> >
>>>> >> +++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
>>>> >> @@ -0,0 +1,49 @@
>>>> >> +/* Copyright (C) 2015 Free Software Foundation, Inc.
>>> >
>>> > All new files should have a descriptive first line before the copyright
>>> > notice.
>>> >
>> Here is the updated patch.
>
>>
>> diff --git a/sysdeps/unix/sysv/linux/x86_64/64/mmap.c b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
>> new file mode 100644
>> index 0000000..c34f633
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
>
>> +
>> +__ptr_t
>> +__mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)
>> +{
>> +  /* If the Prefer_MAP_32BIT_EXEC bit is set, try to map executable pages
>> +     with MAP_32BIT first.  */
>> +  if (addr == NULL
>> +      && (prot & PROT_EXEC) != 0
>> +      && HAS_ARCH_FEATURE (Prefer_MAP_32BIT_EXEC))
>> +    {
>> +      addr = (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot,
>> +                                    flags | MAP_32BIT,
>> +                                    fd, offset);
>> +      if (addr != MAP_FAILED)
>> +     return addr;
>> +    }
>> +  return (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot, flags,
>> +                                fd, offset);
>> +}
>> +
>
> I would advise not add another syscall variant implementation, but rather
> work on make a generic implementation with adjustments made by each platform.
> Something like
>
> __ptr_t
> __mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)
> {
>   flags = MMAP_ARCH_FLAGS (flags);
>   return (__ptr_t) INLINE_SYSCALL( mmap, 6, addr, len, prot, flags, fd,  offset);
> }
>
> And then define MMAP_ARCH_FLAG for x86 to add the MAP_32BIT when required.
>

This won't work here since we fallback to the default mmap when
MAP_32BIT fails.
Adhemerval Zanella Netto - Dec. 11, 2015, 5:02 p.m.
On 11-12-2015 14:40, H.J. Lu wrote:
> On Fri, Dec 11, 2015 at 8:23 AM, Adhemerval Zanella
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>> On 11-12-2015 13:59, H.J. Lu wrote:
>>> On Fri, Dec 11, 2015 at 7:39 AM, Joseph Myers <joseph@codesourcery.com> wrote:
>>>>> On Fri, 11 Dec 2015, H.J. Lu wrote:
>>>>>
>>>>>>> +++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
>>>>>>> @@ -0,0 +1,49 @@
>>>>>>> +/* Copyright (C) 2015 Free Software Foundation, Inc.
>>>>>
>>>>> All new files should have a descriptive first line before the copyright
>>>>> notice.
>>>>>
>>> Here is the updated patch.
>>
>>>
>>> diff --git a/sysdeps/unix/sysv/linux/x86_64/64/mmap.c b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
>>> new file mode 100644
>>> index 0000000..c34f633
>>> --- /dev/null
>>> +++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
>>
>>> +
>>> +__ptr_t
>>> +__mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)
>>> +{
>>> +  /* If the Prefer_MAP_32BIT_EXEC bit is set, try to map executable pages
>>> +     with MAP_32BIT first.  */
>>> +  if (addr == NULL
>>> +      && (prot & PROT_EXEC) != 0
>>> +      && HAS_ARCH_FEATURE (Prefer_MAP_32BIT_EXEC))
>>> +    {
>>> +      addr = (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot,
>>> +                                    flags | MAP_32BIT,
>>> +                                    fd, offset);
>>> +      if (addr != MAP_FAILED)
>>> +     return addr;
>>> +    }
>>> +  return (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot, flags,
>>> +                                fd, offset);
>>> +}
>>> +
>>
>> I would advise not add another syscall variant implementation, but rather
>> work on make a generic implementation with adjustments made by each platform.
>> Something like
>>
>> __ptr_t
>> __mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)
>> {
>>   flags = MMAP_ARCH_FLAGS (flags);
>>   return (__ptr_t) INLINE_SYSCALL( mmap, 6, addr, len, prot, flags, fd,  offset);
>> }
>>
>> And then define MMAP_ARCH_FLAG for x86 to add the MAP_32BIT when required.
>>
> 
> This won't work here since we fallback to the default mmap when
> MAP_32BIT fails.

So just change the MMAP_ARCH_FLAGS hook to something like:

 __ptr_t ret = __mmap_arch (addr, len, prot, flags, fd, offset));
 if (ret != MAP_FAILED)
   ret = INLINE_SYSCALL (mmap, 6, addr, len, prot, flags, fd, offset);
 return ret

The default value for __mmap_arch could be a constant value so compiler 
could remove the the comparison if it is the case.

Another issue is this is basically limiting ALSR really hard on x86_64.
I also would prefer to make the default to *not* include this flag and
set the env. variable to actually enable it. If the cpu is slow doing
what's intended because it is buggy, let it be slow at default. Do not
break what was intended (full ALSR).
H.J. Lu - Dec. 11, 2015, 5:14 p.m.
On Fri, Dec 11, 2015 at 9:02 AM, Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
>
> On 11-12-2015 14:40, H.J. Lu wrote:
>> On Fri, Dec 11, 2015 at 8:23 AM, Adhemerval Zanella
>> <adhemerval.zanella@linaro.org> wrote:
>>>
>>>
>>> On 11-12-2015 13:59, H.J. Lu wrote:
>>>> On Fri, Dec 11, 2015 at 7:39 AM, Joseph Myers <joseph@codesourcery.com> wrote:
>>>>>> On Fri, 11 Dec 2015, H.J. Lu wrote:
>>>>>>
>>>>>>>> +++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
>>>>>>>> @@ -0,0 +1,49 @@
>>>>>>>> +/* Copyright (C) 2015 Free Software Foundation, Inc.
>>>>>>
>>>>>> All new files should have a descriptive first line before the copyright
>>>>>> notice.
>>>>>>
>>>> Here is the updated patch.
>>>
>>>>
>>>> diff --git a/sysdeps/unix/sysv/linux/x86_64/64/mmap.c b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
>>>> new file mode 100644
>>>> index 0000000..c34f633
>>>> --- /dev/null
>>>> +++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
>>>
>>>> +
>>>> +__ptr_t
>>>> +__mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)
>>>> +{
>>>> +  /* If the Prefer_MAP_32BIT_EXEC bit is set, try to map executable pages
>>>> +     with MAP_32BIT first.  */
>>>> +  if (addr == NULL
>>>> +      && (prot & PROT_EXEC) != 0
>>>> +      && HAS_ARCH_FEATURE (Prefer_MAP_32BIT_EXEC))
>>>> +    {
>>>> +      addr = (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot,
>>>> +                                    flags | MAP_32BIT,
>>>> +                                    fd, offset);
>>>> +      if (addr != MAP_FAILED)
>>>> +     return addr;
>>>> +    }
>>>> +  return (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot, flags,
>>>> +                                fd, offset);
>>>> +}
>>>> +
>>>
>>> I would advise not add another syscall variant implementation, but rather
>>> work on make a generic implementation with adjustments made by each platform.
>>> Something like
>>>
>>> __ptr_t
>>> __mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)
>>> {
>>>   flags = MMAP_ARCH_FLAGS (flags);
>>>   return (__ptr_t) INLINE_SYSCALL( mmap, 6, addr, len, prot, flags, fd,  offset);
>>> }
>>>
>>> And then define MMAP_ARCH_FLAG for x86 to add the MAP_32BIT when required.
>>>
>>
>> This won't work here since we fallback to the default mmap when
>> MAP_32BIT fails.
>
> So just change the MMAP_ARCH_FLAGS hook to something like:
>
>  __ptr_t ret = __mmap_arch (addr, len, prot, flags, fd, offset));
>  if (ret != MAP_FAILED)
>    ret = INLINE_SYSCALL (mmap, 6, addr, len, prot, flags, fd, offset);
>  return ret

I will give it a try.

> The default value for __mmap_arch could be a constant value so compiler
> could remove the the comparison if it is the case.
>
> Another issue is this is basically limiting ALSR really hard on x86_64.
> I also would prefer to make the default to *not* include this flag and
> set the env. variable to actually enable it. If the cpu is slow doing
> what's intended because it is buggy, let it be slow at default. Do not
> break what was intended (full ALSR).

I will discuss it internally.

Thanks.
Zack Weinberg - Dec. 11, 2015, 6:11 p.m.
On Fri, Dec 11, 2015 at 12:02 PM, Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
> Another issue is this is basically limiting ALSR really hard on x86_64.
> I also would prefer to make the default to *not* include this flag and
> set the env. variable to actually enable it. If the cpu is slow doing
> what's intended because it is buggy, let it be slow at default. Do not
> break what was intended (full ALSR).

FWIW, I was about to post the exact same objection.  Relatedly, the
environment variable should be handled through the normal ld.so-tuning
environment variable mechanism (and, in particular, ineffective for
set*id binaries).

zw
H.J. Lu - Dec. 11, 2015, 7:45 p.m.
On Fri, Dec 11, 2015 at 10:11 AM, Zack Weinberg <zackw@panix.com> wrote:
> On Fri, Dec 11, 2015 at 12:02 PM, Adhemerval Zanella
> <adhemerval.zanella@linaro.org> wrote:
>> Another issue is this is basically limiting ALSR really hard on x86_64.
>> I also would prefer to make the default to *not* include this flag and
>> set the env. variable to actually enable it. If the cpu is slow doing
>> what's intended because it is buggy, let it be slow at default. Do not
>> break what was intended (full ALSR).
>
> FWIW, I was about to post the exact same objection.  Relatedly, the
> environment variable should be handled through the normal ld.so-tuning
> environment variable mechanism (and, in particular, ineffective for
> set*id binaries).
>

We have discussed it internally.  Since this is very critical for performance
on Silvermont based platforms, we want to keep it op-out for normal
programs and disable it for SUID programs.  Reduced address range is no
worse than 32-bit programs.

Like this.
Zack Weinberg - Dec. 11, 2015, 8:10 p.m.
On Fri, Dec 11, 2015 at 2:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Fri, Dec 11, 2015 at 10:11 AM, Zack Weinberg <zackw@panix.com> wrote:
>> On Fri, Dec 11, 2015 at 12:02 PM, Adhemerval Zanella
>> <adhemerval.zanella@linaro.org> wrote:
>>> Another issue is this is basically limiting ALSR really hard on x86_64.
>>> I also would prefer to make the default to *not* include this flag and
>>> set the env. variable to actually enable it. If the cpu is slow doing
>>> what's intended because it is buggy, let it be slow at default. Do not
>>> break what was intended (full ALSR).
>>
>> FWIW, I was about to post the exact same objection.  Relatedly, the
>> environment variable should be handled through the normal ld.so-tuning
>> environment variable mechanism (and, in particular, ineffective for
>> set*id binaries).
>>
>
> We have discussed it internally.  Since this is very critical for performance
> on Silvermont based platforms, we want to keep it op-out for normal
> programs and disable it for SUID programs.  Reduced address range is no
> worse than 32-bit programs.

I don't think 3% performance hit on a fork-intensive artificial
benchmark qualifies as "very critical"; certainly not enough to be
worth rendering ASLR _completely ineffective_ over.  Randomization
within a 2GB address space just isn't good enough to qualify even as a
_hurdle_ anymore.

Consider this a formal objection to the inclusion of this "feature" on
anything other than an opt-in basis.

zw
Zack Weinberg - Dec. 11, 2015, 8:44 p.m.
On Fri, Dec 11, 2015 at 3:10 PM, Zack Weinberg <zackw@panix.com> wrote:
> I don't think 3% performance hit on a fork-intensive artificial
> benchmark qualifies as "very critical"; certainly not enough to be
> worth rendering ASLR _completely ineffective_ over.  Randomization
> within a 2GB address space just isn't good enough to qualify even as a
> _hurdle_ anymore.

Just to back up this assertion: 16 bits of base address randomization
was brute-forceable in less than five minutes (on average) in 2004,
per http://www.cs.columbia.edu/~locasto/projects/candidacy/papers/shacham2004ccs.pdf
.  Digging into the kernel a little, it appears that MAP_32BIT (in
4.3) selects a page-aligned address at random from within a 1GB (not
2GB) space; that's *thirteen* bits of randomness, so we don't even
have to have the argument about how many more than 16 bits it would
take to be good enough in 2016; clearly *fewer* than 16 bits is
unacceptable.

zw
Jeff Law - Dec. 11, 2015, 8:50 p.m.
On 12/11/2015 01:44 PM, Zack Weinberg wrote:
> On Fri, Dec 11, 2015 at 3:10 PM, Zack Weinberg <zackw@panix.com> wrote:
>> I don't think 3% performance hit on a fork-intensive artificial
>> benchmark qualifies as "very critical"; certainly not enough to be
>> worth rendering ASLR _completely ineffective_ over.  Randomization
>> within a 2GB address space just isn't good enough to qualify even as a
>> _hurdle_ anymore.
>
> Just to back up this assertion: 16 bits of base address randomization
> was brute-forceable in less than five minutes (on average) in 2004,
> per http://www.cs.columbia.edu/~locasto/projects/candidacy/papers/shacham2004ccs.pdf
> .  Digging into the kernel a little, it appears that MAP_32BIT (in
> 4.3) selects a page-aligned address at random from within a 1GB (not
> 2GB) space; that's *thirteen* bits of randomness, so we don't even
> have to have the argument about how many more than 16 bits it would
> take to be good enough in 2016; clearly *fewer* than 16 bits is
> unacceptable.
I'm glad someone raised this issue.  I'm back in GCC-land these days, so 
my opinion can be discounted to some degree.  But ISTM that reducing 
ASLR's effectiveness like this would be a non-starter.

I've spend a fair amount of time the last month reading about various 
vulnerabilities & exploits.  While there are ways around ASLR, it does 
represent a notable barrier for the less sophisticated attackers.


jeff
Zack Weinberg - Dec. 11, 2015, 8:52 p.m.
On Fri, Dec 11, 2015 at 3:44 PM, Zack Weinberg <zackw@panix.com> wrote:
> Digging into the kernel a little, it appears that MAP_32BIT (in
> 4.3) selects a page-aligned address at random from within a 1GB (not
> 2GB) space

Correction: the page-aligned address is selected at random from, if
I'm reading this correctly (and I'm not sure I am), the range
0x4000_0000 to 0x4200_0000.  That is *not* a 1GB space; it is only a
32MB space.  However, 'thirteen bits of randomness' is accurate.  I'm
looking at
http://lxr.free-electrons.com/source/arch/x86/kernel/sys_x86_64.c#L100
and I cannot tell whether this means each mmap() gets its own 13 bits
of randomness or what.

zw
Andi Kleen - Dec. 11, 2015, 8:59 p.m.
Zack Weinberg <zackw@panix.com> writes:
>
> Just to back up this assertion: 16 bits of base address randomization
> was brute-forceable in less than five minutes (on average) in 2004,
> per http://www.cs.columbia.edu/~locasto/projects/candidacy/papers/shacham2004ccs.pdf
> .  Digging into the kernel a little, it appears that MAP_32BIT (in
> 4.3) selects a page-aligned address at random from within a 1GB (not
> 2GB) space; that's *thirteen* bits of randomness, so we don't even
> have to have the argument about how many more than 16 bits it would
> take to be good enough in 2016; clearly *fewer* than 16 bits is
> unacceptable.

The patch brings ASLR into a similar ballpark as with 32bit (plus minus
1 or 2 bits)

You're basically arguing that running 32bit is unacceptable, and that the
main motivation for users to use 64bit is security and not performance.

I don't think users would agree with you on that. 32bit is widely used,
and clearly users find the risk from that acceptable. Also there
are no security advisories around that ask users to stop using 32bit.

There aren't because it doesn't make any sense.

Also 3% (likely more in other workloads) is a significant performance
difference, especially when we're talking about something as common as
function calls.
 
BTW the patch could be fixed to support all 4GB by guessing a hole
and using the mmap hint. But it would complicate it somewhat.

-Andi
H.J. Lu - Dec. 11, 2015, 9:14 p.m.
On Fri, Dec 11, 2015 at 12:59 PM, Andi Kleen <andi@firstfloor.org> wrote:
>
>
> BTW the patch could be fixed to support all 4GB by guessing a hole
> and using the mmap hint. But it would complicate it somewhat.
>

That means user space has to keep track mmap/mremap/munmap.
It isn't going to work.

We need a new MAP_4GB bit to get 4GB address and we can make it
backward compatible with old kernels.   Can we extend the 64-bit
mmap interface to pass the extra bit in upper 32bits of the 5th argument
if we have run out bits in the 5th argument.
Zack Weinberg - Dec. 11, 2015, 9:16 p.m.
On Fri, Dec 11, 2015 at 3:59 PM, Andi Kleen <andi@firstfloor.org> wrote:
> Zack Weinberg <zackw@panix.com> writes:
>> Just to back up this assertion: 16 bits of base address randomization
>> was brute-forceable in less than five minutes (on average) in 2004,
>> per http://www.cs.columbia.edu/~locasto/projects/candidacy/papers/shacham2004ccs.pdf
>> .  Digging into the kernel a little, it appears that MAP_32BIT (in
>> 4.3) selects a page-aligned address at random from within a 1GB (not
>> 2GB) space; that's *thirteen* bits of randomness, so we don't even
>> have to have the argument about how many more than 16 bits it would
>> take to be good enough in 2016; clearly *fewer* than 16 bits is
>> unacceptable.
>
> The patch brings ASLR into a similar ballpark as with 32bit (plus minus
> 1 or 2 bits)

For the record, the amount of entropy in each mmap() for 32-bit x86
might be as low as 8 or as high as 20 bits depending on which bit of
the code I decide to believe.  arch_get_unmapped_area() is a real
mess.  However, 20 bits does not strike me as adequate either, so ...

> You're basically arguing that running 32bit is unacceptable, and that the
> main motivation for users to use 64bit is security and not performance.

... essentially yes, but I'd put it differently: some subset of 64-bit
users are using it *because* it makes ASLR more effective; kneecapping
ASLR for those users would be an unacceptable regression.

> 32bit is widely used,
> and clearly users find the risk from that acceptable.

I think that reads too much into the lack of data.  I would expect
that the majority of people still using 32-bit user space are doing so
either because the computer is a black box to them, or because they
have a business-critical 32-bit binary that can't be upgraded.

> Also there are no security advisories around that ask users to stop using 32bit.

The paper I cited is a counterexample.

> Also 3% (likely more in other workloads) is a significant performance
> difference, especially when we're talking about something as common as
> function calls.

I saw a claim of 3% *overall* performance increase on an artificial
benchmark and that's all.  I currently suspect this will turn out to
be unmeasurable on realistic workloads.

zw
H.J. Lu - Dec. 11, 2015, 9:25 p.m.
On Fri, Dec 11, 2015 at 1:16 PM, Zack Weinberg <zackw@panix.com> wrote:
>
>> Also 3% (likely more in other workloads) is a significant performance
>> difference, especially when we're talking about something as common as
>> function calls.
>
> I saw a claim of 3% *overall* performance increase on an artificial
> benchmark and that's all.  I currently suspect this will turn out to
> be unmeasurable on realistic workloads.

3% speedup is for my typical workloads, which is running GCC.   For
an artificial benchmark, I got

Old glibc:

[hjl@gnu-slm-1 dlcall]$
/export/build/gnu/glibc/build-x86_64-linux.old/elf/ld-linux-x86-64.so.2
--library-path /export/build/gnu/glibc/build-x86_64-linux
intel64/dlcall

Time for 1000000 calls into dynamic library   1.00   ,  22.83 MT,
22.83 MT,  22.83 MT,       0 T

New glibc:

[hjl@gnu-slm-1 dlcall]$
/export/build/gnu/glibc/build-x86_64-linux/elf/ld-linux-x86-64.so.2
--library-path /export/build/gnu/glibc/build-x86_64-linux
intel64/dlcall

Time for 1000000 calls into dynamic library   1.00   ,   2.87 MT,
2.87 MT,   2.87 MT,       0 T

which is 8X speedup.
Andi Kleen - Dec. 11, 2015, 9:27 p.m.
> That means user space has to keep track mmap/mremap/munmap.
> It isn't going to work.

By default nothing is put into the first 4GB other than the main
executable. All mmaps without special flags or arguments end up
high up in the address space by the default mmap placement policy.

So the only thing you're normally competing with in the first 4GB
is your own (special) mappings and the main executable.

If you make some reasonable assumptions about the load address and size
of the executable you can guess likely free ranges.

Then just pick a random address in those and try to mmap it with
the mmap hint. If if fails fall back to the full 4GB (or try again a
few times)

It shouldn't be that hard to implement. One minor issue would
be that ASLR would cause even more variability than usual because
it adds penalty in rare situations (if there is a collision
that forces a library to be >4GB)

-Andi
Zack Weinberg - Dec. 11, 2015, 9:35 p.m.
On Fri, Dec 11, 2015 at 4:25 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Fri, Dec 11, 2015 at 1:16 PM, Zack Weinberg <zackw@panix.com> wrote:
>>
>>> Also 3% (likely more in other workloads) is a significant performance
>>> difference, especially when we're talking about something as common as
>>> function calls.
>>
>> I saw a claim of 3% *overall* performance increase on an artificial
>> benchmark and that's all.  I currently suspect this will turn out to
>> be unmeasurable on realistic workloads.
>
> 3% speedup is for my typical workloads, which is running GCC.

You can opt in in your development environment; equally you can switch
ASLR off altogether.

The "realistic workloads" I care about are long-lived network clients
and servers -- you know, the things for which ASLR is relevant as a
security measure.

zw
Andi Kleen - Dec. 11, 2015, 10:29 p.m.
> You can opt in in your development environment; equally you can switch
> ASLR off altogether.
> 
> The "realistic workloads" I care about are long-lived network clients
> and servers -- you know, the things for which ASLR is relevant as a
> security measure.

They can opt in. Slow security features such as compiler stack overflow checking
are usually opt-in, not opt-out. This is certainly in the same ballpark,
in some cases likely higher.

-Andi
Jeff Law - Dec. 11, 2015, 11:01 p.m.
On 12/11/2015 03:29 PM, Andi Kleen wrote:
>> You can opt in in your development environment; equally you can switch
>> ASLR off altogether.
>>
>> The "realistic workloads" I care about are long-lived network clients
>> and servers -- you know, the things for which ASLR is relevant as a
>> security measure.
>
> They can opt in. Slow security features such as compiler stack overflow checking
> are usually opt-in, not opt-out. This is certainly in the same ballpark,
> in some cases likely higher.
And I'd argue that this is killing ASLR at a level that it should be an 
opt-out rather than opt-in.  Crippling ASLR is, IMHO, unacceptable.

jeff
Andi Kleen - Dec. 12, 2015, 12:14 a.m.
> And I'd argue that this is killing ASLR at a level that it should be
> an opt-out rather than opt-in.  Crippling ASLR is, IMHO,
> unacceptable.

You're arguing then that running 32bit code is unacceptable.

Frankly, I don't think that's a viable position.

-Andi
Zack Weinberg - Dec. 12, 2015, 12:31 a.m.
On Fri, Dec 11, 2015 at 7:14 PM, Andi Kleen <andi@firstfloor.org> wrote:
>> And I'd argue that this is killing ASLR at a level that it should be
>> an opt-out rather than opt-in.  Crippling ASLR is, IMHO,
>> unacceptable.
>
> You're arguing then that running 32bit code is unacceptable.

I don't see that that follows.

Right now, 32-bit code has security margin X and 64-bit code has
security margin Y > X.  The proposed patch *reduces* the security
margin of 64-bit code from Y to X (give or take).  That may be, and
IMHO is, an unacceptable change *even if* X is agreed to be adequate,
or anyway the best that can be done for 32-bit.

Fundamentally, my issue here is that there are people right now
depending on this security margin to be Y, so a glibc upgrade should
not silently remove that.  It is a compatibility break of the worst
kind: completely invisible in normal operation, but the system no
longer has a property you were counting on to protect you under
abnormal (adversarial) conditions.

zw
Jeff Law - Dec. 12, 2015, 2:01 a.m.
On 12/11/2015 05:31 PM, Zack Weinberg wrote:
> On Fri, Dec 11, 2015 at 7:14 PM, Andi Kleen <andi@firstfloor.org> wrote:
>>> And I'd argue that this is killing ASLR at a level that it should be
>>> an opt-out rather than opt-in.  Crippling ASLR is, IMHO,
>>> unacceptable.
>>
>> You're arguing then that running 32bit code is unacceptable.
>
> I don't see that that follows.
>
> Right now, 32-bit code has security margin X and 64-bit code has
> security margin Y > X.  The proposed patch *reduces* the security
> margin of 64-bit code from Y to X (give or take).  That may be, and
> IMHO is, an unacceptable change *even if* X is agreed to be adequate,
> or anyway the best that can be done for 32-bit.
Exactly.  For a 64 bit application, this change will essentially cripple 
ASLR if I understand the patch correctly.  That is unacceptable to me 
and likely to Red Hat as a whole.

>
> Fundamentally, my issue here is that there are people right now
> depending on this security margin to be Y, so a glibc upgrade should
> not silently remove that.  It is a compatibility break of the worst
> kind: completely invisible in normal operation, but the system no
> longer has a property you were counting on to protect you under
> abnormal (adversarial) conditions.
Right.  And in fact, ASLR is the margin by which some currently known 
vulnerabilities have not been turned into proof of concept exploits.

ASLR, while not perfect, while bypassable via various information leaks, 
is still a vital component in the overall security profile for Linux, 
particularly for 64 bit OSs & applications.


Jeff
Paul Eggert - Dec. 12, 2015, 4:48 a.m.
On 12/11/2015 04:31 PM, Zack Weinberg wrote:
> there are people right now
> depending on this security margin to be Y, so a glibc upgrade should
> not silently remove that.

+1.  Caution is advisable here. Distributions that prefer the 
performance uptick to more-robust ASLR defense can change the default.
Florian Weimer - Dec. 12, 2015, 1:18 p.m.
On 12/11/2015 10:25 PM, H.J. Lu wrote:

> 3% speedup is for my typical workloads, which is running GCC.   For
> an artificial benchmark, I got
> 
> Old glibc:
> 
> [hjl@gnu-slm-1 dlcall]$
> /export/build/gnu/glibc/build-x86_64-linux.old/elf/ld-linux-x86-64.so.2
> --library-path /export/build/gnu/glibc/build-x86_64-linux
> intel64/dlcall
> 
> Time for 1000000 calls into dynamic library   1.00   ,  22.83 MT,
> 22.83 MT,  22.83 MT,       0 T
> 
> New glibc:
> 
> [hjl@gnu-slm-1 dlcall]$
> /export/build/gnu/glibc/build-x86_64-linux/elf/ld-linux-x86-64.so.2
> --library-path /export/build/gnu/glibc/build-x86_64-linux
> intel64/dlcall
> 
> Time for 1000000 calls into dynamic library   1.00   ,   2.87 MT,
> 2.87 MT,   2.87 MT,       0 T
> 
> which is 8X speedup.

Ouch.  Can't you label those chips as 32-bit-only?

What does your benchmark look like?

Can you run it with PIE?  And please also measure the performance of
gettimeofday and sched_getcpu.

I suspect you need kernel support to avoid the performance hit.

Florian
Florian Weimer - Dec. 12, 2015, 2:50 p.m.
On 12/11/2015 06:02 PM, Adhemerval Zanella wrote:

> Another issue is this is basically limiting ALSR really hard on x86_64.

Looking at my Fedora 22 system, it seems that most (all?) processes map
their DSOs into a single 31-bit window.  I don't know why it's
implemented this way and if it's also about silicon limitations.

Non-relocatable and PIE executables and the vDSO are stilled mapped
outside of this window, though.  A conservative fix would change that
aspect only.

I'm worried that mapping a lot of things into the first 2 GiB of address
space breaks application expectations.  LuaJIT applications might be
restricted to smaller-sized heaps, and Hotspot might not be able to use
unscaled compressed oops, resulting in a performance loss.

Florian
Mike Frysinger - Dec. 12, 2015, 11:02 p.m.
On 11 Dec 2015 12:59, Andi Kleen wrote:
> Zack Weinberg <zackw@panix.com> writes:
> > Just to back up this assertion: 16 bits of base address randomization
> > was brute-forceable in less than five minutes (on average) in 2004,
> > per http://www.cs.columbia.edu/~locasto/projects/candidacy/papers/shacham2004ccs.pdf
> > .  Digging into the kernel a little, it appears that MAP_32BIT (in
> > 4.3) selects a page-aligned address at random from within a 1GB (not
> > 2GB) space; that's *thirteen* bits of randomness, so we don't even
> > have to have the argument about how many more than 16 bits it would
> > take to be good enough in 2016; clearly *fewer* than 16 bits is
> > unacceptable.
> 
> The patch brings ASLR into a similar ballpark as with 32bit (plus minus
> 1 or 2 bits)
> 
> You're basically arguing that running 32bit is unacceptable, and that the
> main motivation for users to use 64bit is security and not performance.

yes, this is exactly why Chrome OS (and many Google systems) use 64bit,
to the point where we've sacrificed performance.  it's also the only
thing that's held us back from moving forward with x32.

> I don't think users would agree with you on that. 32bit is widely used,
> and clearly users find the risk from that acceptable. Also there
> are no security advisories around that ask users to stop using 32bit.

users don't understand or care.  appealing to the masses here doesn't
make sense.
-mike

Patch

From 7b108c6879e4ff546014a0f11266f33ff4f75418 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 21 Oct 2015 14:44:23 -0700
Subject: [PATCH] Add Prefer_MAP_32BIT_EXEC for Silvermont

According to Silvermont software optimization guide, for 64-bit
applications, branch prediction performance can be negatively impacted
when the target of a branch is more than 4GB away from the branch.  Set
the Prefer_MAP_32BIT_EXEC bit for Silvermont so that mmap will try to
map executable pages with MAP_32BIT first.  Also enable Silvermont
optimizations for Knights Landing.

Prefer_MAP_32BIT_EXEC reduces bits available for address space layout
randomization (ASLR), which can be disabled by setting environment
variable, LD_DISABLE_PREFER_MAP_32BIT_EXEC.

On Fedora 23, this patch speeds up GCC 5 testsuite by 3% on Silvermont.

	* sysdeps/unix/sysv/linux/x86_64/64/mmap.c: New file.
	* sysdeps/x86/cpu-features.c (get_prefer_map_32bit_exec): New
	function.
	(init_cpu_features): Set the Prefer_MAP_32BIT_EXEC bit for
	Silvermont.  Enable Silvermont optimizations for Knights Landing.
	* sysdeps/x86/cpu-features.h (bit_Prefer_MAP_32BIT_EXEC): New.
	(index_Prefer_MAP_32BIT_EXEC): Likewise.
---
 sysdeps/unix/sysv/linux/x86_64/64/mmap.c | 50 ++++++++++++++++++++++++++++++++
 sysdeps/x86/cpu-features.c               | 44 ++++++++++++++++++++++++++--
 sysdeps/x86/cpu-features.h               |  3 ++
 3 files changed, 95 insertions(+), 2 deletions(-)
 create mode 100644 sysdeps/unix/sysv/linux/x86_64/64/mmap.c

diff --git a/sysdeps/unix/sysv/linux/x86_64/64/mmap.c b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
new file mode 100644
index 0000000..c34f633
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/64/mmap.c
@@ -0,0 +1,50 @@ 
+/* mmap system call.  x86-64 version.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <sys/syscall.h>
+#include <sysdep.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <cpu-features.h>
+
+__ptr_t
+__mmap (__ptr_t addr, size_t len, int prot, int flags, int fd, off_t offset)
+{
+  /* If the Prefer_MAP_32BIT_EXEC bit is set, try to map executable pages
+     with MAP_32BIT first.  */
+  if (addr == NULL
+      && (prot & PROT_EXEC) != 0
+      && HAS_ARCH_FEATURE (Prefer_MAP_32BIT_EXEC))
+    {
+      addr = (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot,
+				       flags | MAP_32BIT,
+				       fd, offset);
+      if (addr != MAP_FAILED)
+	return addr;
+    }
+  return (__ptr_t) INLINE_SYSCALL (mmap, 6, addr, len, prot, flags,
+				   fd, offset);
+}
+
+weak_alias (__mmap, mmap)
+weak_alias (__mmap, mmap64)
+weak_alias (__mmap, __mmap64)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index fba3ef0..6a132f7 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -39,6 +39,33 @@  get_common_indeces (struct cpu_features *cpu_features,
     }
 }
 
+/* Prefer_MAP_32BIT_EXEC reduces bits available for address space layout
+   randomization (ASLR), which can be disabled by setting environment
+   variable, LD_DISABLE_PREFER_MAP_32BIT_EXEC.  */
+
+static inline unsigned int
+get_prefer_map_32bit_exec (void)
+{
+#if defined __LP64__ && IS_IN (rtld)
+  extern char **__environ attribute_hidden;
+  for (char **current = __environ; *current != NULL; ++current)
+    {
+      /* Check LD_DISABLE_PREFER_MAP_32BIT_EXEC=.  */
+      static const char *disable = "LD_DISABLE_PREFER_MAP_32BIT_EXEC=";
+      for (size_t i = 0; ; i++)
+	{
+	  if (disable[i] != (*current)[i])
+	    break;
+	  if ((*current)[i] == '=')
+	    return 0;
+	}
+    }
+  return bit_Prefer_MAP_32BIT_EXEC;
+#else
+  return 0;
+#endif
+}
+
 static inline void
 init_cpu_features (struct cpu_features *cpu_features)
 {
@@ -78,22 +105,35 @@  init_cpu_features (struct cpu_features *cpu_features)
 	      cpu_features->feature[index_Slow_BSF] |= bit_Slow_BSF;
 	      break;
 
+	    case 0x57:
+	      /* Knights Landing.  Enable Silvermont optimizations.  */
+
 	    case 0x37:
 	    case 0x4a:
 	    case 0x4d:
 	    case 0x5a:
 	    case 0x5d:
-	      /* Unaligned load versions are faster than SSSE3
-		 on Silvermont.  */
+	      /* Unaligned load versions are faster than SSSE3 on
+		 Silvermont.  For 64-bit applications, branch
+		 prediction performance can be negatively impacted
+		 when the target of a branch is more than 4GB away
+		 from the branch.  Set the Prefer_MAP_32BIT_EXEC bit
+		 so that mmap will try to map executable pages with
+		 MAP_32BIT first.  NB: MAP_32BIT will map to lower
+		 2GB, not lower 4GB, address.  */
 #if index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
 # error index_Fast_Unaligned_Load != index_Prefer_PMINUB_for_stringop
 #endif
+#if index_Fast_Unaligned_Load != index_Prefer_MAP_32BIT_EXEC
+# error index_Fast_Unaligned_Load != index_Prefer_MAP_32BIT_EXEC
+#endif
 #if index_Fast_Unaligned_Load != index_Slow_SSE4_2
 # error index_Fast_Unaligned_Load != index_Slow_SSE4_2
 #endif
 	      cpu_features->feature[index_Fast_Unaligned_Load]
 		|= (bit_Fast_Unaligned_Load
 		    | bit_Prefer_PMINUB_for_stringop
+		    | get_prefer_map_32bit_exec ()
 		    | bit_Slow_SSE4_2);
 	      break;
 
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 80edbee..93bee69 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -33,6 +33,7 @@ 
 #define bit_AVX512DQ_Usable		(1 << 13)
 #define bit_I586			(1 << 14)
 #define bit_I686			(1 << 15)
+#define bit_Prefer_MAP_32BIT_EXEC	(1 << 16)
 
 /* CPUID Feature flags.  */
 
@@ -97,6 +98,7 @@ 
 # define index_AVX512DQ_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_I586			FEATURE_INDEX_1*FEATURE_SIZE
 # define index_I686			FEATURE_INDEX_1*FEATURE_SIZE
+# define index_Prefer_MAP_32BIT_EXEC	FEATURE_INDEX_1*FEATURE_SIZE
 
 # if defined (_LIBC) && !IS_IN (nonlib)
 #  ifdef __x86_64__
@@ -248,6 +250,7 @@  extern const struct cpu_features *__get_cpu_features (void)
 # define index_AVX512DQ_Usable		FEATURE_INDEX_1
 # define index_I586			FEATURE_INDEX_1
 # define index_I686			FEATURE_INDEX_1
+# define index_Prefer_MAP_32BIT_EXEC	FEATURE_INDEX_1
 
 #endif	/* !__ASSEMBLER__ */
 
-- 
2.5.0