[1/2] Add private_function for private functions within glibc
Commit Message
On Thu, Jun 22, 2017 at 7:41 AM, Florian Weimer <fweimer@redhat.com> wrote:
> On 06/22/2017 04:38 PM, H.J. Lu wrote:
>> On Thu, Jun 22, 2017 at 6:40 AM, Florian Weimer <fweimer@redhat.com> wrote:
>>> On 06/17/2017 03:59 PM, H.J. Lu wrote:
>>>> Here parameters are passed to _dl_init in registers. I want to minimize
>>>> changes to avoid any potential issues.
>>>
>>> Well, as a rule of thumb, if we do something that breaks our own code,
>>> it is pretty much guaranteed to wreak havoc across the board (because
>>> our test coverage is somewhat poor).
>>>
>>> I see a lot of use of regparm (3). For example:
>>>
>>> $ echo '#include <Qt/qchar.h>' | g++ -m32 -E -x c++ - | grep regparm
>>> static Category __attribute__((regparm(3))) category(uint ucs4);
>>> static Category __attribute__((regparm(3))) category(ushort ucs2);
>>> static Direction __attribute__((regparm(3))) direction(uint ucs4);
>>> static Direction __attribute__((regparm(3))) direction(ushort ucs2);
>>> static Joining __attribute__((regparm(3))) joining(uint ucs4);
>>> …
>>>
>>> I think these calls actually cross DSO boundaries.
>>
>> I don't think so. See "static ...".
>
> It's C++ code, so it just means there's no this pointer.
>
How about this patch?
Comments
On 06/22/2017 06:21 PM, H.J. Lu wrote:
> On Thu, Jun 22, 2017 at 7:41 AM, Florian Weimer <fweimer@redhat.com> wrote:
>> On 06/22/2017 04:38 PM, H.J. Lu wrote:
>>> On Thu, Jun 22, 2017 at 6:40 AM, Florian Weimer <fweimer@redhat.com> wrote:
>>>> On 06/17/2017 03:59 PM, H.J. Lu wrote:
>>>>> Here parameters are passed to _dl_init in registers. I want to minimize
>>>>> changes to avoid any potential issues.
>>>> Well, as a rule of thumb, if we do something that breaks our own code,
>>>> it is pretty much guaranteed to wreak havoc across the board (because
>>>> our test coverage is somewhat poor).
>>>>
>>>> I see a lot of use of regparm (3). For example:
>>>>
>>>> $ echo '#include <Qt/qchar.h>' | g++ -m32 -E -x c++ - | grep regparm
>>>> static Category __attribute__((regparm(3))) category(uint ucs4);
>>>> static Category __attribute__((regparm(3))) category(ushort ucs2);
>>>> static Direction __attribute__((regparm(3))) direction(uint ucs4);
>>>> static Direction __attribute__((regparm(3))) direction(ushort ucs2);
>>>> static Joining __attribute__((regparm(3))) joining(uint ucs4);
>>>> …
>>>>
>>>> I think these calls actually cross DSO boundaries.
>>> I don't think so. See "static ...".
>> It's C++ code, so it just means there's no this pointer.
>>
> How about this patch?
Yes, it addresses my concerns.
> +# The SHSTK compatible version.
> + .text
> + .globl _dl_runtime_resolve_shstk
> + .type _dl_runtime_resolve_shstk, @function
> + cfi_startproc
> + .align 16
> +_dl_runtime_resolve_shstk:
> + cfi_adjust_cfa_offset (8)
> + pushl %eax # Preserve registers otherwise clobbered.
> + cfi_adjust_cfa_offset (4)
> + pushl %edx
> + cfi_adjust_cfa_offset (4)
> + movl 12(%esp), %edx # Copy args pushed by PLT in register. Note
> + movl 8(%esp), %eax # that `fixup' takes its parameters in regs.
> + call _dl_fixup # Call resolver.
> + movl (%esp), %edx # Get register content back.
> + movl %eax, %ecx # Store the function address.
> + movl 4(%esp), %eax # Get register content back.
> + addl $16, %esp # Adjust stack: PLT1 + PLT2 + %eax + %edx
> + cfi_adjust_cfa_offset (-16)
> + jmp *%ecx # Jump to function address.
> + cfi_endproc
> + .size _dl_runtime_resolve_shstk, .-_dl_runtime_resolve_shstk
Are the CFI annotations correct? I think the offset after the push %eax
should be 8, not 12, as it currently appears to be. CFI annotations for
the spill slots of %eax and %edx would be nice. too.
It seems to me that _dl_fixup is called with an unaligned stack, either
in this implementation or the existing one. Don't we care about this
because we compile the dynamic linker without SSE support?
Thanks,
Florian
On Thu, Jun 22, 2017 at 9:37 AM, Florian Weimer <fweimer@redhat.com> wrote:
> On 06/22/2017 06:21 PM, H.J. Lu wrote:
>> On Thu, Jun 22, 2017 at 7:41 AM, Florian Weimer <fweimer@redhat.com> wrote:
>>> On 06/22/2017 04:38 PM, H.J. Lu wrote:
>>>> On Thu, Jun 22, 2017 at 6:40 AM, Florian Weimer <fweimer@redhat.com> wrote:
>>>>> On 06/17/2017 03:59 PM, H.J. Lu wrote:
>>>>>> Here parameters are passed to _dl_init in registers. I want to minimize
>>>>>> changes to avoid any potential issues.
>>>>> Well, as a rule of thumb, if we do something that breaks our own code,
>>>>> it is pretty much guaranteed to wreak havoc across the board (because
>>>>> our test coverage is somewhat poor).
>>>>>
>>>>> I see a lot of use of regparm (3). For example:
>>>>>
>>>>> $ echo '#include <Qt/qchar.h>' | g++ -m32 -E -x c++ - | grep regparm
>>>>> static Category __attribute__((regparm(3))) category(uint ucs4);
>>>>> static Category __attribute__((regparm(3))) category(ushort ucs2);
>>>>> static Direction __attribute__((regparm(3))) direction(uint ucs4);
>>>>> static Direction __attribute__((regparm(3))) direction(ushort ucs2);
>>>>> static Joining __attribute__((regparm(3))) joining(uint ucs4);
>>>>> …
>>>>>
>>>>> I think these calls actually cross DSO boundaries.
>>>> I don't think so. See "static ...".
>>> It's C++ code, so it just means there's no this pointer.
>>>
>> How about this patch?
>
> Yes, it addresses my concerns.
>
>> +# The SHSTK compatible version.
>> + .text
>> + .globl _dl_runtime_resolve_shstk
>> + .type _dl_runtime_resolve_shstk, @function
>> + cfi_startproc
>> + .align 16
>> +_dl_runtime_resolve_shstk:
>> + cfi_adjust_cfa_offset (8)
>> + pushl %eax # Preserve registers otherwise clobbered.
>> + cfi_adjust_cfa_offset (4)
>> + pushl %edx
>> + cfi_adjust_cfa_offset (4)
>> + movl 12(%esp), %edx # Copy args pushed by PLT in register. Note
>> + movl 8(%esp), %eax # that `fixup' takes its parameters in regs.
>> + call _dl_fixup # Call resolver.
>> + movl (%esp), %edx # Get register content back.
>> + movl %eax, %ecx # Store the function address.
>> + movl 4(%esp), %eax # Get register content back.
>> + addl $16, %esp # Adjust stack: PLT1 + PLT2 + %eax + %edx
>> + cfi_adjust_cfa_offset (-16)
>> + jmp *%ecx # Jump to function address.
>> + cfi_endproc
>> + .size _dl_runtime_resolve_shstk, .-_dl_runtime_resolve_shstk
>
> Are the CFI annotations correct? I think the offset after the push %eax
> should be 8, not 12, as it currently appears to be. CFI annotations for
There are 2 pushes for PLT and add push %eax. The offset is 12.
> the spill slots of %eax and %edx would be nice. too.
Don't we have
pushl %eax # Preserve registers otherwise clobbered.
cfi_adjust_cfa_offset (4)
pushl %edx
cfi_adjust_cfa_offset (4)
> It seems to me that _dl_fixup is called with an unaligned stack, either
> in this implementation or the existing one. Don't we care about this
> because we compile the dynamic linker without SSE support?
>
Probably. But it is a separate issue.
From bc18b43086c2ac1efb379ba83aae351d532963f7 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Thu, 22 Jun 2017 08:51:42 -0700
Subject: [PATCH] i386: Add _dl_runtime_resolve_shstk [BZ #21598]
Add a SHSTK compatible symbol resolver to support Shadow Stack in Intel
Control-flow Enforcement Technology (CET) instructions:
https://software.intel.com/sites/default/files/managed/4d/2a/control-flow-enforcement-technology-preview.pdf
[BZ #21598]
* sysdeps/i386/dl-machine.h (elf_machine_runtime_setup): Use
_dl_runtime_resolve_shstk if SHSTK is usable.
* sysdeps/i386/dl-trampoline.S (_dl_runtime_resolve_shstk): New.
* sysdeps/x86/cpu-features.h (bit_arch_IBT_Usable): New.
(bit_arch_SHSTK_Usable): Likewise.
(bit_cpu_SHSTK): Likewise.
(index_cpu_IBT): Likewise.
(index_cpu_SHSTK): Likewise.
(index_arch_IBT_Usable): Likewise.
(index_arch_SHSTK_Usable): Likewise.
(reg_IBT): Likewise.
(reg_SHSTK): Likewise.
---
sysdeps/i386/dl-machine.h | 5 ++++-
sysdeps/i386/dl-trampoline.S | 23 +++++++++++++++++++++++
sysdeps/x86/cpu-features.h | 14 ++++++++++++++
3 files changed, 41 insertions(+), 1 deletion(-)
@@ -66,6 +66,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
{
Elf32_Addr *got;
extern void _dl_runtime_resolve (Elf32_Word) attribute_hidden;
+ extern void _dl_runtime_resolve_shstk (Elf32_Word) attribute_hidden;
extern void _dl_runtime_profile (Elf32_Word) attribute_hidden;
if (l->l_info[DT_JMPREL] && lazy)
@@ -104,7 +105,9 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
else
/* This function will get called to fix up the GOT entry indicated by
the offset on the stack, and then jump to the resolved address. */
- got[2] = (Elf32_Addr) &_dl_runtime_resolve;
+ got[2] = (HAS_ARCH_FEATURE (SHSTK_Usable)
+ ? (Elf32_Addr) &_dl_runtime_resolve_shstk
+ : (Elf32_Addr) &_dl_runtime_resolve);
}
return lazy;
@@ -50,6 +50,29 @@ _dl_runtime_resolve:
cfi_endproc
.size _dl_runtime_resolve, .-_dl_runtime_resolve
+# The SHSTK compatible version.
+ .text
+ .globl _dl_runtime_resolve_shstk
+ .type _dl_runtime_resolve_shstk, @function
+ cfi_startproc
+ .align 16
+_dl_runtime_resolve_shstk:
+ cfi_adjust_cfa_offset (8)
+ pushl %eax # Preserve registers otherwise clobbered.
+ cfi_adjust_cfa_offset (4)
+ pushl %edx
+ cfi_adjust_cfa_offset (4)
+ movl 12(%esp), %edx # Copy args pushed by PLT in register. Note
+ movl 8(%esp), %eax # that `fixup' takes its parameters in regs.
+ call _dl_fixup # Call resolver.
+ movl (%esp), %edx # Get register content back.
+ movl %eax, %ecx # Store the function address.
+ movl 4(%esp), %eax # Get register content back.
+ addl $16, %esp # Adjust stack: PLT1 + PLT2 + %eax + %edx
+ cfi_adjust_cfa_offset (-16)
+ jmp *%ecx # Jump to function address.
+ cfi_endproc
+ .size _dl_runtime_resolve_shstk, .-_dl_runtime_resolve_shstk
#ifndef PROF
.globl _dl_runtime_profile
@@ -40,6 +40,8 @@
#define bit_arch_Use_dl_runtime_resolve_opt (1 << 20)
#define bit_arch_Use_dl_runtime_resolve_slow (1 << 21)
#define bit_arch_Prefer_No_AVX512 (1 << 22)
+#define bit_arch_IBT_Usable (1 << 23)
+#define bit_arch_SHSTK_Usable (1 << 24)
/* CPUID Feature flags. */
@@ -74,6 +76,8 @@
#define bit_cpu_AVX512CD (1 << 28)
#define bit_cpu_AVX512BW (1 << 30)
#define bit_cpu_AVX512VL (1u << 31)
+#define bit_cpu_IBT (1u << 20)
+#define bit_cpu_SHSTK (1u << 7)
/* XCR0 Feature flags. */
#define bit_XMM_state (1 << 1)
@@ -103,6 +107,8 @@
# define index_cpu_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
# define index_cpu_ERMS COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
# define index_cpu_MOVBE COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
+# define index_cpu_IBT COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EDX_OFFSET
+# define index_cpu_SHSTK COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_arch_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE
@@ -126,6 +132,8 @@
# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1*FEATURE_SIZE
# define index_arch_Prefer_No_AVX512 FEATURE_INDEX_1*FEATURE_SIZE
+# define index_arch_IBT_Usable FEATURE_INDEX_1*FEATURE_SIZE
+# define index_arch_SHSTK_Usable FEATURE_INDEX_1*FEATURE_SIZE
# if defined (_LIBC) && !IS_IN (nonlib)
@@ -277,6 +285,8 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_cpu_LZCNT COMMON_CPUID_INDEX_1
# define index_cpu_MOVBE COMMON_CPUID_INDEX_1
# define index_cpu_POPCNT COMMON_CPUID_INDEX_1
+# define index_cpu_IBT COMMON_CPUID_INDEX_7
+# define index_cpu_SHSTK COMMON_CPUID_INDEX_7
# define reg_CX8 edx
# define reg_CMOV edx
@@ -306,6 +316,8 @@ extern const struct cpu_features *__get_cpu_features (void)
# define reg_LZCNT ecx
# define reg_MOVBE ecx
# define reg_POPCNT ecx
+# define reg_IBT edx
+# define reg_SHSTK ecx
# define index_arch_Fast_Rep_String FEATURE_INDEX_1
# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1
@@ -329,6 +341,8 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_arch_Use_dl_runtime_resolve_opt FEATURE_INDEX_1
# define index_arch_Use_dl_runtime_resolve_slow FEATURE_INDEX_1
# define index_arch_Prefer_No_AVX512 FEATURE_INDEX_1
+# define index_arch_IBT_Usable FEATURE_INDEX_1
+# define index_arch_SHSTK_Usable FEATURE_INDEX_1
#endif /* !__ASSEMBLER__ */
--
2.9.4