[2/2] i386: Update _dl_runtime_resolve/_dl_runtime_profile

Message ID 20170617130710.GB14641@gmail.com
State New, archived
Headers

Commit Message

H.J. Lu June 17, 2017, 1:07 p.m. UTC
  To make the symbol resolver compatible with the Shadow Stack in Intel Control-flow
Enforcement Technology (CET) instructions:

https://software.intel.com/sites/default/files/managed/4d/2a/control-flow-enforcement-technology-preview.pdf

call the resolved function indirectly through %ecx.

Any comments?


H.J.
---
	[BZ #21598]
	* sysdeps/i386/dl-trampoline.S (_dl_runtime_resolve): Call
	resolved function indirectly with %ecx.
	(_dl_runtime_profile): Likewise.
---
 sysdeps/i386/dl-trampoline.S | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)
  

Comments

Florian Weimer June 17, 2017, 1:35 p.m. UTC | #1
On 06/17/2017 03:07 PM, H.J. Lu wrote:
> -	popl %edx		# Get register content back.
> -	cfi_adjust_cfa_offset (-4)
> -	movl (%esp), %ecx
> -	movl %eax, (%esp)	# Store the function address.
> -	movl 4(%esp), %eax
> -	ret $12			# Jump to function address.
> +	movl (%esp), %edx	# Get register content back.
> +	movl %eax, %ecx		# Store the function address.
> +	movl 4(%esp), %eax	# Get register content back.
> +	addl $16, %esp		# Adjust stack(PLT did 2 pushes)
> +	cfi_adjust_cfa_offset (-16)
> +	jmp *%ecx		# Jump to function address.

Did the old code break the return stack optimization?  I suppose this is
a real improvement, then.  (I'm aware it depends on reserving the %ecx
register.)

Thanks,
Florian
  
H.J. Lu June 17, 2017, 1:39 p.m. UTC | #2
On Sat, Jun 17, 2017 at 6:35 AM, Florian Weimer <fweimer@redhat.com> wrote:
> On 06/17/2017 03:07 PM, H.J. Lu wrote:
>> -     popl %edx               # Get register content back.
>> -     cfi_adjust_cfa_offset (-4)
>> -     movl (%esp), %ecx
>> -     movl %eax, (%esp)       # Store the function address.
>> -     movl 4(%esp), %eax
>> -     ret $12                 # Jump to function address.
>> +     movl (%esp), %edx       # Get register content back.
>> +     movl %eax, %ecx         # Store the function address.
>> +     movl 4(%esp), %eax      # Get register content back.
>> +     addl $16, %esp          # Adjust stack(PLT did 2 pushes)
>> +     cfi_adjust_cfa_offset (-16)
>> +     jmp *%ecx               # Jump to function address.
>
> Did the old code break the return stack optimization?  I suppose this is
> a real improvement, then.  (I'm aware it depends on reserving the %ecx
> register.)

Yes, this change will also improve return address prediction.
  

Patch

diff --git a/sysdeps/i386/dl-trampoline.S b/sysdeps/i386/dl-trampoline.S
index 6e7f3ae..648841c 100644
--- a/sysdeps/i386/dl-trampoline.S
+++ b/sysdeps/i386/dl-trampoline.S
@@ -34,19 +34,17 @@  _dl_runtime_resolve:
 	cfi_adjust_cfa_offset (8)
 	pushl %eax		# Preserve registers otherwise clobbered.
 	cfi_adjust_cfa_offset (4)
-	pushl %ecx
-	cfi_adjust_cfa_offset (4)
 	pushl %edx
 	cfi_adjust_cfa_offset (4)
-	movl 16(%esp), %edx	# Copy args pushed by PLT in register.  Note
-	movl 12(%esp), %eax	# that `fixup' takes its parameters in regs.
+	movl 12(%esp), %edx	# Copy args pushed by PLT in register.  Note
+	movl 8(%esp), %eax	# that `fixup' takes its parameters in regs.
 	call _dl_fixup		# Call resolver.
-	popl %edx		# Get register content back.
-	cfi_adjust_cfa_offset (-4)
-	movl (%esp), %ecx
-	movl %eax, (%esp)	# Store the function address.
-	movl 4(%esp), %eax
-	ret $12			# Jump to function address.
+	movl (%esp), %edx	# Get register content back.
+	movl %eax, %ecx		# Store the function address.
+	movl 4(%esp), %eax	# Get register content back.
+	addl $16, %esp		# Adjust stack(PLT did 2 pushes)
+	cfi_adjust_cfa_offset (-16)
+	jmp *%ecx		# Jump to function address.
 	cfi_endproc
 	.size _dl_runtime_resolve, .-_dl_runtime_resolve
 
@@ -85,14 +83,14 @@  _dl_runtime_profile:
 	movl (%esp), %edx
 	testl %edx, %edx
 	jns 1f
-	popl %edx
-	cfi_adjust_cfa_offset (-4)
-	popl %edx		# Get register content back.
-	cfi_adjust_cfa_offset (-4)
-	movl (%esp), %ecx
-	movl %eax, (%esp)	# Store the function address.
-	movl 4(%esp), %eax
-	ret $20			# Jump to function address.
+	movl 4(%esp), %edx	# Get register content back.
+	movl %eax, %ecx		# Store the function address.
+	movl 12(%esp), %eax	# Get register content back.
+	# Adjust stack: PLT1 + PLT2 + %esp + %ebp + %eax + %ecx + %edx
+	# + free.
+	addl $32, %esp
+	cfi_adjust_cfa_offset (-32)
+	jmp *%ecx		# Jump to function address.
 
 	/*
 	    +32     return address