[23/27,AARCH64] delouse input arguments in system functions

Message ID 1466485631-3532-25-git-send-email-ynorov@caviumnetworks.com
State New, archived

Commit Message

Yury Norov June 21, 2016, 5:07 a.m. UTC
  Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
---
 sysdeps/aarch64/__longjmp.S                   | 2 ++
 sysdeps/aarch64/dl-tlsdesc.S                  | 6 ++++++
 sysdeps/aarch64/memcmp.S                      | 3 +++
 sysdeps/aarch64/memcpy.S                      | 4 +++-
 sysdeps/aarch64/memmove.S                     | 3 +++
 sysdeps/aarch64/memset.S                      | 3 ++-
 sysdeps/aarch64/setjmp.S                      | 1 +
 sysdeps/aarch64/strchr.S                      | 1 +
 sysdeps/aarch64/strchrnul.S                   | 1 +
 sysdeps/aarch64/strcmp.S                      | 2 ++
 sysdeps/aarch64/strcpy.S                      | 2 ++
 sysdeps/aarch64/strlen.S                      | 2 ++
 sysdeps/aarch64/strncmp.S                     | 3 +++
 sysdeps/aarch64/strnlen.S                     | 3 +++
 sysdeps/aarch64/strrchr.S                     | 1 +
 sysdeps/aarch64/sysdep.h                      | 4 +++-
 sysdeps/unix/sysv/linux/aarch64/clone.S       | 7 +++++++
 sysdeps/unix/sysv/linux/aarch64/getcontext.S  | 1 +
 sysdeps/unix/sysv/linux/aarch64/setcontext.S  | 1 +
 sysdeps/unix/sysv/linux/aarch64/swapcontext.S | 1 +
 20 files changed, 48 insertions(+), 3 deletions(-)
  

Comments

Andreas Schwab June 21, 2016, 8:08 a.m. UTC | #1
Yury Norov <ynorov@caviumnetworks.com> writes:

> diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
> index fe8a17d..718dddf 100644
> --- a/sysdeps/aarch64/dl-tlsdesc.S
> +++ b/sysdeps/aarch64/dl-tlsdesc.S
> @@ -74,6 +74,7 @@
>  	cfi_startproc
>  	.align 2
>  _dl_tlsdesc_return:
> +	DELOUSE(0)
>  	ldr	PTR_REG (0), [x0, #PTR_SIZE]
>  	RET
>  	cfi_endproc
> @@ -126,6 +127,7 @@ _dl_tlsdesc_undefweak:
>  	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
>  	   from [x0,#8] here happens after the initialization of td->arg.  */
>  	ldar	xzr, [x0]
> +	DELOUSE(0)

That needs to be moved before the previous insn, doesn't it?  Also,
again _dl_tlsdesc_return_lazy is missing.
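
Concretely, since the ldar uses x0 as its address, the zero-extension
presumably has to happen first; a minimal sketch of the reordered
sequence (not part of the posted patch):

	DELOUSE(0)
	ldar	xzr, [x0]
	ldr	PTR_REG (0), [x0, #PTR_SIZE]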

Andreas.
  
Joseph Myers June 21, 2016, 10:36 a.m. UTC | #2
On Tue, 21 Jun 2016, Yury Norov wrote:

> Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>

You're missing a patch description.  What does "delouse" even mean?  What 
is the ABI peculiarity that means there are ABI-conforming arguments to 
these functions that need such a manipulation?
  
Arnd Bergmann June 21, 2016, 3:42 p.m. UTC | #3
On Tuesday, June 21, 2016 10:36:53 AM CEST Joseph Myers wrote:
> On Tue, 21 Jun 2016, Yury Norov wrote:
> 
> > Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
> 
> You're missing a patch description.  What does "delouse" even mean?  What 
> is the ABI peculiarity that means there are ABI-conforming arguments to 
> these functions that need such a manipulation?
> 

This is the term the kernel uses for making sure that no system call
from user space passes data in the upper bits of the argument registers,
which could end up being used in an exploit when the calling conventions
between functions assume that the upper bits contain zeroes.

I don't think there is any point in doing this in glibc though: we
can safely assume that any application calling into glibc follows
the documented calling conventions (it would otherwise be a bug),
but the kernel still has to zero those registers because malicious
user space code would simply execute the system call instruction
directly instead of calling into glibc...

	Arnd
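
To make the hazard concrete: nothing stops a process from bypassing the
glibc wrappers entirely, so the kernel must scrub the registers itself.
A minimal ILP32 sketch of a direct system call (hypothetical fragment,
not from the patch):

	mov	w8, #64		/* __NR_write on AArch64 */
	/* x0 = fd, x1 = buf, x2 = count: an ILP32 caller only
	   guarantees the low 32 bits of each register; a hostile one
	   can set the upper bits deliberately.  The kernel therefore
	   zero-extends ("delouses") them on entry.  */
	svc	#0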
  
Andrew Pinski June 21, 2016, 4:37 p.m. UTC | #4
On Tue, Jun 21, 2016 at 8:42 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Tuesday, June 21, 2016 10:36:53 AM CEST Joseph Myers wrote:
>> On Tue, 21 Jun 2016, Yury Norov wrote:
>>
>> > Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
>>
>> You're missing a patch description.  What does "delouse" even mean?  What
>> is the ABI peculiarity that means there are ABI-conforming arguments to
>> these functions that need such a manipulation?
>>
>
> This is the term the kernel uses for making sure that no system call
> from user space passes data in the upper bits of the argument registers,
> which could end up being used in an exploit when the calling conventions
> between functions assume that the upper bits contain zeroes.
>
> I don't think there is any point in doing this in glibc though: we
> can safely assume that any application calling into glibc follows
> the documented calling conventions (it would otherwise be a bug),
> but the kernel still has to zero those registers because malicious
> user space code would simply execute the system call instruction
> directly instead of calling into glibc...

The documented ABI leaves the top 32 bits undefined, which is why the
zeroing is needed in these cases.  Maybe a different name is needed
here.
It is the same reason it is needed for the vDSO assembly code.  If
these functions were written in C, the compiler would emit the
zero-extension for you.

Thanks,
Andrew

>
>         Arnd
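
As a sketch of that last point (hypothetical example, not from the
patch): for an ILP32 target the compiler itself inserts the
zero-extension before a 32-bit pointer is used as a 64-bit address,
roughly:

	/* C source: */
	size_t first_byte (const unsigned char *p) { return p[0]; }

	/* Plausible ILP32 codegen -- the compiler "delouses" the
	   incoming pointer before dereferencing it:
		mov	w0, w0		// zero-extend p into x0
		ldrb	w0, [x0]
		ret
	   Hand-written assembly, as in this patch, has to add the
	   equivalent mov by hand.  */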
  

Patch

diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
index 58332be..0377715 100644
--- a/sysdeps/aarch64/__longjmp.S
+++ b/sysdeps/aarch64/__longjmp.S
@@ -46,6 +46,8 @@  ENTRY (__longjmp)
 	cfi_offset(d14, JB_D14<<3)
 	cfi_offset(d15, JB_D15<<3)
 
+	DELOUSE(0)
+
 	ldp	x19, x20, [x0, #JB_X19<<3]
 	ldp	x21, x22, [x0, #JB_X21<<3]
 	ldp	x23, x24, [x0, #JB_X23<<3]
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
index fe8a17d..718dddf 100644
--- a/sysdeps/aarch64/dl-tlsdesc.S
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -74,6 +74,7 @@ 
 	cfi_startproc
 	.align 2
 _dl_tlsdesc_return:
+	DELOUSE(0)
 	ldr	PTR_REG (0), [x0, #PTR_SIZE]
 	RET
 	cfi_endproc
@@ -126,6 +127,7 @@  _dl_tlsdesc_undefweak:
 	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
 	   from [x0,#8] here happens after the initialization of td->arg.  */
 	ldar	xzr, [x0]
+	DELOUSE(0)
 	ldr	PTR_REG (0), [x0, #PTR_SIZE]
 	mrs	x1, tpidr_el0
 	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)
@@ -174,6 +176,7 @@  _dl_tlsdesc_dynamic:
 	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
 	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
 	mov	x29, sp
+	DELOUSE(0)
 
 	/* Save just enough registers to support fast path, if we fall
 	   into slow path we will save additional registers.  */
@@ -279,12 +282,14 @@  _dl_tlsdesc_resolve_rela:
 
 	SAVE_Q_REGISTERS
 
+	DELOUSE(3)
 	ldr	PTR_REG (1), [x3, #PTR_SIZE]
 	bl	_dl_tlsdesc_resolve_rela_fixup
 
 	RESTORE_Q_REGISTERS
 
 	ldr	x0, [sp, #32+16*8]
+	DELOUSE(0)
 	ldr	PTR_REG (1), [x0]
 	blr	x1
 
@@ -346,6 +351,7 @@  _dl_tlsdesc_resolve_hold:
 	RESTORE_Q_REGISTERS
 
 	ldr	x0, [sp, #32+16*9]
+	DELOUSE(0)
 	ldr	PTR_REG (1), [x0]
 	blr	x1
 
diff --git a/sysdeps/aarch64/memcmp.S b/sysdeps/aarch64/memcmp.S
index ae2d997..982aa02 100644
--- a/sysdeps/aarch64/memcmp.S
+++ b/sysdeps/aarch64/memcmp.S
@@ -47,6 +47,9 @@ 
 #define mask		x13
 
 ENTRY_ALIGN (memcmp, 6)
+	DELOUSE(0)
+	DELOUSE(1)
+	DELOUSE(2)
 	cbz	limit, L(ret0)
 	eor	tmp1, src1, src2
 	tst	tmp1, #7
diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
index 442f390..e0bbbf8 100644
--- a/sysdeps/aarch64/memcpy.S
+++ b/sysdeps/aarch64/memcpy.S
@@ -46,7 +46,9 @@ 
 #include <sysdep.h>
 
 ENTRY_ALIGN (memcpy, 6)
-
+	DELOUSE(0)
+	DELOUSE(1)
+	DELOUSE(2)
 	mov	dst, dstin
 	cmp	count, #64
 	b.ge	L(cpy_not_short)
diff --git a/sysdeps/aarch64/memmove.S b/sysdeps/aarch64/memmove.S
index dd91db0..3f72dea 100644
--- a/sysdeps/aarch64/memmove.S
+++ b/sysdeps/aarch64/memmove.S
@@ -46,6 +46,9 @@ 
 #define D_h	x14
 
 ENTRY_ALIGN (memmove, 6)
+	DELOUSE(0)
+	DELOUSE(1)
+	DELOUSE(2)
 
 	cmp	dstin, src
 	b.lo	L(downwards)
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
index e49f4d6..e8eed9e 100644
--- a/sysdeps/aarch64/memset.S
+++ b/sysdeps/aarch64/memset.S
@@ -52,7 +52,8 @@ 
 #define tmp3w		w9
 
 ENTRY_ALIGN (__memset, 6)
-
+	DELOUSE(0)
+	DELOUSE(2)
 	mov	dst, dstin		/* Preserve return value.  */
 	ands	A_lw, val, #255
 #ifndef DONT_USE_DC
diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
index da83f19..d608660 100644
--- a/sysdeps/aarch64/setjmp.S
+++ b/sysdeps/aarch64/setjmp.S
@@ -33,6 +33,7 @@  END (_setjmp)
 libc_hidden_def (_setjmp)
 
 ENTRY (__sigsetjmp)
+	DELOUSE(0)
 
 1:
 	stp	x19, x20, [x0, #JB_X19<<3]
diff --git a/sysdeps/aarch64/strchr.S b/sysdeps/aarch64/strchr.S
index 5e3aecf..838384c 100644
--- a/sysdeps/aarch64/strchr.S
+++ b/sysdeps/aarch64/strchr.S
@@ -62,6 +62,7 @@ 
 /* Locals and temporaries.  */
 
 ENTRY (strchr)
+	DELOUSE(0)
 	mov	wtmp2, #0x0401
 	movk	wtmp2, #0x4010, lsl #16
 	dup	vrepchr.16b, chrin
diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
index a624c8d..b60df26 100644
--- a/sysdeps/aarch64/strchrnul.S
+++ b/sysdeps/aarch64/strchrnul.S
@@ -60,6 +60,7 @@ 
    identify exactly which byte is causing the termination.  */
 
 ENTRY (__strchrnul)
+	DELOUSE(0)
 	/* Magic constant 0x40100401 to allow us to identify which lane
 	   matches the termination condition.  */
 	mov	wtmp2, #0x0401
diff --git a/sysdeps/aarch64/strcmp.S b/sysdeps/aarch64/strcmp.S
index ba0ccb4..ccfe281 100644
--- a/sysdeps/aarch64/strcmp.S
+++ b/sysdeps/aarch64/strcmp.S
@@ -49,6 +49,8 @@ 
 	/* Start of performance-critical section  -- one 64B cache line.  */
 ENTRY_ALIGN(strcmp, 6)
 
+	DELOUSE(0)
+	DELOUSE(1)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
 	tst	tmp1, #7
diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
index 0694199..2a281b9 100644
--- a/sysdeps/aarch64/strcpy.S
+++ b/sysdeps/aarch64/strcpy.S
@@ -91,6 +91,8 @@ 
 #define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
 
 ENTRY_ALIGN (STRCPY, 6)
+	DELOUSE(0)
+	DELOUSE(1)
 	/* For moderately short strings, the fastest way to do the copy is to
 	   calculate the length of the string in the same way as strlen, then
 	   essentially do a memcpy of the result.  This avoids the need for
diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S
index 9b4d1da..4734cfb 100644
--- a/sysdeps/aarch64/strlen.S
+++ b/sysdeps/aarch64/strlen.S
@@ -85,6 +85,8 @@ 
 	   boundary.  */
 
 ENTRY_ALIGN (strlen, 6)
+	DELOUSE(0)
+	DELOUSE(1)
 	and	tmp1, srcin, MIN_PAGE_SIZE - 1
 	mov	zeroones, REP8_01
 	cmp	tmp1, MIN_PAGE_SIZE - 16
diff --git a/sysdeps/aarch64/strncmp.S b/sysdeps/aarch64/strncmp.S
index f6a17fd..a372654 100644
--- a/sysdeps/aarch64/strncmp.S
+++ b/sysdeps/aarch64/strncmp.S
@@ -51,6 +51,9 @@ 
 #define endloop		x15
 
 ENTRY_ALIGN_AND_PAD (strncmp, 6, 7)
+	DELOUSE(0)
+	DELOUSE(1)
+	DELOUSE(2)
 	cbz	limit, L(ret0)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
index 4cce45f..6f67221 100644
--- a/sysdeps/aarch64/strnlen.S
+++ b/sysdeps/aarch64/strnlen.S
@@ -50,6 +50,9 @@ 
 #define REP8_80 0x8080808080808080
 
 ENTRY_ALIGN_AND_PAD (__strnlen, 6, 9)
+	DELOUSE(0)
+	DELOUSE(1)
+	DELOUSE(2)
 	cbz	limit, L(hit_limit)
 	mov	zeroones, #REP8_01
 	bic	src, srcin, #15
diff --git a/sysdeps/aarch64/strrchr.S b/sysdeps/aarch64/strrchr.S
index 44c1917..bb85a60 100644
--- a/sysdeps/aarch64/strrchr.S
+++ b/sysdeps/aarch64/strrchr.S
@@ -68,6 +68,7 @@ 
    identify exactly which byte is causing the termination, and why.  */
 
 ENTRY(strrchr)
+	DELOUSE(0)
 	cbz	x1, L(null_search)
 	/* Magic constant 0x40100401 to allow us to identify which lane
 	   matches the requested byte.  Magic constant 0x80200802 used
diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h
index f2ea821..c4ff5e7 100644
--- a/sysdeps/aarch64/sysdep.h
+++ b/sysdeps/aarch64/sysdep.h
@@ -25,10 +25,12 @@ 
 #define AARCH64_R(NAME)		R_AARCH64_ ## NAME
 #define PTR_REG(n)	x##n
 #define PTR_LOG_SIZE	3
-#else
+#define DELOUSE(n)
+#else	/* __ILP32__ */
 #define AARCH64_R(NAME)		R_AARCH64_P32_ ## NAME
 #define PTR_REG(n)	w##n
 #define PTR_LOG_SIZE	2
+#define DELOUSE(n)	mov     w##n, w##n
 #endif
 
 #define PTR_SIZE	(1<<PTR_LOG_SIZE)
diff --git a/sysdeps/unix/sysv/linux/aarch64/clone.S b/sysdeps/unix/sysv/linux/aarch64/clone.S
index 596fb9c..91a1d4c 100644
--- a/sysdeps/unix/sysv/linux/aarch64/clone.S
+++ b/sysdeps/unix/sysv/linux/aarch64/clone.S
@@ -39,6 +39,13 @@ 
  */
         .text
 ENTRY(__clone)
+	DELOUSE(0)
+	DELOUSE(1)
+	DELOUSE(2)
+	DELOUSE(3)
+	DELOUSE(4)
+	DELOUSE(5)
+	DELOUSE(6)
 	/* Save args for the child.  */
 	mov	x10, x0
 	mov	x11, x2
diff --git a/sysdeps/unix/sysv/linux/aarch64/getcontext.S b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
index 71e526c..35ff326 100644
--- a/sysdeps/unix/sysv/linux/aarch64/getcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/getcontext.S
@@ -30,6 +30,7 @@ 
 	.text
 
 ENTRY(__getcontext)
+	DELOUSE(0)
 	/* The saved context will return to the getcontext() call point
 	   with a return value of 0 */
 	str	xzr,	  [x0, oX0 +  0 * SZREG]
diff --git a/sysdeps/unix/sysv/linux/aarch64/setcontext.S b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
index d17f8c8..7d854bd 100644
--- a/sysdeps/unix/sysv/linux/aarch64/setcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/setcontext.S
@@ -34,6 +34,7 @@ 
 	.text
 
 ENTRY (__setcontext)
+	DELOUSE(0)
 	/* Save a copy of UCP.  */
 	mov	x9, x0
 
diff --git a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
index c1a16f3..764fedc 100644
--- a/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
+++ b/sysdeps/unix/sysv/linux/aarch64/swapcontext.S
@@ -27,6 +27,7 @@ 
 
 	.text
 ENTRY(__swapcontext)
+	DELOUSE(0)
 	/* Set the value returned when swapcontext() returns in this context. */
 	str	xzr,      [x0, oX0 +  0 * SZREG]