powerpc: Fix syscalls during early process initialization [BZ #22685]

Message ID 20180111192750.6256-1-tuliom@linux.vnet.ibm.com
State Superseded
Headers

Commit Message

Tulio Magno Quites Machado Filho Jan. 11, 2018, 7:27 p.m. UTC
  The tunables framework needs to make syscalls early during process
initialization, before the TCB is available for consumption.  This
behavior conflicts with powerpc{|64|64le}'s lock elision code, which
tries to abort transactions before a syscall when lock elision is
available and enabled.

This patch adds the macro EARLY_INTERNAL_SYSCALL in order to let early
syscalls happen without depending on the TCB initialization for
powerpc{|64|64le}.  Other architectures are redirected to INTERNAL_SYSCALL.

Tested on powerpc{|64|64le}, s390x and x86_64.

2018-01-11  Tulio Magno Quites Machado Filho  <tuliom@linux.vnet.ibm.com>

	[BZ #22685]
	* sysdeps/unix/sysdep.h (__EARLY_INTERNAL_SYSCALL0,
	__EARLY_INTERNAL_SYSCALL1, __EARLY_INTERNAL_SYSCALL2,
	__EARLY_INTERNAL_SYSCALL3, __EARLY_INTERNAL_SYSCALL4,
	__EARLY_INTERNAL_SYSCALL5, __EARLY_INTERNAL_SYSCALL6,
	__EARLY_INTERNAL_SYSCALL7, EARLY_INTERNAL_SYSCALL_CALL): New macros.
	(EARLY_INTERNAL_SYSCALL): New macro.  Redirect to
	INTERNAL_SYSCALL by default.
	* sysdeps/unix/sysv/linux/not-errno.h (__access_noerrno):
	Replace INTERNAL_SYSCALL_CALL with EARLY_INTERNAL_SYSCALL_CALL.
	* sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
	(EARLY_INTERNAL_SYSCALL_NCS, EARLY_INTERNAL_SYSCALL): New macros.
	* sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
	(EARLY_INTERNAL_SYSCALL_NCS, EARLY_INTERNAL_SYSCALL): Likewise.

Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
---
 sysdeps/unix/sysdep.h                              | 25 +++++++++++++++
 sysdeps/unix/sysv/linux/not-errno.h                |  4 +--
 sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h | 36 +++++++++++++++++++++-
 sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h | 34 +++++++++++++++++++-
 4 files changed, 95 insertions(+), 4 deletions(-)
  

Comments

Aurelien Jarno Jan. 11, 2018, 9:36 p.m. UTC | #1
On 2018-01-11 17:27, Tulio Magno Quites Machado Filho wrote:
> The tunables framework needs to make syscall early during process
> initialization, before the TCB is available for consumption.  This
> behavior conflicts with powerpc{|64|64le}'s lock elision code, that
> tries to abort transactions before a syscall when lock elision is
> available and enabled.
> 
> This patch adds the macro EARLY_INTERNAL_SYSCALL in order to let early
> syscalls happen without depending on the TCB initialization for
> powerpc{|64|64le}.  Other architectures are redirected to INTERNAL_SYSCALL.
> 
> Tested on powerpc{|64|64le}, s390x and x86_64.
> 
> 2018-01-11  Tulio Magno Quites Machado Filho  <tuliom@linux.vnet.ibm.com>
> 
> 	[BZ #22685]
> 	* sysdeps/unix/sysdep.h (__EARLY_INTERNAL_SYSCALL0,
> 	__EARLY_INTERNAL_SYSCALL1, __EARLY_INTERNAL_SYSCALL2,
> 	__EARLY_INTERNAL_SYSCALL3, __EARLY_INTERNAL_SYSCALL4,
> 	__EARLY_INTERNAL_SYSCALL5, __EARLY_INTERNAL_SYSCALL6,
> 	__EARLY_INTERNAL_SYSCALL7, EARLY_INTERNAL_SYSCALL_CALL): New macros
> 	(EARLY_INTERNAL_SYSCALL): New macro.  Redirect to
> 	INTERNAL_SYSCALL by default.
> 	* sysdeps/unix/sysv/linux/not-errno.h (__access_noerrno):
> 	Replace INTERNAL_SYSCALL_CALL with EARLY_INTERNAL_SYSCALL_CALL.
> 	* sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
> 	(EARLY_INTERNAL_SYSCALL_NCS, EARLY_INTERNAL_SYSCALL): New macros.
> 	* sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
> 	(EARLY_INTERNAL_SYSCALL_NCS, EARLY_INTERNAL_SYSCALL): Likewise.
> 
> Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
> ---
>  sysdeps/unix/sysdep.h                              | 25 +++++++++++++++
>  sysdeps/unix/sysv/linux/not-errno.h                |  4 +--
>  sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h | 36 +++++++++++++++++++++-
>  sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h | 34 +++++++++++++++++++-
>  4 files changed, 95 insertions(+), 4 deletions(-)

Thanks for the patch, I have just tested it and I confirm it fixes the
issue.

Tested-by: Aurelien Jarno <aurelien@aurel32.net>
  
Adhemerval Zanella Netto Jan. 11, 2018, 10:40 p.m. UTC | #2
On 11/01/2018 19:36, Aurelien Jarno wrote:
> On 2018-01-11 17:27, Tulio Magno Quites Machado Filho wrote:
>> The tunables framework needs to make syscall early during process
>> initialization, before the TCB is available for consumption.  This
>> behavior conflicts with powerpc{|64|64le}'s lock elision code, that
>> tries to abort transactions before a syscall when lock elision is
>> available and enabled.
>>
>> This patch adds the macro EARLY_INTERNAL_SYSCALL in order to let early
>> syscalls happen without depending on the TCB initialization for
>> powerpc{|64|64le}.  Other architectures are redirected to INTERNAL_SYSCALL.
>>
>> Tested on powerpc{|64|64le}, s390x and x86_64.

I am not really understanding why exactly this is failing because the only
object that currently uses __access_noerrno, dl-tunables.os, is built with
-DMODULE_NAME=rtld and thus ABORT_TRANSACTION should be an empty statement.
 

>>
>> 2018-01-11  Tulio Magno Quites Machado Filho  <tuliom@linux.vnet.ibm.com>
>>
>> 	[BZ #22685]
>> 	* sysdeps/unix/sysdep.h (__EARLY_INTERNAL_SYSCALL0,
>> 	__EARLY_INTERNAL_SYSCALL1, __EARLY_INTERNAL_SYSCALL2,
>> 	__EARLY_INTERNAL_SYSCALL3, __EARLY_INTERNAL_SYSCALL4,
>> 	__EARLY_INTERNAL_SYSCALL5, __EARLY_INTERNAL_SYSCALL6,
>> 	__EARLY_INTERNAL_SYSCALL7, EARLY_INTERNAL_SYSCALL_CALL): New macros
>> 	(EARLY_INTERNAL_SYSCALL): New macro.  Redirect to
>> 	INTERNAL_SYSCALL by default.
>> 	* sysdeps/unix/sysv/linux/not-errno.h (__access_noerrno):
>> 	Replace INTERNAL_SYSCALL_CALL with EARLY_INTERNAL_SYSCALL_CALL.
>> 	* sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
>> 	(EARLY_INTERNAL_SYSCALL_NCS, EARLY_INTERNAL_SYSCALL): New macros.
>> 	* sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
>> 	(EARLY_INTERNAL_SYSCALL_NCS, EARLY_INTERNAL_SYSCALL): Likewise.
>>
>> Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
>> ---
>>  sysdeps/unix/sysdep.h                              | 25 +++++++++++++++
>>  sysdeps/unix/sysv/linux/not-errno.h                |  4 +--
>>  sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h | 36 +++++++++++++++++++++-
>>  sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h | 34 +++++++++++++++++++-
>>  4 files changed, 95 insertions(+), 4 deletions(-)
> 
> Thanks for the patch, I have just tested it and I confirm it fixes the
> issue.
> 
> Tested-by: Aurelien Jarno <aurelien@aurel32.net>
>
  
Florian Weimer Jan. 11, 2018, 11:42 p.m. UTC | #3
On 01/11/2018 11:40 PM, Adhemerval Zanella wrote:
> I am not really understanding why exactly this is failing because the only
> object that currently uses __access_noerrno, dl-tunables.os, is built with
> -DMODULE_NAME=rtld and thus ABORT_TRANSACTION should be an empty statement.

elf/dl-tunables.o uses it as well, and is built with -DMODULE_NAME=libc.

Thanks,
Florian
  
Adhemerval Zanella Netto Jan. 12, 2018, 12:56 a.m. UTC | #4
On 11/01/2018 21:42, Florian Weimer wrote:
> On 01/11/2018 11:40 PM, Adhemerval Zanella wrote:
>> I am not really understanding why exactly this is failing because the only
>> object that currently uses __access_noerrno, dl-tunables.os, is built with
>> -DMODULE_NAME=rtld and thus ABORT_TRANSACTION should be an empty statement.
> 
> elf/dl-tunables.o uses it as well, and is built with -DMODULE_NAME=libc.
Right, and if I recall correctly the thread register is in an undefined state
at that time. The only concern I have is about adding some specific syscall
tinkering due to a very specific arch/OS requirement. Wouldn't it be better to
just reimplement __access_noerrno/not-errno.h for powerpc?
  

Patch

diff --git a/sysdeps/unix/sysdep.h b/sysdeps/unix/sysdep.h
index aac9303..689272e 100644
--- a/sysdeps/unix/sysdep.h
+++ b/sysdeps/unix/sysdep.h
@@ -57,6 +57,31 @@ 
 #define INTERNAL_SYSCALL_CALL(...) \
   __INTERNAL_SYSCALL_DISP (__INTERNAL_SYSCALL, __VA_ARGS__)
 
+#define __EARLY_INTERNAL_SYSCALL0(name, err) \
+  EARLY_INTERNAL_SYSCALL (name, err, 0)
+#define __EARLY_INTERNAL_SYSCALL1(name, err, a1) \
+  EARLY_INTERNAL_SYSCALL (name, err, 1, a1)
+#define __EARLY_INTERNAL_SYSCALL2(name, err, a1, a2) \
+  EARLY_INTERNAL_SYSCALL (name, err, 2, a1, a2)
+#define __EARLY_INTERNAL_SYSCALL3(name, err, a1, a2, a3) \
+  EARLY_INTERNAL_SYSCALL (name, err, 3, a1, a2, a3)
+#define __EARLY_INTERNAL_SYSCALL4(name, err, a1, a2, a3, a4) \
+  EARLY_INTERNAL_SYSCALL (name, err, 4, a1, a2, a3, a4)
+#define __EARLY_INTERNAL_SYSCALL5(name, err, a1, a2, a3, a4, a5) \
+  EARLY_INTERNAL_SYSCALL (name, err, 5, a1, a2, a3, a4, a5)
+#define __EARLY_INTERNAL_SYSCALL6(name, err, a1, a2, a3, a4, a5, a6) \
+  EARLY_INTERNAL_SYSCALL (name, err, 6, a1, a2, a3, a4, a5, a6)
+#define __EARLY_INTERNAL_SYSCALL7(name, err, a1, a2, a3, a4, a5, a6, a7) \
+  EARLY_INTERNAL_SYSCALL (name, err, 7, a1, a2, a3, a4, a5, a6, a7)
+
+/* It is similar to INTERNAL_SYSCALL_CALL, but it is reserved for system calls
+   during process initialization, when internal structures may not be
+   available, e.g. the TCB on powerpc.  */
+#define EARLY_INTERNAL_SYSCALL_CALL(...)				\
+  __INTERNAL_SYSCALL_DISP (__EARLY_INTERNAL_SYSCALL, __VA_ARGS__)
+#define EARLY_INTERNAL_SYSCALL(name, err, nr, args...)	\
+  INTERNAL_SYSCALL (name, err, nr, args)
+
 #define __INLINE_SYSCALL0(name) \
   INLINE_SYSCALL (name, 0)
 #define __INLINE_SYSCALL1(name, a1) \
diff --git a/sysdeps/unix/sysv/linux/not-errno.h b/sysdeps/unix/sysv/linux/not-errno.h
index 106ba5c..a033b7b 100644
--- a/sysdeps/unix/sysv/linux/not-errno.h
+++ b/sysdeps/unix/sysv/linux/not-errno.h
@@ -25,9 +25,9 @@  __access_noerrno (const char *pathname, int mode)
   int res;
   INTERNAL_SYSCALL_DECL (err);
 #ifdef __NR_access
-  res = INTERNAL_SYSCALL_CALL (access, err, pathname, mode);
+  res = EARLY_INTERNAL_SYSCALL_CALL (access, err, pathname, mode);
 #else
-  res = INTERNAL_SYSCALL_CALL (faccessat, err, AT_FDCWD, pathname, mode);
+  res = EARLY_INTERNAL_SYSCALL_CALL (faccessat, err, AT_FDCWD, pathname, mode);
 #endif
   if (INTERNAL_SYSCALL_ERROR_P (res, err))
     return INTERNAL_SYSCALL_ERRNO (res, err);
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
index f7277d5..efc6cbc 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
@@ -89,7 +89,10 @@ 
    On powerpc a system call basically clobbers the same registers like a
    function call, with the exception of LR (which is needed for the
    "sc; bnslr+" sequence) and CR (where only CR0.SO is clobbered to signal
-   an error return status).  */
+   an error return status).
+
+   Note that it requires the TCB to be allocated and completely set up in
+   order to abort transactions before the syscall.  */
 
 # undef INTERNAL_SYSCALL_DECL
 # define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused))
@@ -124,6 +127,37 @@ 
 # define INTERNAL_SYSCALL(name, err, nr, args...) \
   INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args)
 
+/* Similar to INTERNAL_SYSCALL, but reserved for early process initialization
+   without requiring the TCB to be allocated and completely set up.  */
+# undef EARLY_INTERNAL_SYSCALL
+# define EARLY_INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
+  ({									\
+    register long int r0  __asm__ ("r0");				\
+    register long int r3  __asm__ ("r3");				\
+    register long int r4  __asm__ ("r4");				\
+    register long int r5  __asm__ ("r5");				\
+    register long int r6  __asm__ ("r6");				\
+    register long int r7  __asm__ ("r7");				\
+    register long int r8  __asm__ ("r8");				\
+    register long int r9  __asm__ ("r9");				\
+    register long int r10 __asm__ ("r10");				\
+    register long int r11 __asm__ ("r11");				\
+    register long int r12 __asm__ ("r12");				\
+    LOADARGS_##nr(name, args);						\
+    __asm__ __volatile__						\
+      ("sc   \n\t"							\
+       "mfcr %0"							\
+       : "=&r" (r0),							\
+	 "=&r" (r3), "=&r" (r4), "=&r" (r5),  "=&r" (r6),  "=&r" (r7),	\
+	 "=&r" (r8), "=&r" (r9), "=&r" (r10), "=&r" (r11), "=&r" (r12)	\
+       : ASM_INPUT_##nr							\
+       : "cr0", "ctr", "memory");					\
+    err = r0;								\
+    (int) r3;								\
+  })
+# define EARLY_INTERNAL_SYSCALL(name, err, nr, args...) \
+  EARLY_INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args)
+
 # undef INTERNAL_SYSCALL_ERROR_P
 # define INTERNAL_SYSCALL_ERROR_P(val, err) \
   ((void) (val), __builtin_expect ((err) & (1 << 28), 0))
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
index 0fc179a..d431fdb 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
@@ -118,7 +118,10 @@ 
    call. This use is for internal calls that do not need to handle errors
    normally. It will never touch errno. This returns just what the kernel
    gave back in the non-error (CR0.SO cleared) case, otherwise (CR0.SO set)
-   the negation of the return value in the kernel gets reverted.  */
+   the negation of the return value in the kernel gets reverted.
+
+   Note that it requires the TCB to be allocated and completely set up in
+   order to abort transactions before the syscall.  */
 
 #undef INTERNAL_SYSCALL
 #define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
@@ -148,6 +151,35 @@ 
 #define INTERNAL_SYSCALL(name, err, nr, args...)			\
   INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, args)
 
+/* Similar to INTERNAL_SYSCALL, but reserved for early process initialization
+   without requiring the TCB to be allocated and completely set up.  */
+#undef EARLY_INTERNAL_SYSCALL
+#define EARLY_INTERNAL_SYSCALL_NCS(name, err, nr, args...) \
+  ({							\
+    register long int r0  __asm__ ("r0");		\
+    register long int r3  __asm__ ("r3");		\
+    register long int r4  __asm__ ("r4");		\
+    register long int r5  __asm__ ("r5");		\
+    register long int r6  __asm__ ("r6");		\
+    register long int r7  __asm__ ("r7");		\
+    register long int r8  __asm__ ("r8");		\
+    LOADARGS_##nr (name, ##args);			\
+    __asm__ __volatile__				\
+      ("sc\n\t"						\
+       "mfcr  %0\n\t"					\
+       "0:"						\
+       : "=&r" (r0),					\
+         "=&r" (r3), "=&r" (r4), "=&r" (r5),		\
+         "=&r" (r6), "=&r" (r7), "=&r" (r8)		\
+       : ASM_INPUT_##nr					\
+       : "r9", "r10", "r11", "r12",			\
+         "cr0", "ctr", "memory");			\
+    err = r0;						\
+    r3;							\
+  })
+#define EARLY_INTERNAL_SYSCALL(name, err, nr, args...)	\
+  EARLY_INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, args)
+
 #undef INTERNAL_SYSCALL_DECL
 #define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused))