[03/03] PowerPC: abort transaction in syscalls
Commit Message
Linux kernel powerpc documentation [1] states issuing a syscall inside a
transaction is not recommended and may lead to undefined behavior. It
also states syscalls do not abort transactions neither they run in
transactional state. This may lead to side-effects begin visible outside
transactions, specially when related to 'futex' syscalls.
I see constantly deadlock issues in testcases that might trigger a futex
sycalls inside a transaction. Although they are related to timing issue, both
pthread_cond and pthread_barrier are very susceptible to such issue. To avoid
side-effects being visible outside transactions, GLIBC powerpc with lock elision
enable issue a transaction abort instruction just before any syscall.
I have added a new TCB fields named tm_capable that is initially set to 1
if system supports hardware transaction memory (it is set from hwcap2 field).
If GLIBC is built with lock elision support, every syscall made in libc.so
and other libraries (except the loader itself) will check the field value
and abort the transaction if the field is set to 1. I have chose to use a
TCB field instead to rely on a accessing a global variable (hwcap2 for instance)
for some reasons:
1. The TCB field logic would be simple to implement, it avoid the requirement of
creating a TOC field in every syscall to access a global variable and also
simplifies the implementation of syscall wrappers in assembly.
2. The TCB field simplifies syscalls wrapper done by other libraries (there is no
need to export private symbols).
3. It also provides a much more simpler way in the future to specific that a thread
should not abort on syscalls (by setting its TCB tm_capable field to 0).
4. There is some internal discussions about enabling transaction abort in syscalls
for powerpc within kernel. This adds an early abort, potentially improving
performance by avoid the syscall call (since the transaction would be aborted
anyway).
The performance implications on syscalls for such modification are negligible (
around 3-5 cycles in POWER8).
[1] https://www.kernel.org/doc/Documentation/powerpc/transactional_memory.txt
--
* sysdeps/powerpc/nptl/tls.h (tcbhead_t): Add tm_capable field.
(TLS_INIT_TP): Add tm_capable initialization.
(TLS_DEFINE_INIT_TP): Likewise.
(THREAD_GET_TM_CAPABLE): New file: get tm_capable field value from
TCB.
(THREAD_SET_TM_CAPABLE): New file: set tm_capable field value in TCB.
* sysdeps/powerpc/nptl/tcb-offsets.sym (TM_CAPABLE): Add field offset
calculation.
* sysdeps/powerpc/powerpc32/sysdep.h (DO_CALL): Abort hardware
transactoion is lock elision is built and TCB tm_capable is set.
* sysdeps/powerpc/powerpc64/sysdep.h (DO_CALL): Likewise.
* sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
(INTERNAL_SYSCALL_NCS): Likewise.
* sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
(INTERNAL_SYSCALL_NCS): Likewise.
* sysdeps/powerpc/sysdep.h (ABORT_TRANSACTION): New define.
---
@@ -19,6 +19,7 @@ POINTER_GUARD (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (
TAR_SAVE (offsetof (tcbhead_t, tar_save) - TLS_TCB_OFFSET - sizeof (tcbhead_t))
DSO_SLOT1 (offsetof (tcbhead_t, dso_slot1) - TLS_TCB_OFFSET - sizeof (tcbhead_t))
DSO_SLOT2 (offsetof (tcbhead_t, dso_slot2) - TLS_TCB_OFFSET - sizeof (tcbhead_t))
+TM_CAPABLE (offsetof (tcbhead_t, tm_capable) - TLS_TCB_OFFSET - sizeof (tcbhead_t))
#ifndef __ASSUME_PRIVATE_FUTEX
PRIVATE_FUTEX_OFFSET thread_offsetof (header.private_futex)
#endif
@@ -63,6 +63,8 @@ typedef union dtv
are private. */
typedef struct
{
+ /* Indicate if hwcap2 has PPC_FEATURE2_HAS_HTM capability. */
+ int tm_capable;
/* Reservation for Dynamic System Optimizer ABI. */
uintptr_t dso_slot2;
uintptr_t dso_slot1;
@@ -130,11 +132,17 @@ register void *__thread_register __asm__ ("r13");
special attention since 'errno' is not yet available and if the
operation can cause a failure 'errno' must not be touched. */
# define TLS_INIT_TP(tcbp) \
- (__thread_register = (void *) (tcbp) + TLS_TCB_OFFSET, NULL)
+ ({ \
+ __thread_register = (void *) (tcbp) + TLS_TCB_OFFSET; \
+ THREAD_SET_TM_CAPABLE (GLRO (dl_hwcap2) & PPC_FEATURE2_HAS_HTM ? 1 : 0); \
+ NULL; \
+ })
/* Value passed to 'clone' for initialization of the thread register. */
# define TLS_DEFINE_INIT_TP(tp, pd) \
- void *tp = (void *) (pd) + TLS_TCB_OFFSET + TLS_PRE_TCB_SIZE
+ void *tp = (void *) (pd) + TLS_TCB_OFFSET + TLS_PRE_TCB_SIZE; \
+ (((tcbhead_t *) ((char *) tp - TLS_TCB_OFFSET))[-1].tm_capable) = \
+ THREAD_GET_TM_CAPABLE ();
/* Return the address of the dtv for the current thread. */
# define THREAD_DTV() \
@@ -188,6 +196,13 @@ register void *__thread_register __asm__ ("r13");
+ TLS_PRE_TCB_SIZE))[-1].pointer_guard \
= THREAD_GET_POINTER_GUARD())
+/* tm_capable field in TCB head. */
+# define THREAD_GET_TM_CAPABLE() \
+ (((tcbhead_t *) ((char *) __thread_register \
+ - TLS_TCB_OFFSET))[-1].tm_capable)
+# define THREAD_SET_TM_CAPABLE(value) \
+ (THREAD_GET_TM_CAPABLE () = (value))
+
/* l_tls_offset == 0 is perfectly valid on PPC, so we have to use some
different value to mean unset l_tls_offset. */
# define NO_TLS_OFFSET -1
@@ -88,7 +88,23 @@ GOT_LABEL: ; \
cfi_endproc; \
ASM_SIZE_DIRECTIVE(name)
+#if !defined IS_IN_rtld && defined (ENABLE_LOCK_ELISION)
+# define ABORT_TRANSACTION \
+ cmpwi 2,0; \
+ beq 1f; \
+ lwz 0,TM_CAPABLE(2); \
+ cmpwi 0,0; \
+ beq 1f; \
+ li 0,_ABORT_SYSCALL; \
+ tabort. 0; \
+ .align 4; \
+1:
+#else
+# define ABORT_TRANSACTION
+#endif
+
#define DO_CALL(syscall) \
+ ABORT_TRANSACTION \
li 0,syscall; \
sc
@@ -283,7 +283,23 @@ LT_LABELSUFFIX(name,_name_end): ; \
TRACEBACK_MASK(name,mask) \
END_2(name)
+#if !defined IS_IN_rtld && defined (ENABLE_LOCK_ELISION)
+# define ABORT_TRANSACTION \
+ cmpdi 13,0; \
+ beq 1f; \
+ lwz 0,TM_CAPABLE(13); \
+ cmpwi 0,0; \
+ beq 1f; \
+ li 0,_ABORT_SYSCALL; \
+ tabort. 0; \
+ .align 4; \
+1:
+#else
+# define ABORT_TRANSACTION
+#endif
+
#define DO_CALL(syscall) \
+ ABORT_TRANSACTION \
li 0,syscall; \
sc
@@ -21,6 +21,10 @@
*/
#define _SYSDEPS_SYSDEP_H 1
#include <bits/hwcap.h>
+#ifdef ENABLE_LOCK_ELISION
+#include <tls.h>
+#include <htm.h>
+#endif
#define PPC_FEATURE_970 (PPC_FEATURE_POWER4 + PPC_FEATURE_HAS_ALTIVEC)
@@ -164,4 +168,21 @@
#define ALIGNARG(log2) log2
#define ASM_SIZE_DIRECTIVE(name) .size name,.-name
+#else
+
+/* Linux kernel powerpc documentation states issuing a syscall inside a
+ transaction is not recommended and may lead to undefined behavior. It
+ also states syscalls does not abort transactoin neither run in
+ transactional state. To avoid such traps, we abort transaction just
+ before syscalls. */
+#if !defined IS_IN_rtld && defined (ENABLE_LOCK_ELISION)
+# define ABORT_TRANSACTION \
+ ({ \
+ if (THREAD_GET_TM_CAPABLE ()) \
+ __builtin_tabort (_ABORT_SYSCALL); \
+ })
+#else
+# define ABORT_TRANSACTION
+#endif
+
#endif /* __ASSEMBLER__ */
@@ -194,6 +194,7 @@
register long int r11 __asm__ ("r11"); \
register long int r12 __asm__ ("r12"); \
LOADARGS_##nr(name, args); \
+ ABORT_TRANSACTION; \
__asm__ __volatile__ \
("sc \n\t" \
"mfcr %0" \
@@ -201,6 +201,7 @@
register long int r7 __asm__ ("r7"); \
register long int r8 __asm__ ("r8"); \
LOADARGS_##nr (name, ##args); \
+ ABORT_TRANSACTION; \
__asm__ __volatile__ \
("sc\n\t" \
"mfcr %0\n\t" \