@@ -18,6 +18,7 @@
#include <stdio.h>
#include <atomic.h>
+#include <support/xthread.h>
#ifndef atomic_t
# define atomic_t int
@@ -290,9 +290,6 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
stack cache nor will the memory (except the TLS memory) be freed. */
pd->user_stack = true;
- /* This is at least the second thread. */
- pd->header.multiple_threads = 1;
-
#ifdef NEED_DL_SYSINFO
SETUP_THREAD_SYSINFO (pd);
#endif
@@ -408,9 +405,6 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
descriptor. */
pd->specific[0] = pd->specific_1stblock;
- /* This is at least the second thread. */
- pd->header.multiple_threads = 1;
-
#ifdef NEED_DL_SYSINFO
SETUP_THREAD_SYSINFO (pd);
#endif
@@ -137,22 +137,7 @@ struct pthread
#else
struct
{
- /* multiple_threads is enabled either when the process has spawned at
- least one thread or when a single-threaded process cancels itself.
- This enables additional code to introduce locking before doing some
- compare_and_exchange operations and also enable cancellation points.
- The concepts of multiple threads and cancellation points ideally
- should be separate, since it is not necessary for multiple threads to
- have been created for cancellation points to be enabled, as is the
- case is when single-threaded process cancels itself.
-
- Since enabling multiple_threads enables additional code in
- cancellation points and compare_and_exchange operations, there is a
- potential for an unneeded performance hit when it is enabled in a
- single-threaded, self-canceling process. This is OK though, since a
- single-threaded process will enable async cancellation only when it
- looks to cancel itself and is hence going to end anyway. */
- int multiple_threads;
+ int unused_multiple_threads;
int gscope_flag;
} header;
#endif
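
The field is renamed rather than dropped so that every later member of the header, and the overall descriptor layout, keeps its offset.  A minimal stand-alone layout check along these lines (hypothetical struct names, not part of the patch) shows why the rename is ABI-neutral:

#include <assert.h>
#include <stddef.h>

/* Stand-ins for the header before and after the rename.  */
struct hdr_old { int multiple_threads;        int gscope_flag; };
struct hdr_new { int unused_multiple_threads; int gscope_flag; };

static_assert (sizeof (struct hdr_old) == sizeof (struct hdr_new),
               "renaming keeps the header size unchanged");
static_assert (offsetof (struct hdr_old, gscope_flag)
               == offsetof (struct hdr_new, gscope_flag),
               "members after the renamed field keep their offsets");
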
@@ -157,12 +157,9 @@ __pthread_cancel (pthread_t th)
/* A single-threaded process should be able to kill itself, since
there is nothing in the POSIX specification that says that it
- cannot. So we set multiple_threads to true so that cancellation
- points get executed. */
- THREAD_SETMEM (THREAD_SELF, header.multiple_threads, 1);
-#ifndef TLS_MULTIPLE_THREADS_IN_TCB
+      cannot.  So we set __libc_single_threaded to false so that
+ cancellation points get executed. */
__libc_single_threaded_internal = 0;
-#endif
}
while (!atomic_compare_exchange_weak_acquire (&pd->cancelhandling, &oldval,
newval));
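
The case this hunk covers is a process that never created a second thread cancelling itself; a rough user-level illustration (not part of the patch, and the choice of pause as the cancellation point is arbitrary):

#include <pthread.h>
#include <unistd.h>

int
main (void)
{
  /* No second thread exists.  Cancellation must still work, so the
     cancel path clears __libc_single_threaded_internal; the next
     cancellation point (pause here) then acts on the pending request.  */
  pthread_cancel (pthread_self ());
  pause ();
  return 0;
}
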
@@ -881,11 +881,6 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
other reason that create_thread chose. Now let it run
free. */
lll_unlock (pd->lock, LLL_PRIVATE);
-
- /* We now have for sure more than one thread. The main thread might
- not yet have the flag set. No need to set the global variable
- again if this is what we use. */
- THREAD_SETMEM (THREAD_SELF, header.multiple_threads, 1);
}
out:
@@ -2,7 +2,6 @@
#include <tls.h>
#include <kernel-features.h>
-MULTIPLE_THREADS_OFFSET offsetof (tcbhead_t, multiple_threads)
SYSINFO_OFFSET offsetof (tcbhead_t, sysinfo)
POINTER_GUARD offsetof (tcbhead_t, pointer_guard)
SIGSTATE_OFFSET offsetof (tcbhead_t, _hurd_sigstate)
@@ -6,7 +6,6 @@ RESULT offsetof (struct pthread, result)
TID offsetof (struct pthread, tid)
CANCELHANDLING offsetof (struct pthread, cancelhandling)
CLEANUP_JMP_BUF offsetof (struct pthread, cleanup_jmp_buf)
-MULTIPLE_THREADS_OFFSET offsetof (tcbhead_t, multiple_threads)
SYSINFO_OFFSET offsetof (tcbhead_t, sysinfo)
CLEANUP offsetof (struct pthread, cleanup)
CLEANUP_PREV offsetof (struct _pthread_cleanup_buffer, __prev)
@@ -36,7 +36,7 @@ typedef struct
thread descriptor used by libpthread. */
dtv_t *dtv;
void *self; /* Pointer to the thread descriptor. */
- int multiple_threads;
+ int unused_multiple_threads;
uintptr_t sysinfo;
uintptr_t stack_guard;
uintptr_t pointer_guard;
@@ -57,8 +57,6 @@ typedef struct
_Static_assert (offsetof (tcbhead_t, __private_ss) == 0x30,
"offset of __private_ss != 0x30");
-# define TLS_MULTIPLE_THREADS_IN_TCB 1
-
#else /* __ASSEMBLER__ */
# include <tcb-offsets.h>
#endif
@@ -2,5 +2,4 @@
#include <tls.h>
TID offsetof (struct pthread, tid) - TLS_PRE_TCB_SIZE
-MULTIPLE_THREADS_OFFSET offsetof (struct pthread, header.multiple_threads) - TLS_PRE_TCB_SIZE
SYSINFO_OFFSET offsetof (tcbhead_t, __private)
@@ -36,8 +36,6 @@ typedef struct
register struct pthread *__thread_self __asm__("r13");
-# define TLS_MULTIPLE_THREADS_IN_TCB 1
-
#else /* __ASSEMBLER__ */
# include <tcb-offsets.h>
#endif
@@ -33,7 +33,7 @@ typedef struct
void *tcb; /* Points to this structure. */
dtv_t *dtv; /* Vector of pointers to TLS data. */
thread_t self; /* This thread's control port. */
- int multiple_threads;
+ int unused_multiple_threads;
uintptr_t sysinfo;
uintptr_t stack_guard;
uintptr_t pointer_guard;
@@ -117,8 +117,6 @@ _hurd_tls_init (tcbhead_t *tcb)
/* This field is used by TLS accesses to get our "thread pointer"
from the TLS point of view. */
tcb->tcb = tcb;
- /* We always at least start the sigthread anyway. */
- tcb->multiple_threads = 1;
/* Get the first available selector. */
int sel = -1;
@@ -8,6 +8,5 @@
# define __thread_self ((void *) 0)
# define thread_offsetof(mem) ((ptrdiff_t) THREAD_SELF + offsetof (struct pthread, mem))
-MULTIPLE_THREADS_OFFSET thread_offsetof (header.multiple_threads)
TID_OFFSET thread_offsetof (tid)
POINTER_GUARD (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t))
@@ -35,8 +35,6 @@ typedef struct
register tcbhead_t *__thread_self __asm__("r10");
-# define TLS_MULTIPLE_THREADS_IN_TCB 1
-
/* Get system call information. */
# include <sysdep.h>
@@ -10,9 +10,6 @@
# define thread_offsetof(mem) ((ptrdiff_t) THREAD_SELF + offsetof (struct pthread, mem))
-#if TLS_MULTIPLE_THREADS_IN_TCB
-MULTIPLE_THREADS_OFFSET thread_offsetof (header.multiple_threads)
-#endif
TID thread_offsetof (tid)
POINTER_GUARD (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t))
TAR_SAVE (offsetof (tcbhead_t, tar_save) - TLS_TCB_OFFSET - sizeof (tcbhead_t))
@@ -52,9 +52,6 @@
# define TLS_DTV_AT_TP 1
# define TLS_TCB_AT_TP 0
-/* We use the multiple_threads field in the pthread struct */
-#define TLS_MULTIPLE_THREADS_IN_TCB 1
-
/* Get the thread descriptor definition. */
# include <nptl/descr.h>
@@ -1,6 +1,5 @@
#include <sysdep.h>
#include <tls.h>
-MULTIPLE_THREADS_OFFSET offsetof (tcbhead_t, multiple_threads)
STACK_GUARD offsetof (tcbhead_t, stack_guard)
TID offsetof (struct pthread, tid)
@@ -35,7 +35,7 @@ typedef struct
thread descriptor used by libpthread. */
dtv_t *dtv;
void *self; /* Pointer to the thread descriptor. */
- int multiple_threads;
+ int unused_multiple_threads;
uintptr_t sysinfo;
uintptr_t stack_guard;
int gscope_flag;
@@ -44,10 +44,6 @@ typedef struct
void *__private_ss;
} tcbhead_t;
-# ifndef __s390x__
-# define TLS_MULTIPLE_THREADS_IN_TCB 1
-# endif
-
#else /* __ASSEMBLER__ */
# include <tcb-offsets.h>
#endif
@@ -6,7 +6,6 @@ RESULT offsetof (struct pthread, result)
TID offsetof (struct pthread, tid)
CANCELHANDLING offsetof (struct pthread, cancelhandling)
CLEANUP_JMP_BUF offsetof (struct pthread, cleanup_jmp_buf)
-MULTIPLE_THREADS_OFFSET offsetof (struct pthread, header.multiple_threads)
TLS_PRE_TCB_SIZE sizeof (struct pthread)
MUTEX_FUTEX offsetof (pthread_mutex_t, __data.__lock)
POINTER_GUARD offsetof (tcbhead_t, pointer_guard)
@@ -36,8 +36,6 @@ typedef struct
uintptr_t pointer_guard;
} tcbhead_t;
-# define TLS_MULTIPLE_THREADS_IN_TCB 1
-
#else /* __ASSEMBLER__ */
# include <tcb-offsets.h>
#endif /* __ASSEMBLER__ */
@@ -1,6 +1,5 @@
#include <sysdep.h>
#include <tls.h>
-MULTIPLE_THREADS_OFFSET offsetof (tcbhead_t, multiple_threads)
POINTER_GUARD offsetof (tcbhead_t, pointer_guard)
TID offsetof (struct pthread, tid)
@@ -35,7 +35,7 @@ typedef struct
thread descriptor used by libpthread. */
dtv_t *dtv;
void *self;
- int multiple_threads;
+ int unused_multiple_threads;
#if __WORDSIZE == 64
int gscope_flag;
#endif
@@ -23,20 +23,7 @@
# include <sys/single_threaded.h>
#endif
-/* The default way to check if the process is single thread is by using the
- pthread_t 'multiple_threads' field. However, for some architectures it is
- faster to either use an extra field on TCB or global variables (the TCB
- field is also used on x86 for some single-thread atomic optimizations).
-
- The ABI might define SINGLE_THREAD_BY_GLOBAL to enable the single thread
- check to use global variables instead of the pthread_t field. */
-
-#if !defined SINGLE_THREAD_BY_GLOBAL || IS_IN (rtld)
-# define SINGLE_THREAD_P \
- (THREAD_GETMEM (THREAD_SELF, header.multiple_threads) == 0)
-#else
-# define SINGLE_THREAD_P (__libc_single_threaded_internal != 0)
-#endif
+#define SINGLE_THREAD_P (__libc_single_threaded_internal != 0)
#define RTLD_SINGLE_THREAD_P SINGLE_THREAD_P
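
SINGLE_THREAD_P is now simply a read of the internal single-threaded flag, which mirrors the public __libc_single_threaded variable.  A small illustration of that variable's behaviour (assumes glibc 2.32 or later; not part of the patch):

#include <pthread.h>
#include <stdio.h>
#include <sys/single_threaded.h>

static void *
worker (void *arg)
{
  return arg;
}

int
main (void)
{
  /* Nonzero while the process has never had a second thread.  */
  printf ("before pthread_create: %d\n", (int) __libc_single_threaded);
  pthread_t t;
  pthread_create (&t, NULL, worker, NULL);
  /* Cleared once a second thread has been created.  */
  printf ("after pthread_create:  %d\n", (int) __libc_single_threaded);
  pthread_join (t, NULL);
  return 0;
}
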
@@ -51,292 +51,145 @@
#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
(! __sync_bool_compare_and_swap (mem, oldval, newval))
-
-#define __arch_c_compare_and_exchange_val_8_acq(mem, newval, oldval) \
- ({ __typeof (*mem) ret; \
- __asm __volatile ("cmpl $0, %%" SEG_REG ":%P5\n\t" \
- "je 0f\n\t" \
- "lock\n" \
- "0:\tcmpxchgb %b2, %1" \
- : "=a" (ret), "=m" (*mem) \
- : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- ret; })
-
-#define __arch_c_compare_and_exchange_val_16_acq(mem, newval, oldval) \
- ({ __typeof (*mem) ret; \
- __asm __volatile ("cmpl $0, %%" SEG_REG ":%P5\n\t" \
- "je 0f\n\t" \
- "lock\n" \
- "0:\tcmpxchgw %w2, %1" \
- : "=a" (ret), "=m" (*mem) \
- : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- ret; })
-
-#define __arch_c_compare_and_exchange_val_32_acq(mem, newval, oldval) \
- ({ __typeof (*mem) ret; \
- __asm __volatile ("cmpl $0, %%" SEG_REG ":%P5\n\t" \
- "je 0f\n\t" \
- "lock\n" \
- "0:\tcmpxchgl %2, %1" \
- : "=a" (ret), "=m" (*mem) \
- : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- ret; })
-
-#ifdef __x86_64__
-# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
- ({ __typeof (*mem) ret; \
- __asm __volatile ("cmpl $0, %%fs:%P5\n\t" \
- "je 0f\n\t" \
- "lock\n" \
- "0:\tcmpxchgq %q2, %1" \
- : "=a" (ret), "=m" (*mem) \
- : "q" ((int64_t) cast_to_integer (newval)), \
- "m" (*mem), \
- "0" ((int64_t) cast_to_integer (oldval)), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- ret; })
-# define do_exchange_and_add_val_64_acq(pfx, mem, value) 0
-# define do_add_val_64_acq(pfx, mem, value) do { } while (0)
-#else
-/* XXX We do not really need 64-bit compare-and-exchange. At least
- not in the moment. Using it would mean causing portability
- problems since not many other 32-bit architectures have support for
- such an operation. So don't define any code for now. If it is
- really going to be used the code below can be used on Intel Pentium
- and later, but NOT on i486. */
-# define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
- ({ __typeof (*mem) ret = *(mem); \
- __atomic_link_error (); \
- ret = (newval); \
- ret = (oldval); \
- ret; })
-
-# define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
- ({ __typeof (*mem) ret = *(mem); \
- __atomic_link_error (); \
- ret = (newval); \
- ret = (oldval); \
- ret; })
-
-# define do_exchange_and_add_val_64_acq(pfx, mem, value) \
- ({ __typeof (value) __addval = (value); \
- __typeof (*mem) __result; \
- __typeof (mem) __memp = (mem); \
- __typeof (*mem) __tmpval; \
- __result = *__memp; \
- do \
- __tmpval = __result; \
- while ((__result = pfx##_compare_and_exchange_val_64_acq \
- (__memp, __result + __addval, __result)) == __tmpval); \
- __result; })
-
-# define do_add_val_64_acq(pfx, mem, value) \
- { \
- __typeof (value) __addval = (value); \
- __typeof (mem) __memp = (mem); \
- __typeof (*mem) __oldval = *__memp; \
- __typeof (*mem) __tmpval; \
- do \
- __tmpval = __oldval; \
- while ((__oldval = pfx##_compare_and_exchange_val_64_acq \
- (__memp, __oldval + __addval, __oldval)) == __tmpval); \
- }
-#endif
-
-
-/* Note that we need no lock prefix. */
-#define atomic_exchange_acq(mem, newvalue) \
- ({ __typeof (*mem) result; \
+#define __cmpxchg_op(lock, mem, newval, oldval) \
+ ({ __typeof (*mem) __ret; \
if (sizeof (*mem) == 1) \
- __asm __volatile ("xchgb %b0, %1" \
- : "=q" (result), "=m" (*mem) \
- : "0" (newvalue), "m" (*mem)); \
+ asm volatile (lock "cmpxchgb %2, %1" \
+ : "=a" (__ret), "+m" (*mem) \
+ : BR_CONSTRAINT (newval), "0" (oldval) \
+ : "memory"); \
else if (sizeof (*mem) == 2) \
- __asm __volatile ("xchgw %w0, %1" \
- : "=r" (result), "=m" (*mem) \
- : "0" (newvalue), "m" (*mem)); \
+ asm volatile (lock "cmpxchgw %2, %1" \
+ : "=a" (__ret), "+m" (*mem) \
+ : BR_CONSTRAINT (newval), "0" (oldval) \
+ : "memory"); \
else if (sizeof (*mem) == 4) \
- __asm __volatile ("xchgl %0, %1" \
- : "=r" (result), "=m" (*mem) \
- : "0" (newvalue), "m" (*mem)); \
+ asm volatile (lock "cmpxchgl %2, %1" \
+ : "=a" (__ret), "+m" (*mem) \
+ : BR_CONSTRAINT (newval), "0" (oldval) \
+ : "memory"); \
else if (__HAVE_64B_ATOMICS) \
- __asm __volatile ("xchgq %q0, %1" \
- : "=r" (result), "=m" (*mem) \
- : "0" ((int64_t) cast_to_integer (newvalue)), \
- "m" (*mem)); \
+ asm volatile (lock "cmpxchgq %2, %1" \
+ : "=a" (__ret), "+m" (*mem) \
+ : "q" ((int64_t) cast_to_integer (newval)), \
+ "0" ((int64_t) cast_to_integer (oldval)) \
+ : "memory"); \
else \
- { \
- result = 0; \
- __atomic_link_error (); \
- } \
- result; })
-
+ __atomic_link_error (); \
+ __ret; })
-#define __arch_exchange_and_add_body(lock, pfx, mem, value) \
- ({ __typeof (*mem) __result; \
- __typeof (value) __addval = (value); \
- if (sizeof (*mem) == 1) \
- __asm __volatile (lock "xaddb %b0, %1" \
- : "=q" (__result), "=m" (*mem) \
- : "0" (__addval), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (sizeof (*mem) == 2) \
- __asm __volatile (lock "xaddw %w0, %1" \
- : "=r" (__result), "=m" (*mem) \
- : "0" (__addval), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (sizeof (*mem) == 4) \
- __asm __volatile (lock "xaddl %0, %1" \
- : "=r" (__result), "=m" (*mem) \
- : "0" (__addval), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (__HAVE_64B_ATOMICS) \
- __asm __volatile (lock "xaddq %q0, %1" \
- : "=r" (__result), "=m" (*mem) \
- : "0" ((int64_t) cast_to_integer (__addval)), \
- "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
+#define __arch_c_compare_and_exchange_val_8_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) __ret; \
+ if (SINGLE_THREAD_P) \
+ __ret = __cmpxchg_op ("", (mem), (newval), (oldval)); \
else \
- __result = do_exchange_and_add_val_64_acq (pfx, (mem), __addval); \
- __result; })
-
-#define atomic_exchange_and_add(mem, value) \
- __sync_fetch_and_add (mem, value)
-
-#define __arch_exchange_and_add_cprefix \
- "cmpl $0, %%" SEG_REG ":%P4\n\tje 0f\n\tlock\n0:\t"
-
-#define catomic_exchange_and_add(mem, value) \
- __arch_exchange_and_add_body (__arch_exchange_and_add_cprefix, __arch_c, \
- mem, value)
+ __ret = __cmpxchg_op (LOCK_PREFIX, (mem), (newval), (oldval)); \
+ __ret; })
+#define __arch_c_compare_and_exchange_val_16_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) __ret; \
+ if (SINGLE_THREAD_P) \
+ __ret = __cmpxchg_op ("", (mem), (newval), (oldval)); \
+ else \
+ __ret = __cmpxchg_op (LOCK_PREFIX, (mem), (newval), (oldval)); \
+ __ret; })
-#define __arch_add_body(lock, pfx, apfx, mem, value) \
- do { \
- if (__builtin_constant_p (value) && (value) == 1) \
- pfx##_increment (mem); \
- else if (__builtin_constant_p (value) && (value) == -1) \
- pfx##_decrement (mem); \
- else if (sizeof (*mem) == 1) \
- __asm __volatile (lock "addb %b1, %0" \
- : "=m" (*mem) \
- : IBR_CONSTRAINT (value), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (sizeof (*mem) == 2) \
- __asm __volatile (lock "addw %w1, %0" \
- : "=m" (*mem) \
- : "ir" (value), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (sizeof (*mem) == 4) \
- __asm __volatile (lock "addl %1, %0" \
- : "=m" (*mem) \
- : "ir" (value), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (__HAVE_64B_ATOMICS) \
- __asm __volatile (lock "addq %q1, %0" \
- : "=m" (*mem) \
- : "ir" ((int64_t) cast_to_integer (value)), \
- "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else \
- do_add_val_64_acq (apfx, (mem), (value)); \
- } while (0)
-
-# define atomic_add(mem, value) \
- __arch_add_body (LOCK_PREFIX, atomic, __arch, mem, value)
-
-#define __arch_add_cprefix \
- "cmpl $0, %%" SEG_REG ":%P3\n\tje 0f\n\tlock\n0:\t"
+#define __arch_c_compare_and_exchange_val_32_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) __ret; \
+ if (SINGLE_THREAD_P) \
+ __ret = __cmpxchg_op ("", (mem), (newval), (oldval)); \
+ else \
+ __ret = __cmpxchg_op (LOCK_PREFIX, (mem), (newval), (oldval)); \
+ __ret; })
-#define catomic_add(mem, value) \
- __arch_add_body (__arch_add_cprefix, atomic, __arch_c, mem, value)
+#define __arch_c_compare_and_exchange_val_64_acq(mem, newval, oldval) \
+ ({ __typeof (*mem) __ret; \
+ if (SINGLE_THREAD_P) \
+ __ret = __cmpxchg_op ("", (mem), (newval), (oldval)); \
+ else \
+      __ret = __cmpxchg_op (LOCK_PREFIX, (mem), (newval), (oldval));	      \
+ __ret; })
-#define atomic_add_negative(mem, value) \
- ({ unsigned char __result; \
+#define __xchg_op(lock, mem, arg, op) \
+ ({ __typeof (*mem) __ret = (arg); \
if (sizeof (*mem) == 1) \
- __asm __volatile (LOCK_PREFIX "addb %b2, %0; sets %1" \
- : "=m" (*mem), "=qm" (__result) \
- : IBR_CONSTRAINT (value), "m" (*mem)); \
+ __asm __volatile (lock #op "b %b0, %1" \
+ : "=q" (__ret), "=m" (*mem) \
+ : "0" (arg), "m" (*mem) \
+ : "memory", "cc"); \
else if (sizeof (*mem) == 2) \
- __asm __volatile (LOCK_PREFIX "addw %w2, %0; sets %1" \
- : "=m" (*mem), "=qm" (__result) \
- : "ir" (value), "m" (*mem)); \
+ __asm __volatile (lock #op "w %w0, %1" \
+ : "=r" (__ret), "=m" (*mem) \
+ : "0" (arg), "m" (*mem) \
+ : "memory", "cc"); \
else if (sizeof (*mem) == 4) \
- __asm __volatile (LOCK_PREFIX "addl %2, %0; sets %1" \
- : "=m" (*mem), "=qm" (__result) \
- : "ir" (value), "m" (*mem)); \
+ __asm __volatile (lock #op "l %0, %1" \
+ : "=r" (__ret), "=m" (*mem) \
+ : "0" (arg), "m" (*mem) \
+ : "memory", "cc"); \
else if (__HAVE_64B_ATOMICS) \
- __asm __volatile (LOCK_PREFIX "addq %q2, %0; sets %1" \
- : "=m" (*mem), "=qm" (__result) \
- : "ir" ((int64_t) cast_to_integer (value)), \
- "m" (*mem)); \
+ __asm __volatile (lock #op "q %q0, %1" \
+ : "=r" (__ret), "=m" (*mem) \
+ : "0" ((int64_t) cast_to_integer (arg)), \
+ "m" (*mem) \
+ : "memory", "cc"); \
else \
__atomic_link_error (); \
- __result; })
+ __ret; })
-
-#define atomic_add_zero(mem, value) \
- ({ unsigned char __result; \
+#define __single_op(lock, mem, op) \
+ ({ \
if (sizeof (*mem) == 1) \
- __asm __volatile (LOCK_PREFIX "addb %b2, %0; setz %1" \
- : "=m" (*mem), "=qm" (__result) \
- : IBR_CONSTRAINT (value), "m" (*mem)); \
+ __asm __volatile (lock #op "b %b0" \
+ : "=m" (*mem) \
+ : "m" (*mem) \
+ : "memory", "cc"); \
else if (sizeof (*mem) == 2) \
- __asm __volatile (LOCK_PREFIX "addw %w2, %0; setz %1" \
- : "=m" (*mem), "=qm" (__result) \
- : "ir" (value), "m" (*mem)); \
+    __asm __volatile (lock #op "w %w0"				      \
+ : "=m" (*mem) \
+ : "m" (*mem) \
+ : "memory", "cc"); \
else if (sizeof (*mem) == 4) \
- __asm __volatile (LOCK_PREFIX "addl %2, %0; setz %1" \
- : "=m" (*mem), "=qm" (__result) \
- : "ir" (value), "m" (*mem)); \
+    __asm __volatile (lock #op "l %0"				      \
+ : "=m" (*mem) \
+ : "m" (*mem) \
+ : "memory", "cc"); \
else if (__HAVE_64B_ATOMICS) \
- __asm __volatile (LOCK_PREFIX "addq %q2, %0; setz %1" \
- : "=m" (*mem), "=qm" (__result) \
- : "ir" ((int64_t) cast_to_integer (value)), \
- "m" (*mem)); \
+    __asm __volatile (lock #op "q %q0"				      \
+ : "=m" (*mem) \
+ : "m" (*mem) \
+ : "memory", "cc"); \
else \
- __atomic_link_error (); \
- __result; })
+ __atomic_link_error (); \
+ })
+/* Note that we need no lock prefix. */
+#define atomic_exchange_acq(mem, newvalue) \
+ __xchg_op ("", (mem), (newvalue), xchg)
-#define __arch_increment_body(lock, pfx, mem) \
- do { \
- if (sizeof (*mem) == 1) \
- __asm __volatile (lock "incb %b0" \
- : "=m" (*mem) \
- : "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (sizeof (*mem) == 2) \
- __asm __volatile (lock "incw %w0" \
- : "=m" (*mem) \
- : "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (sizeof (*mem) == 4) \
- __asm __volatile (lock "incl %0" \
- : "=m" (*mem) \
- : "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (__HAVE_64B_ATOMICS) \
- __asm __volatile (lock "incq %q0" \
- : "=m" (*mem) \
- : "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else \
- do_add_val_64_acq (pfx, mem, 1); \
- } while (0)
+#define atomic_add(mem, value) \
+  __xchg_op (LOCK_PREFIX, (mem), (value), add)
-#define atomic_increment(mem) __arch_increment_body (LOCK_PREFIX, __arch, mem)
+#define catomic_add(mem, value) \
+ ({ \
+ if (SINGLE_THREAD_P) \
+ __xchg_op ("", (mem), (value), add); \
+ else \
+ atomic_add (mem, value); \
+ })
-#define __arch_increment_cprefix \
- "cmpl $0, %%" SEG_REG ":%P2\n\tje 0f\n\tlock\n0:\t"
-#define catomic_increment(mem) \
- __arch_increment_body (__arch_increment_cprefix, __arch_c, mem)
+#define atomic_increment(mem) \
+ __single_op (LOCK_PREFIX, (mem), inc)
+#define catomic_increment(mem) \
+ ({ \
+ if (SINGLE_THREAD_P) \
+ __single_op ("", (mem), inc); \
+ else \
+ atomic_increment (mem); \
+ })
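
For a 4-byte counter the rewritten catomic_increment above reduces to roughly the following, with the size dispatch and statement-expression plumbing stripped (a sketch for exposition, not a drop-in replacement):

static inline void
catomic_increment_int (int *mem)
{
  if (SINGLE_THREAD_P)
    /* Known single-threaded: the plain instruction suffices.  */
    __asm__ __volatile__ ("incl %0"
                          : "=m" (*mem) : "m" (*mem) : "memory", "cc");
  else
    /* Possibly multi-threaded: the lock prefix makes the RMW atomic.  */
    __asm__ __volatile__ ("lock; incl %0"
                          : "=m" (*mem) : "m" (*mem) : "memory", "cc");
}
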
#define atomic_increment_and_test(mem) \
({ unsigned char __result; \
@@ -357,43 +210,20 @@
: "=m" (*mem), "=qm" (__result) \
: "m" (*mem)); \
else \
- __atomic_link_error (); \
+ __atomic_link_error (); \
__result; })
-#define __arch_decrement_body(lock, pfx, mem) \
- do { \
- if (sizeof (*mem) == 1) \
- __asm __volatile (lock "decb %b0" \
- : "=m" (*mem) \
- : "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (sizeof (*mem) == 2) \
- __asm __volatile (lock "decw %w0" \
- : "=m" (*mem) \
- : "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (sizeof (*mem) == 4) \
- __asm __volatile (lock "decl %0" \
- : "=m" (*mem) \
- : "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (__HAVE_64B_ATOMICS) \
- __asm __volatile (lock "decq %q0" \
- : "=m" (*mem) \
- : "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else \
- do_add_val_64_acq (pfx, mem, -1); \
- } while (0)
-
-#define atomic_decrement(mem) __arch_decrement_body (LOCK_PREFIX, __arch, mem)
-
-#define __arch_decrement_cprefix \
- "cmpl $0, %%" SEG_REG ":%P2\n\tje 0f\n\tlock\n0:\t"
+#define atomic_decrement(mem) \
+ __single_op (LOCK_PREFIX, (mem), dec)
-#define catomic_decrement(mem) \
- __arch_decrement_body (__arch_decrement_cprefix, __arch_c, mem)
+#define catomic_decrement(mem) \
+ ({ \
+ if (SINGLE_THREAD_P) \
+ __single_op ("", (mem), dec); \
+ else \
+ atomic_decrement (mem); \
+ })
#define atomic_decrement_and_test(mem) \
@@ -463,73 +293,31 @@
: "=q" (__result), "=m" (*mem) \
: "m" (*mem), "ir" (bit)); \
else \
- __atomic_link_error (); \
+ __atomic_link_error (); \
__result; })
-#define __arch_and_body(lock, mem, mask) \
- do { \
- if (sizeof (*mem) == 1) \
- __asm __volatile (lock "andb %b1, %0" \
- : "=m" (*mem) \
- : IBR_CONSTRAINT (mask), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (sizeof (*mem) == 2) \
- __asm __volatile (lock "andw %w1, %0" \
- : "=m" (*mem) \
- : "ir" (mask), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (sizeof (*mem) == 4) \
- __asm __volatile (lock "andl %1, %0" \
- : "=m" (*mem) \
- : "ir" (mask), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (__HAVE_64B_ATOMICS) \
- __asm __volatile (lock "andq %q1, %0" \
- : "=m" (*mem) \
- : "ir" (mask), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else \
- __atomic_link_error (); \
- } while (0)
-
-#define __arch_cprefix \
- "cmpl $0, %%" SEG_REG ":%P3\n\tje 0f\n\tlock\n0:\t"
-
-#define atomic_and(mem, mask) __arch_and_body (LOCK_PREFIX, mem, mask)
-
-#define catomic_and(mem, mask) __arch_and_body (__arch_cprefix, mem, mask)
+#define atomic_and(mem, mask) \
+ __xchg_op (LOCK_PREFIX, (mem), (mask), and)
+#define catomic_and(mem, mask) \
+ ({ \
+ if (SINGLE_THREAD_P) \
+ __xchg_op ("", (mem), (mask), and); \
+ else \
+ atomic_and (mem, mask); \
+ })
-#define __arch_or_body(lock, mem, mask) \
- do { \
- if (sizeof (*mem) == 1) \
- __asm __volatile (lock "orb %b1, %0" \
- : "=m" (*mem) \
- : IBR_CONSTRAINT (mask), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (sizeof (*mem) == 2) \
- __asm __volatile (lock "orw %w1, %0" \
- : "=m" (*mem) \
- : "ir" (mask), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (sizeof (*mem) == 4) \
- __asm __volatile (lock "orl %1, %0" \
- : "=m" (*mem) \
- : "ir" (mask), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else if (__HAVE_64B_ATOMICS) \
- __asm __volatile (lock "orq %q1, %0" \
- : "=m" (*mem) \
- : "ir" (mask), "m" (*mem), \
- "i" (offsetof (tcbhead_t, multiple_threads))); \
- else \
- __atomic_link_error (); \
- } while (0)
-
-#define atomic_or(mem, mask) __arch_or_body (LOCK_PREFIX, mem, mask)
+#define atomic_or(mem, mask) \
+ __xchg_op (LOCK_PREFIX, (mem), (mask), or)
-#define catomic_or(mem, mask) __arch_or_body (__arch_cprefix, mem, mask)
+#define catomic_or(mem, mask) \
+ ({ \
+ if (SINGLE_THREAD_P) \
+ __xchg_op ("", (mem), (mask), or); \
+ else \
+ atomic_or (mem, mask); \
+ })
/* We don't use mfence because it is supposedly slower due to having to
provide stronger guarantees (e.g., regarding self-modifying code). */
@@ -9,7 +9,6 @@ CLEANUP_JMP_BUF offsetof (struct pthread, cleanup_jmp_buf)
CLEANUP offsetof (struct pthread, cleanup)
CLEANUP_PREV offsetof (struct _pthread_cleanup_buffer, __prev)
MUTEX_FUTEX offsetof (pthread_mutex_t, __data.__lock)
-MULTIPLE_THREADS_OFFSET offsetof (tcbhead_t, multiple_threads)
POINTER_GUARD offsetof (tcbhead_t, pointer_guard)
FEATURE_1_OFFSET offsetof (tcbhead_t, feature_1)
SSP_BASE_OFFSET offsetof (tcbhead_t, ssp_base)
@@ -45,7 +45,7 @@ typedef struct
thread descriptor used by libpthread. */
dtv_t *dtv;
void *self; /* Pointer to the thread descriptor. */
- int multiple_threads;
+ int unused_multiple_threads;
int gscope_flag;
uintptr_t sysinfo;
uintptr_t stack_guard;