@@ -43,29 +43,16 @@ __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
return atomic_fetch_xor_release (&cond->__data.__wseq, val);
}
-static uint64_t __attribute__ ((unused))
-__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
-{
- return atomic_load_relaxed (&cond->__data.__g1_start);
-}
-
-static void __attribute__ ((unused))
-__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
-{
- atomic_store_relaxed (&cond->__data.__g1_start,
- atomic_load_relaxed (&cond->__data.__g1_start) + val);
-}
-
#else
-/* We use two 64b counters: __wseq and __g1_start. They are monotonically
- increasing and single-writer-multiple-readers counters, so we can implement
- load, fetch-and-add, and fetch-and-xor operations even when we just have
- 32b atomics. Values we add or xor are less than or equal to 1<<31 (*),
- so we only have to make overflow-and-addition atomic wrt. to concurrent
- load operations and xor operations. To do that, we split each counter into
- two 32b values of which we reserve the MSB of each to represent an
- overflow from the lower-order half to the higher-order half.
+/* __wseq is a 64b counter. It is a monotonically increasing
+ single-writer-multiple-readers counter, so we can implement load,
+ fetch-and-add, and fetch-and-xor operations even when we just have 32b
+ atomics. Values we add or xor are less than or equal to 1<<31, so we only
+ have to make overflow-and-addition atomic wrt. concurrent load and xor
+ operations. To do that, we split the counter into two 32b values and
+ reserve the MSB of each to represent an overflow from the lower-order half
+ to the higher-order half.
In the common case, the state is (higher-order / lower-order half, and . is
basically concatenation of the bits):
@@ -104,11 +91,7 @@ __condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
can almost always interpret a snapshot of each halves. Readers can be
forced to read a new snapshot when the read is concurrent with an overflow.
However, overflows will happen infrequently, so load operations are
- practically lock-free.
-
- (*) The highest value we add is __PTHREAD_COND_MAX_GROUP_SIZE << 2 to
- __g1_start (the two extra bits are for the lock in the two LSBs of
- __g1_start). */
+ practically lock-free. */
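
As a concrete illustration of the encoding just described (a sketch only, with
hypothetical names, not the in-tree fallback helpers): each 32b word carries 31
payload bits and reserves bit 31 as the overflow flag, and in the common case
the 64b value is recovered exactly as in the fetch-xor fallback further below.

#include <stdint.h>

/* Common-case encoding only: the overflow flags (bit 31 of each word) are
   zero here; the real fallback uses them to make concurrent updates safe.  */
static void
split64_sketch (uint64_t v, unsigned int *high, unsigned int *low)
{
  *low = (unsigned int) (v & 0x7fffffff);           /* bits 0..30 */
  *high = (unsigned int) ((v >> 31) & 0x7fffffff);  /* bits 31..61 */
}

static uint64_t
combine64_sketch (unsigned int high, unsigned int low)
{
  /* Same recombination as ((uint64_t) h << 31) + l2 in the fallback code.  */
  return ((uint64_t) (high & 0x7fffffff) << 31) + (low & 0x7fffffff);
}
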
typedef struct
{
@@ -228,20 +211,6 @@ __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
return ((uint64_t) h << 31) + l2;
}
-static uint64_t __attribute__ ((unused))
-__condvar_load_g1_start_relaxed (pthread_cond_t *cond)
-{
- return __condvar_load_64_relaxed
- ((_condvar_lohi *) &cond->__data.__g1_start32);
-}
-
-static void __attribute__ ((unused))
-__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val)
-{
- ignore_value (__condvar_fetch_add_64_relaxed
- ((_condvar_lohi *) &cond->__data.__g1_start32, val));
-}
-
#endif /* !__HAVE_64B_ATOMICS */
@@ -350,7 +319,7 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
behavior.
Note that this works correctly for a zero-initialized condvar too. */
unsigned int old_orig_size = __condvar_get_orig_size (cond);
- uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
+ uint64_t old_g1_start = cond->__data.__g1_start;
if (((unsigned) (wseq - old_g1_start - old_orig_size)
+ cond->__data.__g_size[g1 ^ 1]) == 0)
return false;
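
To make this check concrete (illustration only): __g1_start is the starting
position of G1 and __g1_orig_size its initial size, so old_g1_start +
old_orig_size is the first position belonging to the current G2, and wseq minus
that value is the number of waiters that have acquired a position in G2;
__g_size[g1 ^ 1] folds in the size adjustment recorded for G2, which is zero
unless waiters have left the group early. For example, with wseq == 10,
old_g1_start == 7, old_orig_size == 3 and __g_size[g1 ^ 1] == 0, G2 starts at
position 10 and is empty, so the sum is 0 and the group switch is skipped.
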
@@ -380,10 +349,10 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
/* Wait until there are no group references anymore. The fetch-or operation
injects us into the modification order of __g_refs; release MO ensures
that waiters incrementing __g_refs after our fetch-or see the previous
- changes to __g_signals and to __g1_start that had to happen before we can
- switch this G1 and alias with an older group (we have two groups, so
- aliasing requires switching group roles twice). Note that nobody else
- can have set the wake-request flag, so we do not have to act upon it.
+ change to __g_signals that had to happen before we can switch this G1
+ and alias with an older group (we have two groups, so aliasing requires
+ switching group roles twice). Note that nobody else can have set the
+ wake-request flag, so we do not have to act upon it.
Also note that it is harmless if older waiters or waiters from this G1
get a group reference after we have quiesced the group because it will
@@ -421,15 +390,9 @@ __condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq,
after the waiters we waited for. */
atomic_fetch_and_acquire (cond->__data.__g_refs + g1, ~(unsigned int)1);
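
The wait for quiescence described above follows a common counted-reference
handshake. Below is a minimal sketch of that pattern, assuming a 32b word that
holds the reference count in bits 1..31 and a wake-request flag in bit 0
(hypothetical names, C11 atomics, and yielding in place of the futex wait/wake
the real code uses; the acquire pairing on the way out is what the fetch-and
above provides).

#include <stdatomic.h>
#include <sched.h>

static void
quiesce_sketch (atomic_uint *refs)
{
  /* Fetch-or of 0 is still a read-modify-write: it injects us into the
     modification order of refs, and release MO publishes our earlier
     stores to waiters that increment refs after this point.  */
  unsigned int r = atomic_fetch_or_explicit (refs, 0, memory_order_release);
  while ((r >> 1) > 0)
    {
      /* Still referenced: set the wake-request flag and wait.  The real
         code blocks on the futex word here instead of yielding.  */
      atomic_fetch_or_explicit (refs, 1, memory_order_relaxed);
      sched_yield ();
      r = atomic_load_explicit (refs, memory_order_relaxed);
    }
}

static void
leave_group_sketch (atomic_uint *refs)
{
  /* A waiter drops its reference (counted in steps of 2).  */
  unsigned int r = atomic_fetch_sub_explicit (refs, 2, memory_order_release);
  if (r == 3)
    {
      /* We held the last reference and the wake-request flag was set;
         the real code wakes the blocked signaler on the futex word.  */
    }
}
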
- /* Update __g1_start, which finishes closing this group. The value we add
- will never be negative because old_orig_size can only be zero when we
- switch groups the first time after a condvar was initialized, in which
- case G1 will be at index 1 and we will add a value of 1. See above for
- why this takes place after waiting for quiescence of the group.
- Relaxed MO is fine because the change comes with no additional
- constraints that others would have to observe. */
- __condvar_add_g1_start_relaxed (cond,
- (old_orig_size << 1) + (g1 == 1 ? 1 : - 1));
+ /* Update __g1_start, which finishes closing this group. See above for
+ why this takes place after waiting for quiescence of the group. */
+ cond->__data.__g1_start += old_orig_size;
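
As a concrete example of this step: if the G1 being closed started at position
7 with old_orig_size == 3, __g1_start becomes 10, which is exactly the first
position of the old G2 that is about to take over the G1 role.
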
/* Now reopen the group, thus enabling waiters to again block using the
futex controlled by __g_signals. Release MO so that observers that see
@@ -72,7 +72,7 @@ __condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g,
not hold a reference on the group. */
__condvar_acquire_lock (cond, private);
- uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1;
+ uint64_t g1_start = cond->__data.__g1_start;
if (g1_start > seq)
{
/* Our group is closed, so someone provided enough signals for it.
@@ -275,9 +275,8 @@ __condvar_cleanup_waiting (void *arg)
   * Waiters fetch-add while having acquired the mutex associated with the
condvar. Signalers load it and fetch-xor it concurrently.
__g1_start: Starting position of G1 (inclusive)
- * LSB is index of current G2.
- * Modified by signalers while having acquired the condvar-internal lock
- and observed concurrently by waiters.
+ * Modified by signalers and observed by waiters, both only while having
+ acquired the condvar-internal lock.
__g1_orig_size: Initial size of G1
* The two least-significant bits represent the condvar-internal lock.
* Only accessed while having acquired the condvar-internal lock.
@@ -314,16 +313,6 @@ __condvar_cleanup_waiting (void *arg)
A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields
a condvar that has G2 starting at position 0 and a G1 that is closed.
- Because waiters do not claim ownership of a group right when obtaining a
- position in __wseq but only reference count the group when using futexes
- to block, it can happen that a group gets closed before a waiter can
- increment the reference count. Therefore, waiters have to check whether
- their group is already closed using __g1_start. They also have to perform
- this check when spinning when trying to grab a signal from __g_signals.
- Note that for these checks, using relaxed MO to load __g1_start is
- sufficient because if a waiter can see a sufficiently large value, it could
- have also consume a signal in the waiters group.
-
It is essential that the last field in pthread_cond_t is __g_signals[1]:
The previous condvar used a pointer-sized field in pthread_cond_t, so a
PTHREAD_COND_INITIALIZER from that condvar implementation might only
@@ -415,8 +404,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
/* Now wait until a signal is available in our group or it is closed.
Acquire MO so that if we observe a value of zero written after group
switching in __condvar_quiesce_and_switch_g1, we synchronize with that
- store and will see the prior update of __g1_start done while switching
- groups too. */
+ store. */
unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
do
@@ -436,11 +424,6 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
unsigned int spin = maxspin;
while (signals == 0 && spin > 0)
{
- /* Check that we are not spinning on a group that's already
- closed. */
- if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))
- goto done;
-
/* TODO Back off. */
/* Reload signals. See above for MO. */
@@ -457,19 +440,7 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
if (signals != 0)
break;
- /* No signals available after spinning, so prepare to block.
- First check the closed flag on __g_signals that designates a
- concurrent attempt to reuse the group's slot. We use acquire MO for
- the __g_signals check to make sure we read the current value of
- __g1_start (see above). */
- if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0)
- || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)))
- {
- /* Our group is closed. */
- goto done;
- }
-
- // Now block.
+ // No signals available after spinning, so block.
struct _pthread_cleanup_buffer buffer;
struct _condvar_cleanup_buffer cbuffer;
cbuffer.wseq = wseq;
@@ -501,9 +472,9 @@ __pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex,
}
}
- /* Try to grab a signal. Use acquire MO so that we see an up-to-date value
- of __g1_start when spinning above. */
- while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
+ /* Try to grab a signal. Relaxed MO is enough because the group cannot be
+ closed while we are in this loop, so there are no writes we could miss. */
+ while (!atomic_compare_exchange_weak_relaxed (cond->__data.__g_signals + g,
&signals, signals - 2));
done:
@@ -100,15 +100,7 @@ struct __pthread_cond_s
unsigned int __high;
} __wseq32;
};
- __extension__ union
- {
- __extension__ unsigned long long int __g1_start;
- struct
- {
- unsigned int __low;
- unsigned int __high;
- } __g1_start32;
- };
+ unsigned long long int __g1_start;
unsigned int __g_refs[2] __LOCK_ALIGNMENT;
unsigned int __g_size[2];
unsigned int __g1_orig_size;
@@ -152,7 +152,7 @@ enum
/* Conditional variable handling. */
-#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, {0, 0}, 0, 0, {0, 0} } }
+#define PTHREAD_COND_INITIALIZER { { {0}, 0, {0, 0}, {0, 0}, 0, 0, {0, 0} } }
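
The initializer still yields an all-zero condvar (G2 starting at position 0 and
a closed G1), so the usual static initialization is unaffected by the layout
change; for example:

#include <pthread.h>

pthread_cond_t cond = PTHREAD_COND_INITIALIZER;  /* all fields zero */
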
/* Cleanup buffers */