Message ID | 875ytag4tx.fsf@oldenburg.str.redhat.com |
---|---|

State | Superseded |

Headers | show |

Series | nptl: Extract <bits/atomic_wide_counter.h> from pthread_cond_common.c | expand |

Context | Check | Description |
---|---|---|

dj/TryBot-apply_patch | success | Patch applied to master at the time it was sent |

dj/TryBot-32bit | success | Build for i686 |

On Nov 02 2021, Florian Weimer via Libc-alpha wrote: > +/* Counter that is monotonically increasing (by less than 2**31 per > + increment), with a single writer, and an arbitrary number of > + readers. */ > +typedef union > +{ > + __extension__ unsigned long long int __value64; > + struct > + { > + unsigned int __low; > + unsigned int __high; > + } __value32; > +} __atomic_wide_counter; Why doesn't this need to account for endianness? Andreas.

* Andreas Schwab: > On Nov 02 2021, Florian Weimer via Libc-alpha wrote: > >> +/* Counter that is monotonically increasing (by less than 2**31 per >> + increment), with a single writer, and an arbitrary number of >> + readers. */ >> +typedef union >> +{ >> + __extension__ unsigned long long int __value64; >> + struct >> + { >> + unsigned int __low; >> + unsigned int __high; >> + } __value32; >> +} __atomic_wide_counter; > > Why doesn't this need to account for endianess? The ABI is not compatible between 64-bit and 32-bit for other reasons (the counters count differently, the 32-bit variant is just 62 bits). You cannot have a shared mapping with different word sizes. I don't think this matters. Thanks, Florian

On Nov 03 2021, Florian Weimer wrote: > * Andreas Schwab: > >> On Nov 02 2021, Florian Weimer via Libc-alpha wrote: >> >>> +/* Counter that is monotonically increasing (by less than 2**31 per >>> + increment), with a single writer, and an arbitrary number of >>> + readers. */ >>> +typedef union >>> +{ >>> + __extension__ unsigned long long int __value64; >>> + struct >>> + { >>> + unsigned int __low; >>> + unsigned int __high; >>> + } __value32; >>> +} __atomic_wide_counter; >> >> Why doesn't this need to account for endianess? > > The ABI is not compatible between 64-bit and 32-bit for other reasons > (the counters count differently, the 32-bit variant is just 62 bits). > You cannot have a shared mapping with different word sizes. I don't > think this matters. Why do you need a union then? Andreas.

* Andreas Schwab: > On Nov 03 2021, Florian Weimer wrote: > >> * Andreas Schwab: >> >>> On Nov 02 2021, Florian Weimer via Libc-alpha wrote: >>> >>>> +/* Counter that is monotonically increasing (by less than 2**31 per >>>> + increment), with a single writer, and an arbitrary number of >>>> + readers. */ >>>> +typedef union >>>> +{ >>>> + __extension__ unsigned long long int __value64; >>>> + struct >>>> + { >>>> + unsigned int __low; >>>> + unsigned int __high; >>>> + } __value32; >>>> +} __atomic_wide_counter; >>> >>> Why doesn't this need to account for endianess? >> >> The ABI is not compatible between 64-bit and 32-bit for other reasons >> (the counters count differently, the 32-bit variant is just 62 bits). >> You cannot have a shared mapping with different word sizes. I don't >> think this matters. > > Why do you need a union then? __HAVE_64B_ATOMICS is not available in the installed headers. It's what governs the choice between the two layouts, and not __WORDSIZE. Thanks, Florian

On Nov 03 2021, Florian Weimer wrote: > * Andreas Schwab: > >> On Nov 03 2021, Florian Weimer wrote: >> >>> * Andreas Schwab: >>> >>>> On Nov 02 2021, Florian Weimer via Libc-alpha wrote: >>>> >>>>> +/* Counter that is monotonically increasing (by less than 2**31 per >>>>> + increment), with a single writer, and an arbitrary number of >>>>> + readers. */ >>>>> +typedef union >>>>> +{ >>>>> + __extension__ unsigned long long int __value64; >>>>> + struct >>>>> + { >>>>> + unsigned int __low; >>>>> + unsigned int __high; >>>>> + } __value32; >>>>> +} __atomic_wide_counter; >>>> >>>> Why doesn't this need to account for endianess? >>> >>> The ABI is not compatible between 64-bit and 32-bit for other reasons >>> (the counters count differently, the 32-bit variant is just 62 bits). >>> You cannot have a shared mapping with different word sizes. I don't >>> think this matters. >> >> Why do you need a union then? > > __HAVE_64B_ATOMICS is not available in the installed headers. It's what > governs the choice between the two layouts, and not __WORDSIZE. Why is it an installed header? If it cannot be used without this internal define, how is that useful outside glibc? Andreas.

* Andreas Schwab: > On Nov 03 2021, Florian Weimer wrote: > >> * Andreas Schwab: >> >>> On Nov 03 2021, Florian Weimer wrote: >>> >>>> * Andreas Schwab: >>>> >>>>> On Nov 02 2021, Florian Weimer via Libc-alpha wrote: >>>>> >>>>>> +/* Counter that is monotonically increasing (by less than 2**31 per >>>>>> + increment), with a single writer, and an arbitrary number of >>>>>> + readers. */ >>>>>> +typedef union >>>>>> +{ >>>>>> + __extension__ unsigned long long int __value64; >>>>>> + struct >>>>>> + { >>>>>> + unsigned int __low; >>>>>> + unsigned int __high; >>>>>> + } __value32; >>>>>> +} __atomic_wide_counter; >>>>> >>>>> Why doesn't this need to account for endianess? >>>> >>>> The ABI is not compatible between 64-bit and 32-bit for other reasons >>>> (the counters count differently, the 32-bit variant is just 62 bits). >>>> You cannot have a shared mapping with different word sizes. I don't >>>> think this matters. >>> >>> Why do you need a union then? >> >> __HAVE_64B_ATOMICS is not available in the installed headers. It's what >> governs the choice between the two layouts, and not __WORDSIZE. > > Why is it an installed header? If it cannot be used without this > internal define, how is that useful outside glibc? It's exposed as part of the public pthread_cond_t type, presumably so that the initializer works. Thanks, Florian

diff --git a/bits/atomic_wide_counter.h b/bits/atomic_wide_counter.h new file mode 100644 index 0000000000..0687eb554e --- /dev/null +++ b/bits/atomic_wide_counter.h @@ -0,0 +1,35 @@ +/* Monotonically increasing wide counters (at least 62 bits). + Copyright (C) 2016-2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _BITS_ATOMIC_WIDE_COUNTER_H +#define _BITS_ATOMIC_WIDE_COUNTER_H + +/* Counter that is monotonically increasing (by less than 2**31 per + increment), with a single writer, and an arbitrary number of + readers. */ +typedef union +{ + __extension__ unsigned long long int __value64; + struct + { + unsigned int __low; + unsigned int __high; + } __value32; +} __atomic_wide_counter; + +#endif /* _BITS_ATOMIC_WIDE_COUNTER_H */ diff --git a/include/atomic_wide_counter.h b/include/atomic_wide_counter.h new file mode 100644 index 0000000000..31f009d5e6 --- /dev/null +++ b/include/atomic_wide_counter.h @@ -0,0 +1,89 @@ +/* Monotonically increasing wide counters (at least 62 bits). + Copyright (C) 2016-2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _ATOMIC_WIDE_COUNTER_H +#define _ATOMIC_WIDE_COUNTER_H + +#include <atomic.h> +#include <bits/atomic_wide_counter.h> + +#if __HAVE_64B_ATOMICS + +static inline uint64_t +__atomic_wide_counter_load_relaxed (__atomic_wide_counter *c) +{ + return atomic_load_relaxed (&c->__value64); +} + +static inline uint64_t +__atomic_wide_counter_fetch_add_relaxed (__atomic_wide_counter *c, + unsigned int val) +{ + return atomic_fetch_add_relaxed (&c->__value64, val); +} + +static inline uint64_t +__atomic_wide_counter_fetch_add_acquire (__atomic_wide_counter *c, + unsigned int val) +{ + return atomic_fetch_add_acquire (&c->__value64, val); +} + +static inline void +__atomic_wide_counter_add_relaxed (__atomic_wide_counter *c, + unsigned int val) +{ + atomic_store_relaxed (&c->__value64, + atomic_load_relaxed (&c->__value64) + val); +} + +static uint64_t __attribute__ ((unused)) +__atomic_wide_counter_fetch_xor_release (__atomic_wide_counter *c, + unsigned int val) +{ + return atomic_fetch_xor_release (&c->__value64, val); +} + +#else /* !__HAVE_64B_ATOMICS */ + +uint64_t __atomic_wide_counter_load_relaxed (__atomic_wide_counter *c) + attribute_hidden; + +uint64_t __atomic_wide_counter_fetch_add_relaxed (__atomic_wide_counter *c, + unsigned int op) + attribute_hidden; + +static inline uint64_t 
+__atomic_wide_counter_fetch_add_acquire (__atomic_wide_counter *c, + unsigned int val) +{ + uint64_t r = __atomic_wide_counter_fetch_add_relaxed (c, val); + atomic_thread_fence_acquire (); + return r; +} + +static inline void +__atomic_wide_counter_add_relaxed (__atomic_wide_counter *c, + unsigned int val) +{ + __atomic_wide_counter_fetch_add_relaxed (c, val); +} + +#endif /* !__HAVE_64B_ATOMICS */ + +#endif /* _ATOMIC_WIDE_COUNTER_H */ diff --git a/include/bits/atomic_wide_counter.h b/include/bits/atomic_wide_counter.h new file mode 100644 index 0000000000..8fb09a5291 --- /dev/null +++ b/include/bits/atomic_wide_counter.h @@ -0,0 +1 @@ +#include_next <bits/atomic_wide_counter.h> diff --git a/misc/Makefile b/misc/Makefile index 1083ba3bfc..3b66cb9f6a 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -73,7 +73,8 @@ routines := brk sbrk sstk ioctl \ fgetxattr flistxattr fremovexattr fsetxattr getxattr \ listxattr lgetxattr llistxattr lremovexattr lsetxattr \ removexattr setxattr getauxval ifunc-impl-list makedev \ - allocate_once fd_to_filename single_threaded unwind-link + allocate_once fd_to_filename single_threaded unwind-link \ + atomic_wide_counter generated += tst-error1.mtrace tst-error1-mem.out \ tst-allocate_once.mtrace tst-allocate_once-mem.out diff --git a/misc/atomic_wide_counter.c b/misc/atomic_wide_counter.c new file mode 100644 index 0000000000..56d8981925 --- /dev/null +++ b/misc/atomic_wide_counter.c @@ -0,0 +1,127 @@ +/* Monotonically increasing wide counters (at least 62 bits). + Copyright (C) 2016-2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <atomic_wide_counter.h> + +#if !__HAVE_64B_ATOMICS + +/* Values we add or xor are less than or equal to 1<<31, so we only + have to make overflow-and-addition atomic wrt. to concurrent load + operations and xor operations. To do that, we split each counter + into two 32b values of which we reserve the MSB of each to + represent an overflow from the lower-order half to the higher-order + half. + + In the common case, the state is (higher-order / lower-order half, and . is + basically concatenation of the bits): + 0.h / 0.l = h.l + + When we add a value of x that overflows (i.e., 0.l + x == 1.L), we run the + following steps S1-S4 (the values these represent are on the right-hand + side): + S1: 0.h / 1.L == (h+1).L + S2: 1.(h+1) / 1.L == (h+1).L + S3: 1.(h+1) / 0.L == (h+1).L + S4: 0.(h+1) / 0.L == (h+1).L + If the LSB of the higher-order half is set, readers will ignore the + overflow bit in the lower-order half. + + To get an atomic snapshot in load operations, we exploit that the + higher-order half is monotonically increasing; if we load a value V from + it, then read the lower-order half, and then read the higher-order half + again and see the same value V, we know that both halves have existed in + the sequence of values the full counter had. This is similar to the + validated reads in the time-based STMs in GCC's libitm (e.g., + method_ml_wt). + + One benefit of this scheme is that this makes load operations + obstruction-free because unlike if we would just lock the counter, readers + can almost always interpret a snapshot of each halves. 
Readers can be + forced to read a new snapshot when the read is concurrent with an overflow. + However, overflows will happen infrequently, so load operations are + practically lock-free. */ + +uint64_t +__atomic_wide_counter_fetch_add_relaxed (__atomic_wide_counter *c, + unsigned int op) +{ + /* S1. Note that this is an atomic read-modify-write so it extends the + release sequence of release MO store at S3. */ + unsigned int l = atomic_fetch_add_relaxed (&c->__value32.__low, op); + unsigned int h = atomic_load_relaxed (&c->__value32.__high); + uint64_t result = ((uint64_t) h << 31) | l; + l += op; + if ((l >> 31) > 0) + { + /* Overflow. Need to increment higher-order half. Note that all + add operations are ordered in happens-before. */ + h++; + /* S2. Release MO to synchronize with the loads of the higher-order half + in the load operation. See __condvar_load_64_relaxed. */ + atomic_store_release (&c->__value32.__high, + h | ((unsigned int) 1 << 31)); + l ^= (unsigned int) 1 << 31; + /* S3. See __condvar_load_64_relaxed. */ + atomic_store_release (&c->__value32.__low, l); + /* S4. Likewise. */ + atomic_store_release (&c->__value32.__high, h); + } + return result; +} + +uint64_t +__atomic_wide_counter_load_relaxed (__atomic_wide_counter *c) +{ + unsigned int h, l, h2; + do + { + /* This load and the second one below to the same location read from the + stores in the overflow handling of the add operation or the + initializing stores (which is a simple special case because + initialization always completely happens before further use). + Because no two stores to the higher-order half write the same value, + the loop ensures that if we continue to use the snapshot, this load + and the second one read from the same store operation. All candidate + store operations have release MO. + If we read from S2 in the first load, then we will see the value of + S1 on the next load (because we synchronize with S2), or a value + later in modification order. 
We correctly ignore the lower-half's + overflow bit in this case. If we read from S4, then we will see the + value of S3 in the next load (or a later value), which does not have + the overflow bit set anymore. + */ + h = atomic_load_acquire (&c->__value32.__high); + /* This will read from the release sequence of S3 (i.e, either the S3 + store or the read-modify-writes at S1 following S3 in modification + order). Thus, the read synchronizes with S3, and the following load + of the higher-order half will read from the matching S2 (or a later + value). + Thus, if we read a lower-half value here that already overflowed and + belongs to an increased higher-order half value, we will see the + latter and h and h2 will not be equal. */ + l = atomic_load_acquire (&c->__value32.__low); + /* See above. */ + h2 = atomic_load_relaxed (&c->__value32.__high); + } + while (h != h2); + if (((l >> 31) > 0) && ((h >> 31) > 0)) + l ^= (unsigned int) 1 << 31; + return ((uint64_t) (h & ~((unsigned int) 1 << 31)) << 31) + l; +} + +#endif /* !__HAVE_64B_ATOMICS */ diff --git a/nptl/Makefile b/nptl/Makefile index ff4d590f11..6310aecaad 100644 --- a/nptl/Makefile +++ b/nptl/Makefile @@ -22,8 +22,14 @@ subdir := nptl include ../Makeconfig -headers := pthread.h semaphore.h bits/semaphore.h \ - bits/struct_mutex.h bits/struct_rwlock.h +headers := \ + bits/atomic_wide_counter.h \ + bits/semaphore.h \ + bits/struct_mutex.h \ + bits/struct_rwlock.h \ + pthread.h \ + semaphore.h \ + # headers extra-libs := libpthread extra-libs-others := $(extra-libs) @@ -270,7 +276,7 @@ tests = tst-attr2 tst-attr3 tst-default-attr \ tst-mutexpi1 tst-mutexpi2 tst-mutexpi3 tst-mutexpi4 \ tst-mutexpi5 tst-mutexpi5a tst-mutexpi6 tst-mutexpi7 tst-mutexpi7a \ tst-mutexpi9 tst-mutexpi10 \ - tst-cond22 tst-cond26 \ + tst-cond26 \ tst-robustpi1 tst-robustpi2 tst-robustpi3 tst-robustpi4 tst-robustpi5 \ tst-robustpi6 tst-robustpi7 tst-robustpi9 \ tst-rwlock2 tst-rwlock2a tst-rwlock2b tst-rwlock3 \ @@ -319,6 +325,7 @@ 
tests-internal := tst-robustpi8 tst-rwlock19 tst-rwlock20 \ tst-barrier5 tst-signal7 tst-mutex8 tst-mutex8-static \ tst-mutexpi8 tst-mutexpi8-static \ tst-setgetname \ + tst-cond22 \ xtests = tst-setuid1 tst-setuid1-static tst-setuid2 \ tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups \ diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c index c35b9ef03a..fb56f93b6e 100644 --- a/nptl/pthread_cond_common.c +++ b/nptl/pthread_cond_common.c @@ -17,79 +17,52 @@ <https://www.gnu.org/licenses/>. */ #include <atomic.h> +#include <atomic_wide_counter.h> #include <stdint.h> #include <pthread.h> -/* We need 3 least-significant bits on __wrefs for something else. */ +/* We need 3 least-significant bits on __wrefs for something else. + This also matches __atomic_wide_counter requirements: The highest + value we add is __PTHREAD_COND_MAX_GROUP_SIZE << 2 to __g1_start + (the two extra bits are for the lock in the two LSBs of + __g1_start). */ #define __PTHREAD_COND_MAX_GROUP_SIZE ((unsigned) 1 << 29) +static inline uint64_t +__condvar_load_wseq_relaxed (pthread_cond_t *cond) +{ + return __atomic_wide_counter_load_relaxed (&cond->__data.__wseq); +} + +static inline uint64_t +__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val) +{ + return __atomic_wide_counter_fetch_add_acquire (&cond->__data.__wseq, val); +} + +static inline uint64_t +__condvar_load_g1_start_relaxed (pthread_cond_t *cond) +{ + return __atomic_wide_counter_load_relaxed (&cond->__data.__g1_start); +} + +static inline void +__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val) +{ + __atomic_wide_counter_add_relaxed (&cond->__data.__g1_start, val); +} + #if __HAVE_64B_ATOMICS == 1 -static uint64_t __attribute__ ((unused)) -__condvar_load_wseq_relaxed (pthread_cond_t *cond) -{ - return atomic_load_relaxed (&cond->__data.__wseq); -} - -static uint64_t __attribute__ ((unused)) -__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val) -{ - 
return atomic_fetch_add_acquire (&cond->__data.__wseq, val); -} - -static uint64_t __attribute__ ((unused)) +static inline uint64_t __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val) { - return atomic_fetch_xor_release (&cond->__data.__wseq, val); + return atomic_fetch_xor_release (&cond->__data.__wseq.__value64, val); } -static uint64_t __attribute__ ((unused)) -__condvar_load_g1_start_relaxed (pthread_cond_t *cond) -{ - return atomic_load_relaxed (&cond->__data.__g1_start); -} - -static void __attribute__ ((unused)) -__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val) -{ - atomic_store_relaxed (&cond->__data.__g1_start, - atomic_load_relaxed (&cond->__data.__g1_start) + val); -} - -#else - -/* We use two 64b counters: __wseq and __g1_start. They are monotonically - increasing and single-writer-multiple-readers counters, so we can implement - load, fetch-and-add, and fetch-and-xor operations even when we just have - 32b atomics. Values we add or xor are less than or equal to 1<<31 (*), - so we only have to make overflow-and-addition atomic wrt. to concurrent - load operations and xor operations. To do that, we split each counter into - two 32b values of which we reserve the MSB of each to represent an - overflow from the lower-order half to the higher-order half. - - In the common case, the state is (higher-order / lower-order half, and . is - basically concatenation of the bits): - 0.h / 0.l = h.l - - When we add a value of x that overflows (i.e., 0.l + x == 1.L), we run the - following steps S1-S4 (the values these represent are on the right-hand - side): - S1: 0.h / 1.L == (h+1).L - S2: 1.(h+1) / 1.L == (h+1).L - S3: 1.(h+1) / 0.L == (h+1).L - S4: 0.(h+1) / 0.L == (h+1).L - If the LSB of the higher-order half is set, readers will ignore the - overflow bit in the lower-order half. 
- - To get an atomic snapshot in load operations, we exploit that the - higher-order half is monotonically increasing; if we load a value V from - it, then read the lower-order half, and then read the higher-order half - again and see the same value V, we know that both halves have existed in - the sequence of values the full counter had. This is similar to the - validated reads in the time-based STMs in GCC's libitm (e.g., - method_ml_wt). +#else /* !__HAVE_64B_ATOMICS */ - The xor operation needs to be an atomic read-modify-write. The write +/* The xor operation needs to be an atomic read-modify-write. The write itself is not an issue as it affects just the lower-order half but not bits used in the add operation. To make the full fetch-and-xor atomic, we exploit that concurrently, the value can increase by at most 1<<31 (*): The @@ -97,117 +70,18 @@ __condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val) than __PTHREAD_COND_MAX_GROUP_SIZE waiters can enter concurrently and thus increment __wseq. Therefore, if the xor operation observes a value of __wseq, then the value it applies the modification to later on can be - derived (see below). - - One benefit of this scheme is that this makes load operations - obstruction-free because unlike if we would just lock the counter, readers - can almost always interpret a snapshot of each halves. Readers can be - forced to read a new snapshot when the read is concurrent with an overflow. - However, overflows will happen infrequently, so load operations are - practically lock-free. - - (*) The highest value we add is __PTHREAD_COND_MAX_GROUP_SIZE << 2 to - __g1_start (the two extra bits are for the lock in the two LSBs of - __g1_start). */ - -typedef struct -{ - unsigned int low; - unsigned int high; -} _condvar_lohi; - -static uint64_t -__condvar_fetch_add_64_relaxed (_condvar_lohi *lh, unsigned int op) -{ - /* S1. 
Note that this is an atomic read-modify-write so it extends the - release sequence of release MO store at S3. */ - unsigned int l = atomic_fetch_add_relaxed (&lh->low, op); - unsigned int h = atomic_load_relaxed (&lh->high); - uint64_t result = ((uint64_t) h << 31) | l; - l += op; - if ((l >> 31) > 0) - { - /* Overflow. Need to increment higher-order half. Note that all - add operations are ordered in happens-before. */ - h++; - /* S2. Release MO to synchronize with the loads of the higher-order half - in the load operation. See __condvar_load_64_relaxed. */ - atomic_store_release (&lh->high, h | ((unsigned int) 1 << 31)); - l ^= (unsigned int) 1 << 31; - /* S3. See __condvar_load_64_relaxed. */ - atomic_store_release (&lh->low, l); - /* S4. Likewise. */ - atomic_store_release (&lh->high, h); - } - return result; -} - -static uint64_t -__condvar_load_64_relaxed (_condvar_lohi *lh) -{ - unsigned int h, l, h2; - do - { - /* This load and the second one below to the same location read from the - stores in the overflow handling of the add operation or the - initializing stores (which is a simple special case because - initialization always completely happens before further use). - Because no two stores to the higher-order half write the same value, - the loop ensures that if we continue to use the snapshot, this load - and the second one read from the same store operation. All candidate - store operations have release MO. - If we read from S2 in the first load, then we will see the value of - S1 on the next load (because we synchronize with S2), or a value - later in modification order. We correctly ignore the lower-half's - overflow bit in this case. If we read from S4, then we will see the - value of S3 in the next load (or a later value), which does not have - the overflow bit set anymore. 
- */ - h = atomic_load_acquire (&lh->high); - /* This will read from the release sequence of S3 (i.e, either the S3 - store or the read-modify-writes at S1 following S3 in modification - order). Thus, the read synchronizes with S3, and the following load - of the higher-order half will read from the matching S2 (or a later - value). - Thus, if we read a lower-half value here that already overflowed and - belongs to an increased higher-order half value, we will see the - latter and h and h2 will not be equal. */ - l = atomic_load_acquire (&lh->low); - /* See above. */ - h2 = atomic_load_relaxed (&lh->high); - } - while (h != h2); - if (((l >> 31) > 0) && ((h >> 31) > 0)) - l ^= (unsigned int) 1 << 31; - return ((uint64_t) (h & ~((unsigned int) 1 << 31)) << 31) + l; -} - -static uint64_t __attribute__ ((unused)) -__condvar_load_wseq_relaxed (pthread_cond_t *cond) -{ - return __condvar_load_64_relaxed ((_condvar_lohi *) &cond->__data.__wseq32); -} - -static uint64_t __attribute__ ((unused)) -__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val) -{ - uint64_t r = __condvar_fetch_add_64_relaxed - ((_condvar_lohi *) &cond->__data.__wseq32, val); - atomic_thread_fence_acquire (); - return r; -} + derived. */ static uint64_t __attribute__ ((unused)) __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val) { - _condvar_lohi *lh = (_condvar_lohi *) &cond->__data.__wseq32; /* First, get the current value. See __condvar_load_64_relaxed. 
*/ unsigned int h, l, h2; do { - h = atomic_load_acquire (&lh->high); - l = atomic_load_acquire (&lh->low); - h2 = atomic_load_relaxed (&lh->high); + h = atomic_load_acquire (&cond->__data.__wseq.__value32.__high); + l = atomic_load_acquire (&cond->__data.__wseq.__value32.__low); + h2 = atomic_load_relaxed (&cond->__data.__wseq.__value32.__high); } while (h != h2); if (((l >> 31) > 0) && ((h >> 31) == 0)) @@ -219,8 +93,9 @@ __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val) earlier in modification order than the following fetch-xor. This uses release MO to make the full operation have release semantics (all other operations access the lower-order half). */ - unsigned int l2 = atomic_fetch_xor_release (&lh->low, val) - & ~((unsigned int) 1 << 31); + unsigned int l2 + = (atomic_fetch_xor_release (&cond->__data.__wseq.__value32.__low, val) + & ~((unsigned int) 1 << 31)); if (l2 < l) /* The lower-order half overflowed in the meantime. This happened exactly once due to the limit on concurrent waiters (see above). */ @@ -228,22 +103,7 @@ __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val) return ((uint64_t) h << 31) + l2; } -static uint64_t __attribute__ ((unused)) -__condvar_load_g1_start_relaxed (pthread_cond_t *cond) -{ - return __condvar_load_64_relaxed - ((_condvar_lohi *) &cond->__data.__g1_start32); -} - -static void __attribute__ ((unused)) -__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val) -{ - ignore_value (__condvar_fetch_add_64_relaxed - ((_condvar_lohi *) &cond->__data.__g1_start32, val)); -} - -#endif /* !__HAVE_64B_ATOMICS */ - +#endif /* !__HAVE_64B_ATOMICS */ /* The lock that signalers use. See pthread_cond_wait_common for uses. 
The lock is our normal three-state lock: not acquired (0) / acquired (1) / diff --git a/nptl/tst-cond22.c b/nptl/tst-cond22.c index 64f19ea0a5..1336e9c79d 100644 --- a/nptl/tst-cond22.c +++ b/nptl/tst-cond22.c @@ -106,8 +106,11 @@ do_test (void) status = 1; } - printf ("cond = { %llu, %llu, %u/%u/%u, %u/%u/%u, %u, %u }\n", - c.__data.__wseq, c.__data.__g1_start, + printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u/%u, %u/%u/%u, %u, %u }\n", + c.__data.__wseq.__value32.__high, + c.__data.__wseq.__value32.__low, + c.__data.__g1_start.__value32.__high, + c.__data.__g1_start.__value32.__low, c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0], c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1], c.__data.__g1_orig_size, c.__data.__wrefs); @@ -149,8 +152,11 @@ do_test (void) status = 1; } - printf ("cond = { %llu, %llu, %u/%u/%u, %u/%u/%u, %u, %u }\n", - c.__data.__wseq, c.__data.__g1_start, + printf ("cond = { 0x%x:%x, 0x%x:%x, %u/%u/%u, %u/%u/%u, %u, %u }\n", + c.__data.__wseq.__value32.__high, + c.__data.__wseq.__value32.__low, + c.__data.__g1_start.__value32.__high, + c.__data.__g1_start.__value32.__low, c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0], c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1], c.__data.__g1_orig_size, c.__data.__wrefs); diff --git a/sysdeps/nptl/bits/thread-shared-types.h b/sysdeps/nptl/bits/thread-shared-types.h index 44bf1e358d..b82a79a43e 100644 --- a/sysdeps/nptl/bits/thread-shared-types.h +++ b/sysdeps/nptl/bits/thread-shared-types.h @@ -43,6 +43,8 @@ #include <bits/pthreadtypes-arch.h> +#include <bits/atomic_wide_counter.h> + /* Common definition of pthread_mutex_t. 
*/ @@ -91,24 +93,8 @@ typedef struct __pthread_internal_slist struct __pthread_cond_s { - __extension__ union - { - __extension__ unsigned long long int __wseq; - struct - { - unsigned int __low; - unsigned int __high; - } __wseq32; - }; - __extension__ union - { - __extension__ unsigned long long int __g1_start; - struct - { - unsigned int __low; - unsigned int __high; - } __g1_start32; - }; + __atomic_wide_counter __wseq; + __atomic_wide_counter __g1_start; unsigned int __g_refs[2] __LOCK_ALIGNMENT; unsigned int __g_size[2]; unsigned int __g1_orig_size;