[v4] Remove catomics

Message ID AS4PR08MB7901F6228D9EF556A3FE027D83249@AS4PR08MB7901.eurprd08.prod.outlook.com
State Failed CI
Series [v4] Remove catomics

Checks

Context Check Description
dj/TryBot-apply_patch fail Patch failed to apply to master at the time it was sent
dj/TryBot-32bit fail Patch series failed to apply

Commit Message

Wilco Dijkstra Oct. 14, 2022, 4:38 p.m. UTC
  v4: rebased to latest trunk

The catomics are not supported on most targets and are only used in a
few places that are not performance critical, so replace all uses with
more standard atomics.
Replace uses of catomic_add, catomic_increment, catomic_decrement and
catomic_exchange_and_add with atomic_fetch_add_relaxed, which maps to a
standard compiler builtin.  Relaxed memory ordering is correct for
simple counters since they only need atomicity.

Passes regression testing on AArch64 and build-many-glibcs.
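
For reference, here is a minimal standalone sketch (not part of the
patch) of what these counter updates become.  atomic_fetch_add_relaxed
expands to the __atomic_fetch_add compiler builtin with __ATOMIC_RELAXED
ordering, which guarantees the read-modify-write is atomic but imposes
no ordering on surrounding memory accesses; calls_total below is merely
a stand-in for one of the statistics counters in malloc/memusage.c:

  #include <stdio.h>

  static unsigned long calls_total;

  static void
  count_call (void)
  {
    /* Equivalent to glibc's atomic_fetch_add_relaxed (&calls_total, 1):
       an atomic increment with no ordering constraints.  */
    __atomic_fetch_add (&calls_total, 1, __ATOMIC_RELAXED);
  }

  int
  main (void)
  {
    for (int i = 0; i < 10; i++)
      count_call ();
    printf ("calls_total = %lu\n", calls_total);
    return 0;
  }

This is why atomic_fetch_add_relaxed is a safe drop-in for the catomic
counter updates: the counters only need atomicity and never synchronize
other data.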

---
  

Comments

Carlos O'Donell Oct. 17, 2022, 1:25 p.m. UTC | #1
On 10/14/22 12:38, Wilco Dijkstra via Libc-alpha wrote:
> v4: rebased to latest trunk
> 
> The catomics are not supported on most targets and are only used in a
> few places that are not performance critical, so replace all uses with
> more standard atomics.
> Replace uses of catomic_add, catomic_increment, catomic_decrement and
> catomic_exchange_and_add with atomic_fetch_add_relaxed, which maps to a
> standard compiler builtin.  Relaxed memory ordering is correct for
> simple counters since they only need atomicity.
> 
> Passes regression testing on AArch64 and build-many-glibcs.

Fails pre-commit CI:
https://patchwork.sourceware.org/project/glibc/patch/AS4PR08MB7901F6228D9EF556A3FE027D83249@AS4PR08MB7901.eurprd08.prod.outlook.com/

Could you please review?
 
  
Wilco Dijkstra Oct. 17, 2022, 2:50 p.m. UTC | #2
Hi Carlos,

> Fails pre-commit CI:
> https://patchwork.sourceware.org/project/glibc/patch/AS4PR08MB7901F6228D9EF556A3FE027D83249@AS4PR08MB7901.eurprd08.prod.outlook.com/
>
> Could you please review?
 
It works fine for me - the patch applies cleanly using patch -p1 -i <patchfile>.
What settings do you use in the CI?

Cheers,
Wilco
  
Carlos O'Donell Oct. 17, 2022, 9:56 p.m. UTC | #3
On 10/17/22 10:50, Wilco Dijkstra wrote:
> Hi Carlos,
> 
>> Fails pre-commit CI:
>> https://patchwork.sourceware.org/project/glibc/patch/AS4PR08MB7901F6228D9EF556A3FE027D83249@AS4PR08MB7901.eurprd08.prod.outlook.com/
>>
>> Could you please review?
>  
> It works fine for me - the patch applies cleanly using patch -p1 -i <patchfile>.
> What settings do you use in the CI?

git apply -p1

https://gitlab.com/djdelorie/glibc-cicd/-/blob/main/trybot-apply_patch.py#L65
  
Wilco Dijkstra Oct. 18, 2022, 1:01 p.m. UTC | #4
Hi Carlos,

>> It works fine for me - the patch applies cleanly using patch -p1 -i <patchfile>.
>> What settings do you use in the CI?
>
> git apply -p1
>
> https://gitlab.com/djdelorie/glibc-cicd/-/blob/main/trybot-apply_patch.py#L65

Right, it seems git apply's defaults are stricter than those of 'patch'.
In general you want a CI to apply patches automatically whenever
feasible rather than accept only perfectly matching ones.  This is true
even for auto-commit CI, since you would only commit after build and
test pass.

How about adding '-C1' (i.e. git apply -p1 -C1), which reduces the
amount of surrounding context that must match to one line?

Cheers,
Wilco
  
Szabolcs Nagy Oct. 18, 2022, 1:34 p.m. UTC | #5
The 10/18/2022 13:01, Wilco Dijkstra via Libc-alpha wrote:
> Hi Carlos,
> 
> >> It works fine for me - the patch applies cleanly using patch -p1 -i <patchfile>.
> >> What settings do you use in the CI?
> >
> > git apply -p1
> >
> > https://gitlab.com/djdelorie/glibc-cicd/-/blob/main/trybot-apply_patch.py#L65
> 
> Right, it seems git apply's defaults are stricter than those of 'patch'.
> In general you want a CI to apply patches automatically whenever
> feasible rather than accept only perfectly matching ones.  This is true
> even for auto-commit CI, since you would only commit after build and
> test pass.
> 
> How about adding '-C1' (i.e. git apply -p1 -C1), which reduces the
> amount of surrounding context that must match to one line?

I think the expectation is that a patch applies to current master
without conflicts, which holds if you rebase it before posting.
  

Patch

diff --git a/elf/dl-fptr.c b/elf/dl-fptr.c
index 6645a260b809ecd521796e0d1adee56b3e0bd993..d6e63b807b597b886562657da2d007fc9053be72 100644
--- a/elf/dl-fptr.c
+++ b/elf/dl-fptr.c
@@ -40,7 +40,7 @@ 
 
 #ifndef COMPARE_AND_SWAP
 # define COMPARE_AND_SWAP(ptr, old, new) \
-  (catomic_compare_and_exchange_bool_acq (ptr, new, old) == 0)
+  (atomic_compare_and_exchange_bool_acq (ptr, new, old) == 0)
 #endif
 
 ElfW(Addr) _dl_boot_fptr_table [ELF_MACHINE_BOOT_FPTR_TABLE_LEN];
diff --git a/elf/dl-profile.c b/elf/dl-profile.c
index 96ba6067240f2a2fec905442647b04272db523df..0af1f577d2d695d08edce9e13d9b39f77911b1d5 100644
--- a/elf/dl-profile.c
+++ b/elf/dl-profile.c
@@ -552,7 +552,7 @@  _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc)
 	      froms[newfromidx].here = &data[narcs];
 	      froms[newfromidx].link = tos[to_index];
 	      tos[to_index] = newfromidx;
-	      catomic_increment (&narcs);
+	      atomic_fetch_add_relaxed (&narcs, 1);
 	    }
 
 	  /* If we still have no entry stop searching and insert.  */
diff --git a/include/atomic.h b/include/atomic.h
index 2cb52c9cfd894308b97b97a04dd574b2287bf1b2..227581d60d85ba6d0f7223a94bbd6ff90e50ff1c 100644
--- a/include/atomic.h
+++ b/include/atomic.h
@@ -24,13 +24,6 @@ 
    - atomic arithmetic and logic operation on memory.  They all
      have the prefix "atomic_".
 
-   - conditionally atomic operations of the same kinds.  These
-     always behave identical but can be faster when atomicity
-     is not really needed since only one thread has access to
-     the memory location.  In that case the code is slower in
-     the multi-thread case.  The interfaces have the prefix
-     "catomic_".
-
    - support functions like barriers.  They also have the prefix
      "atomic_".
 
@@ -93,29 +86,6 @@ 
 #endif
 
 
-#ifndef catomic_compare_and_exchange_val_acq
-# ifdef __arch_c_compare_and_exchange_val_32_acq
-#  define catomic_compare_and_exchange_val_acq(mem, newval, oldval) \
-  __atomic_val_bysize (__arch_c_compare_and_exchange_val,acq,		      \
-		       mem, newval, oldval)
-# else
-#  define catomic_compare_and_exchange_val_acq(mem, newval, oldval) \
-  atomic_compare_and_exchange_val_acq (mem, newval, oldval)
-# endif
-#endif
-
-
-#ifndef catomic_compare_and_exchange_val_rel
-# ifndef atomic_compare_and_exchange_val_rel
-#  define catomic_compare_and_exchange_val_rel(mem, newval, oldval)	      \
-  catomic_compare_and_exchange_val_acq (mem, newval, oldval)
-# else
-#  define catomic_compare_and_exchange_val_rel(mem, newval, oldval)	      \
-  atomic_compare_and_exchange_val_rel (mem, newval, oldval)
-# endif
-#endif
-
-
 #ifndef atomic_compare_and_exchange_val_rel
 # define atomic_compare_and_exchange_val_rel(mem, newval, oldval)	      \
   atomic_compare_and_exchange_val_acq (mem, newval, oldval)
@@ -141,23 +111,6 @@ 
 #endif
 
 
-#ifndef catomic_compare_and_exchange_bool_acq
-# ifdef __arch_c_compare_and_exchange_bool_32_acq
-#  define catomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
-  __atomic_bool_bysize (__arch_c_compare_and_exchange_bool,acq,		      \
-		        mem, newval, oldval)
-# else
-#  define catomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
-  ({ /* Cannot use __oldval here, because macros later in this file might     \
-	call this macro with __oldval argument.	 */			      \
-     __typeof (oldval) __atg4_old = (oldval);				      \
-     catomic_compare_and_exchange_val_acq (mem, newval, __atg4_old)	      \
-       != __atg4_old;							      \
-  })
-# endif
-#endif
-
-
 /* Store NEWVALUE in *MEM and return the old value.  */
 #ifndef atomic_exchange_acq
 # define atomic_exchange_acq(mem, newvalue) \
@@ -212,23 +165,6 @@ 
   atomic_exchange_and_add_acq(mem, value)
 #endif
 
-#ifndef catomic_exchange_and_add
-# define catomic_exchange_and_add(mem, value) \
-  ({ __typeof (*(mem)) __atg7_oldv;					      \
-     __typeof (mem) __atg7_memp = (mem);				      \
-     __typeof (*(mem)) __atg7_value = (value);				      \
-									      \
-     do									      \
-       __atg7_oldv = *__atg7_memp;					      \
-     while (__builtin_expect						      \
-	    (catomic_compare_and_exchange_bool_acq (__atg7_memp,	      \
-						    __atg7_oldv		      \
-						    + __atg7_value,	      \
-						    __atg7_oldv), 0));	      \
-									      \
-     __atg7_oldv; })
-#endif
-
 
 #ifndef atomic_max
 # define atomic_max(mem, value) \
@@ -247,24 +183,6 @@ 
 #endif
 
 
-#ifndef catomic_max
-# define catomic_max(mem, value) \
-  do {									      \
-    __typeof (*(mem)) __atg9_oldv;					      \
-    __typeof (mem) __atg9_memp = (mem);					      \
-    __typeof (*(mem)) __atg9_value = (value);				      \
-    do {								      \
-      __atg9_oldv = *__atg9_memp;					      \
-      if (__atg9_oldv >= __atg9_value)					      \
-	break;								      \
-    } while (__builtin_expect						      \
-	     (catomic_compare_and_exchange_bool_acq (__atg9_memp,	      \
-						     __atg9_value,	      \
-						     __atg9_oldv), 0));	      \
-  } while (0)
-#endif
-
-
 #ifndef atomic_min
 # define atomic_min(mem, value) \
   do {									      \
@@ -288,32 +206,16 @@ 
 #endif
 
 
-#ifndef catomic_add
-# define catomic_add(mem, value) \
-  (void) catomic_exchange_and_add ((mem), (value))
-#endif
-
-
 #ifndef atomic_increment
 # define atomic_increment(mem) atomic_add ((mem), 1)
 #endif
 
 
-#ifndef catomic_increment
-# define catomic_increment(mem) catomic_add ((mem), 1)
-#endif
-
-
 #ifndef atomic_increment_val
 # define atomic_increment_val(mem) (atomic_exchange_and_add ((mem), 1) + 1)
 #endif
 
 
-#ifndef catomic_increment_val
-# define catomic_increment_val(mem) (catomic_exchange_and_add ((mem), 1) + 1)
-#endif
-
-
 /* Add one to *MEM and return true iff it's now zero.  */
 #ifndef atomic_increment_and_test
 # define atomic_increment_and_test(mem) \
@@ -326,21 +228,11 @@ 
 #endif
 
 
-#ifndef catomic_decrement
-# define catomic_decrement(mem) catomic_add ((mem), -1)
-#endif
-
-
 #ifndef atomic_decrement_val
 # define atomic_decrement_val(mem) (atomic_exchange_and_add ((mem), -1) - 1)
 #endif
 
 
-#ifndef catomic_decrement_val
-# define catomic_decrement_val(mem) (catomic_exchange_and_add ((mem), -1) - 1)
-#endif
-
-
 /* Subtract 1 from *MEM and return true iff it's now zero.  */
 #ifndef atomic_decrement_and_test
 # define atomic_decrement_and_test(mem) \
@@ -421,22 +313,6 @@ 
   } while (0)
 #endif
 
-#ifndef catomic_and
-# define catomic_and(mem, mask) \
-  do {									      \
-    __typeof (*(mem)) __atg20_old;					      \
-    __typeof (mem) __atg20_memp = (mem);				      \
-    __typeof (*(mem)) __atg20_mask = (mask);				      \
-									      \
-    do									      \
-      __atg20_old = (*__atg20_memp);					      \
-    while (__builtin_expect						      \
-	   (catomic_compare_and_exchange_bool_acq (__atg20_memp,	      \
-						   __atg20_old & __atg20_mask,\
-						   __atg20_old), 0));	      \
-  } while (0)
-#endif
-
 /* Atomically *mem &= mask and return the old value of *mem.  */
 #ifndef atomic_and_val
 # define atomic_and_val(mem, mask) \
@@ -471,22 +347,6 @@ 
   } while (0)
 #endif
 
-#ifndef catomic_or
-# define catomic_or(mem, mask) \
-  do {									      \
-    __typeof (*(mem)) __atg18_old;					      \
-    __typeof (mem) __atg18_memp = (mem);				      \
-    __typeof (*(mem)) __atg18_mask = (mask);				      \
-									      \
-    do									      \
-      __atg18_old = (*__atg18_memp);					      \
-    while (__builtin_expect						      \
-	   (catomic_compare_and_exchange_bool_acq (__atg18_memp,	      \
-						   __atg18_old | __atg18_mask,\
-						   __atg18_old), 0));	      \
-  } while (0)
-#endif
-
 /* Atomically *mem |= mask and return the old value of *mem.  */
 #ifndef atomic_or_val
 # define atomic_or_val(mem, mask) \
diff --git a/malloc/arena.c b/malloc/arena.c
index 36786530edafe02cab0177b6afb8bcbe32323a96..3826caa04d99bf6bcf33dae46aefffca7607a33c 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -950,11 +950,11 @@  arena_get2 (size_t size, mstate avoid_arena)
          enough address space to create that many arenas.  */
       if (__glibc_unlikely (n <= narenas_limit - 1))
         {
-          if (catomic_compare_and_exchange_bool_acq (&narenas, n + 1, n))
+          if (atomic_compare_and_exchange_bool_acq (&narenas, n + 1, n))
             goto repeat;
           a = _int_new_arena (size);
 	  if (__glibc_unlikely (a == NULL))
-            catomic_decrement (&narenas);
+            atomic_fetch_add_relaxed (&narenas, -1);
         }
       else
         a = reused_arena (avoid_arena);
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 953183e9567b65b5e1f308f1fe8c9323c0fc2e1f..4059f856b5b9cd6490e67cef38760733592fb5f6 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -3808,7 +3808,7 @@  _int_malloc (mstate av, size_t bytes)
       if (__glibc_unlikely (pp != NULL && misaligned_chunk (pp)))       \
 	malloc_printerr ("malloc(): unaligned fastbin chunk detected"); \
     }							\
-  while ((pp = catomic_compare_and_exchange_val_acq (fb, pp, victim)) \
+  while ((pp = atomic_compare_and_exchange_val_acq (fb, pp, victim)) \
 	 != victim);					\
 
   if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
@@ -4526,7 +4526,7 @@  _int_free (mstate av, mchunkptr p, int have_lock)
 	  old2 = old;
 	  p->fd = PROTECT_PTR (&p->fd, old);
 	}
-      while ((old = catomic_compare_and_exchange_val_rel (fb, p, old2))
+      while ((old = atomic_compare_and_exchange_val_rel (fb, p, old2))
 	     != old2);
 
     /* Check that size of fastbin chunk at the top is the same as
diff --git a/malloc/memusage.c b/malloc/memusage.c
index f30906dffb2731c104ea375af48f59c65bcc7c9c..74712834fa8b96fb2d9589d34b34ab07d05a84ca 100644
--- a/malloc/memusage.c
+++ b/malloc/memusage.c
@@ -148,8 +148,8 @@  update_data (struct header *result, size_t len, size_t old_len)
 
   /* Compute current heap usage and compare it with the maximum value.  */
   size_t heap
-    = catomic_exchange_and_add (&current_heap, len - old_len) + len - old_len;
-  catomic_max (&peak_heap, heap);
+    = atomic_fetch_add_relaxed (&current_heap, len - old_len) + len - old_len;
+  atomic_max (&peak_heap, heap);
 
   /* Compute current stack usage and compare it with the maximum
      value.  The base stack pointer might not be set if this is not
@@ -172,15 +172,15 @@  update_data (struct header *result, size_t len, size_t old_len)
     start_sp = sp;
   size_t current_stack = start_sp - sp;
 #endif
-  catomic_max (&peak_stack, current_stack);
+  atomic_max (&peak_stack, current_stack);
 
   /* Add up heap and stack usage and compare it with the maximum value.  */
-  catomic_max (&peak_total, heap + current_stack);
+  atomic_max (&peak_total, heap + current_stack);
 
   /* Store the value only if we are writing to a file.  */
   if (fd != -1)
     {
-      uint32_t idx = catomic_exchange_and_add (&buffer_cnt, 1);
+      uint32_t idx = atomic_fetch_add_relaxed (&buffer_cnt, 1);
       if (idx + 1 >= 2 * buffer_size)
         {
           /* We try to reset the counter to the correct range.  If
@@ -188,7 +188,7 @@  update_data (struct header *result, size_t len, size_t old_len)
              counter it does not matter since that thread will take
              care of the correction.  */
           uint32_t reset = (idx + 1) % (2 * buffer_size);
-          catomic_compare_and_exchange_val_acq (&buffer_cnt, reset, idx + 1);
+          atomic_compare_and_exchange_val_acq (&buffer_cnt, reset, idx + 1);
           if (idx >= 2 * buffer_size)
             idx = reset - 1;
         }
@@ -362,24 +362,24 @@  malloc (size_t len)
     return (*mallocp)(len);
 
   /* Keep track of number of calls.  */
-  catomic_increment (&calls[idx_malloc]);
+  atomic_fetch_add_relaxed (&calls[idx_malloc], 1);
   /* Keep track of total memory consumption for `malloc'.  */
-  catomic_add (&total[idx_malloc], len);
+  atomic_fetch_add_relaxed (&total[idx_malloc], len);
   /* Keep track of total memory requirement.  */
-  catomic_add (&grand_total, len);
+  atomic_fetch_add_relaxed (&grand_total, len);
   /* Remember the size of the request.  */
   if (len < 65536)
-    catomic_increment (&histogram[len / 16]);
+    atomic_fetch_add_relaxed (&histogram[len / 16], 1);
   else
-    catomic_increment (&large);
+    atomic_fetch_add_relaxed (&large, 1);
   /* Total number of calls of any of the functions.  */
-  catomic_increment (&calls_total);
+  atomic_fetch_add_relaxed (&calls_total, 1);
 
   /* Do the real work.  */
   result = (struct header *) (*mallocp)(len + sizeof (struct header));
   if (result == NULL)
     {
-      catomic_increment (&failed[idx_malloc]);
+      atomic_fetch_add_relaxed (&failed[idx_malloc], 1);
       return NULL;
     }
 
@@ -430,21 +430,21 @@  realloc (void *old, size_t len)
     }
 
   /* Keep track of number of calls.  */
-  catomic_increment (&calls[idx_realloc]);
+  atomic_fetch_add_relaxed (&calls[idx_realloc], 1);
   if (len > old_len)
     {
       /* Keep track of total memory consumption for `realloc'.  */
-      catomic_add (&total[idx_realloc], len - old_len);
+      atomic_fetch_add_relaxed (&total[idx_realloc], len - old_len);
       /* Keep track of total memory requirement.  */
-      catomic_add (&grand_total, len - old_len);
+      atomic_fetch_add_relaxed (&grand_total, len - old_len);
     }
 
   if (len == 0 && old != NULL)
     {
       /* Special case.  */
-      catomic_increment (&realloc_free);
+      atomic_fetch_add_relaxed (&realloc_free, 1);
       /* Keep track of total memory freed using `free'.  */
-      catomic_add (&total[idx_free], real->length);
+      atomic_fetch_add_relaxed (&total[idx_free], real->length);
 
       /* Update the allocation data and write out the records if necessary.  */
       update_data (NULL, 0, old_len);
@@ -457,26 +457,26 @@  realloc (void *old, size_t len)
 
   /* Remember the size of the request.  */
   if (len < 65536)
-    catomic_increment (&histogram[len / 16]);
+    atomic_fetch_add_relaxed (&histogram[len / 16], 1);
   else
-    catomic_increment (&large);
+    atomic_fetch_add_relaxed (&large, 1);
   /* Total number of calls of any of the functions.  */
-  catomic_increment (&calls_total);
+  atomic_fetch_add_relaxed (&calls_total, 1);
 
   /* Do the real work.  */
   result = (struct header *) (*reallocp)(real, len + sizeof (struct header));
   if (result == NULL)
     {
-      catomic_increment (&failed[idx_realloc]);
+      atomic_fetch_add_relaxed (&failed[idx_realloc], 1);
       return NULL;
     }
 
   /* Record whether the reduction/increase happened in place.  */
   if (real == result)
-    catomic_increment (&inplace);
+    atomic_fetch_add_relaxed (&inplace, 1);
   /* Was the buffer increased?  */
   if (old_len > len)
-    catomic_increment (&decreasing);
+    atomic_fetch_add_relaxed (&decreasing, 1);
 
   /* Update the allocation data and write out the records if necessary.  */
   update_data (result, len, old_len);
@@ -508,16 +508,16 @@  calloc (size_t n, size_t len)
     return (*callocp)(n, len);
 
   /* Keep track of number of calls.  */
-  catomic_increment (&calls[idx_calloc]);
+  atomic_fetch_add_relaxed (&calls[idx_calloc], 1);
   /* Keep track of total memory consumption for `calloc'.  */
-  catomic_add (&total[idx_calloc], size);
+  atomic_fetch_add_relaxed (&total[idx_calloc], size);
   /* Keep track of total memory requirement.  */
-  catomic_add (&grand_total, size);
+  atomic_fetch_add_relaxed (&grand_total, size);
   /* Remember the size of the request.  */
   if (size < 65536)
-    catomic_increment (&histogram[size / 16]);
+    atomic_fetch_add_relaxed (&histogram[size / 16], 1);
   else
-    catomic_increment (&large);
+    atomic_fetch_add_relaxed (&large, 1);
   /* Total number of calls of any of the functions.  */
   ++calls_total;
 
@@ -525,7 +525,7 @@  calloc (size_t n, size_t len)
   result = (struct header *) (*mallocp)(size + sizeof (struct header));
   if (result == NULL)
     {
-      catomic_increment (&failed[idx_calloc]);
+      atomic_fetch_add_relaxed (&failed[idx_calloc], 1);
       return NULL;
     }
 
@@ -563,7 +563,7 @@  free (void *ptr)
   /* `free (NULL)' has no effect.  */
   if (ptr == NULL)
     {
-      catomic_increment (&calls[idx_free]);
+      atomic_fetch_add_relaxed (&calls[idx_free], 1);
       return;
     }
 
@@ -577,9 +577,9 @@  free (void *ptr)
     }
 
   /* Keep track of number of calls.  */
-  catomic_increment (&calls[idx_free]);
+  atomic_fetch_add_relaxed (&calls[idx_free], 1);
   /* Keep track of total memory freed using `free'.  */
-  catomic_add (&total[idx_free], real->length);
+  atomic_fetch_add_relaxed (&total[idx_free], real->length);
 
   /* Update the allocation data and write out the records if necessary.  */
   update_data (NULL, 0, real->length);
@@ -614,22 +614,22 @@  mmap (void *start, size_t len, int prot, int flags, int fd, off_t offset)
                  ? idx_mmap_a : prot & PROT_WRITE ? idx_mmap_w : idx_mmap_r);
 
       /* Keep track of number of calls.  */
-      catomic_increment (&calls[idx]);
+      atomic_fetch_add_relaxed (&calls[idx], 1);
       /* Keep track of total memory consumption for `malloc'.  */
-      catomic_add (&total[idx], len);
+      atomic_fetch_add_relaxed (&total[idx], len);
       /* Keep track of total memory requirement.  */
-      catomic_add (&grand_total, len);
+      atomic_fetch_add_relaxed (&grand_total, len);
       /* Remember the size of the request.  */
       if (len < 65536)
-        catomic_increment (&histogram[len / 16]);
+        atomic_fetch_add_relaxed (&histogram[len / 16], 1);
       else
-        catomic_increment (&large);
+        atomic_fetch_add_relaxed (&large, 1);
       /* Total number of calls of any of the functions.  */
-      catomic_increment (&calls_total);
+      atomic_fetch_add_relaxed (&calls_total, 1);
 
       /* Check for failures.  */
       if (result == NULL)
-        catomic_increment (&failed[idx]);
+        atomic_fetch_add_relaxed (&failed[idx], 1);
       else if (idx == idx_mmap_w)
         /* Update the allocation data and write out the records if
            necessary.  Note the first parameter is NULL which means
@@ -667,22 +667,22 @@  mmap64 (void *start, size_t len, int prot, int flags, int fd, off64_t offset)
                  ? idx_mmap_a : prot & PROT_WRITE ? idx_mmap_w : idx_mmap_r);
 
       /* Keep track of number of calls.  */
-      catomic_increment (&calls[idx]);
+      atomic_fetch_add_relaxed (&calls[idx], 1);
       /* Keep track of total memory consumption for `malloc'.  */
-      catomic_add (&total[idx], len);
+      atomic_fetch_add_relaxed (&total[idx], len);
       /* Keep track of total memory requirement.  */
-      catomic_add (&grand_total, len);
+      atomic_fetch_add_relaxed (&grand_total, len);
       /* Remember the size of the request.  */
       if (len < 65536)
-        catomic_increment (&histogram[len / 16]);
+        atomic_fetch_add_relaxed (&histogram[len / 16], 1);
       else
-        catomic_increment (&large);
+        atomic_fetch_add_relaxed (&large, 1);
       /* Total number of calls of any of the functions.  */
-      catomic_increment (&calls_total);
+      atomic_fetch_add_relaxed (&calls_total, 1);
 
       /* Check for failures.  */
       if (result == NULL)
-        catomic_increment (&failed[idx]);
+        atomic_fetch_add_relaxed (&failed[idx], 1);
       else if (idx == idx_mmap_w)
         /* Update the allocation data and write out the records if
            necessary.  Note the first parameter is NULL which means
@@ -722,33 +722,33 @@  mremap (void *start, size_t old_len, size_t len, int flags, ...)
   if (!not_me && trace_mmap)
     {
       /* Keep track of number of calls.  */
-      catomic_increment (&calls[idx_mremap]);
+      atomic_fetch_add_relaxed (&calls[idx_mremap], 1);
       if (len > old_len)
         {
           /* Keep track of total memory consumption for `malloc'.  */
-          catomic_add (&total[idx_mremap], len - old_len);
+          atomic_fetch_add_relaxed (&total[idx_mremap], len - old_len);
           /* Keep track of total memory requirement.  */
-          catomic_add (&grand_total, len - old_len);
+          atomic_fetch_add_relaxed (&grand_total, len - old_len);
         }
       /* Remember the size of the request.  */
       if (len < 65536)
-        catomic_increment (&histogram[len / 16]);
+        atomic_fetch_add_relaxed (&histogram[len / 16], 1);
       else
-        catomic_increment (&large);
+        atomic_fetch_add_relaxed (&large, 1);
       /* Total number of calls of any of the functions.  */
-      catomic_increment (&calls_total);
+      atomic_fetch_add_relaxed (&calls_total, 1);
 
       /* Check for failures.  */
       if (result == NULL)
-        catomic_increment (&failed[idx_mremap]);
+        atomic_fetch_add_relaxed (&failed[idx_mremap], 1);
       else
         {
           /* Record whether the reduction/increase happened in place.  */
           if (start == result)
-            catomic_increment (&inplace_mremap);
+            atomic_fetch_add_relaxed (&inplace_mremap, 1);
           /* Was the buffer increased?  */
           if (old_len > len)
-            catomic_increment (&decreasing_mremap);
+            atomic_fetch_add_relaxed (&decreasing_mremap, 1);
 
           /* Update the allocation data and write out the records if
              necessary.  Note the first parameter is NULL which means
@@ -783,19 +783,19 @@  munmap (void *start, size_t len)
   if (!not_me && trace_mmap)
     {
       /* Keep track of number of calls.  */
-      catomic_increment (&calls[idx_munmap]);
+      atomic_fetch_add_relaxed (&calls[idx_munmap], 1);
 
       if (__glibc_likely (result == 0))
         {
           /* Keep track of total memory freed using `free'.  */
-          catomic_add (&total[idx_munmap], len);
+          atomic_fetch_add_relaxed (&total[idx_munmap], len);
 
           /* Update the allocation data and write out the records if
              necessary.  */
           update_data (NULL, 0, len);
         }
       else
-        catomic_increment (&failed[idx_munmap]);
+        atomic_fetch_add_relaxed (&failed[idx_munmap], 1);
     }
 
   return result;
diff --git a/manual/memory.texi b/manual/memory.texi
index f69824841e5e352e8562f87b6c5dd4d96424f59f..2dfd09ea4aace004067e2e1d51c9e1292d1f7452 100644
--- a/manual/memory.texi
+++ b/manual/memory.texi
@@ -354,7 +354,7 @@  this function is in @file{stdlib.h}.
 @c that's protected by list_lock; next_free is only modified while
 @c list_lock is held too.  All other data members of an arena, as well
 @c as the metadata of the memory areas assigned to it, are only modified
-@c while holding the arena's mutex (fastbin pointers use catomic ops
+@c while holding the arena's mutex (fastbin pointers use atomic ops
 @c because they may be modified by free without taking the arena's
 @c lock).  Some reassurance was needed for fastbins, for it wasn't clear
 @c how they were initialized.  It turns out they are always
@@ -383,7 +383,7 @@  this function is in @file{stdlib.h}.
 @c     mutex_lock (arena lock) dup @asulock @aculock [returns locked]
 @c    __get_nprocs ext ok @acsfd
 @c    NARENAS_FROM_NCORES ok
-@c    catomic_compare_and_exchange_bool_acq ok
+@c    atomic_compare_and_exchange_bool_acq ok
 @c    _int_new_arena ok @asulock @aculock @acsmem
 @c     new_heap ok @acsmem
 @c      mmap ok @acsmem
@@ -397,7 +397,7 @@  this function is in @file{stdlib.h}.
 @c     mutex_lock (list_lock) dup @asulock @aculock
 @c     atomic_thread_fence_release ok
 @c     mutex_unlock (list_lock) @aculock
-@c    catomic_decrement ok
+@c    atomic_fetch_add_relaxed ok
 @c    reused_arena @asulock @aculock
 @c      reads&writes next_to_use and iterates over arena next without guards
 @c      those are harmless as long as we don't drop arenas from the
@@ -414,7 +414,7 @@  this function is in @file{stdlib.h}.
 @c   get_max_fast ok
 @c   fastbin_index ok
 @c   fastbin ok
-@c   catomic_compare_and_exhange_val_acq ok
+@c   atomic_compare_and_exhange_val_acq ok
 @c   malloc_printerr dup @mtsenv
 @c     if we get to it, we're toast already, undefined behavior must have
 @c     been invoked before
@@ -521,10 +521,10 @@  this function is in @file{stdlib.h}.
 @c     chunk2mem dup ok
 @c     free_perturb ok
 @c     set_fastchunks ok
-@c      catomic_and ok
+@c      atomic_and ok
 @c     fastbin_index dup ok
 @c     fastbin dup ok
-@c     catomic_compare_and_exchange_val_rel ok
+@c     atomic_compare_and_exchange_val_rel ok
 @c     chunk_is_mmapped ok
 @c     contiguous dup ok
 @c     prev_inuse ok
@@ -706,7 +706,7 @@  The prototype for this function is in @file{stdlib.h}.
 @safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}}
 @c __libc_free @asulock @aculock @acsfd @acsmem
 @c   releasing memory into fastbins modifies the arena without taking
-@c   its mutex, but catomic operations ensure safety.  If two (or more)
+@c   its mutex, but atomic operations ensure safety.  If two (or more)
 @c   threads are running malloc and have their own arenas locked when
 @c   each gets a signal whose handler free()s large (non-fastbin-able)
 @c   blocks from each other's arena, we deadlock; this is a more general
diff --git a/misc/tst-atomic.c b/misc/tst-atomic.c
index 6d681a7bfdf4f48b4c04a073ebd480326dbd3cc8..4f9d2c1a46b363d346dbc2fa0962ae196844a43a 100644
--- a/misc/tst-atomic.c
+++ b/misc/tst-atomic.c
@@ -393,117 +393,6 @@  do_test (void)
     }
 #endif
 
-#ifdef catomic_compare_and_exchange_val_acq
-  mem = 24;
-  if (catomic_compare_and_exchange_val_acq (&mem, 35, 24) != 24
-      || mem != 35)
-    {
-      puts ("catomic_compare_and_exchange_val_acq test 1 failed");
-      ret = 1;
-    }
-
-  mem = 12;
-  if (catomic_compare_and_exchange_val_acq (&mem, 10, 15) != 12
-      || mem != 12)
-    {
-      puts ("catomic_compare_and_exchange_val_acq test 2 failed");
-      ret = 1;
-    }
-
-  mem = -15;
-  if (catomic_compare_and_exchange_val_acq (&mem, -56, -15) != -15
-      || mem != -56)
-    {
-      puts ("catomic_compare_and_exchange_val_acq test 3 failed");
-      ret = 1;
-    }
-
-  mem = -1;
-  if (catomic_compare_and_exchange_val_acq (&mem, 17, 0) != -1
-      || mem != -1)
-    {
-      puts ("catomic_compare_and_exchange_val_acq test 4 failed");
-      ret = 1;
-    }
-#endif
-
-  mem = 24;
-  if (catomic_compare_and_exchange_bool_acq (&mem, 35, 24)
-      || mem != 35)
-    {
-      puts ("catomic_compare_and_exchange_bool_acq test 1 failed");
-      ret = 1;
-    }
-
-  mem = 12;
-  if (! catomic_compare_and_exchange_bool_acq (&mem, 10, 15)
-      || mem != 12)
-    {
-      puts ("catomic_compare_and_exchange_bool_acq test 2 failed");
-      ret = 1;
-    }
-
-  mem = -15;
-  if (catomic_compare_and_exchange_bool_acq (&mem, -56, -15)
-      || mem != -56)
-    {
-      puts ("catomic_compare_and_exchange_bool_acq test 3 failed");
-      ret = 1;
-    }
-
-  mem = -1;
-  if (! catomic_compare_and_exchange_bool_acq (&mem, 17, 0)
-      || mem != -1)
-    {
-      puts ("catomic_compare_and_exchange_bool_acq test 4 failed");
-      ret = 1;
-    }
-
-  mem = 2;
-  if (catomic_exchange_and_add (&mem, 11) != 2
-      || mem != 13)
-    {
-      puts ("catomic_exchange_and_add test failed");
-      ret = 1;
-    }
-
-  mem = -21;
-  catomic_add (&mem, 22);
-  if (mem != 1)
-    {
-      puts ("catomic_add test failed");
-      ret = 1;
-    }
-
-  mem = -1;
-  catomic_increment (&mem);
-  if (mem != 0)
-    {
-      puts ("catomic_increment test failed");
-      ret = 1;
-    }
-
-  mem = 2;
-  if (catomic_increment_val (&mem) != 3)
-    {
-      puts ("catomic_increment_val test failed");
-      ret = 1;
-    }
-
-  mem = 17;
-  catomic_decrement (&mem);
-  if (mem != 16)
-    {
-      puts ("catomic_decrement test failed");
-      ret = 1;
-    }
-
-  if (catomic_decrement_val (&mem) != 15)
-    {
-      puts ("catomic_decrement_val test failed");
-      ret = 1;
-    }
-
   /* Tests for C11-like atomics.  */
   mem = 11;
   if (atomic_load_relaxed (&mem) != 11 || atomic_load_acquire (&mem) != 11)
diff --git a/sysdeps/hppa/dl-fptr.c b/sysdeps/hppa/dl-fptr.c
index 97882a8106a719642b2778d3ca8a69ef202cce2d..0562467d6f79f76b78b2cf169fdd059a993296d3 100644
--- a/sysdeps/hppa/dl-fptr.c
+++ b/sysdeps/hppa/dl-fptr.c
@@ -41,10 +41,8 @@ 
 # error "ELF_MACHINE_LOAD_ADDRESS is not defined."
 #endif
 
-#ifndef COMPARE_AND_SWAP
-# define COMPARE_AND_SWAP(ptr, old, new) \
-  (catomic_compare_and_exchange_bool_acq (ptr, new, old) == 0)
-#endif
+#define COMPARE_AND_SWAP(ptr, old, new) \
+  (atomic_compare_and_exchange_bool_acq (ptr, new, old) == 0)
 
 ElfW(Addr) _dl_boot_fptr_table [ELF_MACHINE_BOOT_FPTR_TABLE_LEN];