malloc: Use C11 like atomics on memusage

Message ID 20220831181435.3875859-1-adhemerval.zanella@linaro.org
State Superseded
Series malloc: Use C11 like atomics on memusage

Commit Message

Adhemerval Zanella Netto Aug. 31, 2022, 6:14 p.m. UTC
  Checked on x86_64-linux-gnu.
---
 malloc/memusage.c | 132 +++++++++++++++++++++++++---------------------
 1 file changed, 73 insertions(+), 59 deletions(-)
  

Comments

Adhemerval Zanella Netto Feb. 14, 2023, 6:47 p.m. UTC | #1
Ping.

On 31/08/22 15:14, Adhemerval Zanella wrote:
> Checked on x86_64-linux-gnu.
> ---
>  malloc/memusage.c | 132 +++++++++++++++++++++++++---------------------
>  1 file changed, 73 insertions(+), 59 deletions(-)
> 
  
DJ Delorie Feb. 23, 2023, 4:20 a.m. UTC | #2
LGTM.

Reviewed-by: DJ Delorie <dj@redhat.com>

Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> writes:
> +static inline void
> +peak_atomic_max (size_t *peak, size_t val)
> +{
> +  size_t v;
> +  do
> +    {
> +      v = atomic_load_relaxed (peak);
> +      if (v >= val)
> +	break;
> +    }
> +  while (! atomic_compare_exchange_weak_acquire (peak, &v, val));
> +}
> +

This is the only call without a direct replacement.  This inline
replicates what the <atomic.h> catomic_max macro does.  Ok.
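
For comparison, the same max-update idiom written against ISO C11
<stdatomic.h> would look roughly like this (an illustrative sketch
with a made-up helper name, not the glibc-internal macros; it assumes
the counter is an _Atomic size_t):

  #include <stdatomic.h>
  #include <stddef.h>

  /* Atomically raise *PEAK to VAL if VAL is larger.  On failure the
     weak compare-exchange reloads the current value into V, so the
     loop retries until *PEAK is already >= VAL or the store wins.  */
  static inline void
  atomic_size_max (_Atomic size_t *peak, size_t val)
  {
    size_t v = atomic_load_explicit (peak, memory_order_relaxed);
    while (v < val
           && !atomic_compare_exchange_weak_explicit (peak, &v, val,
                                                      memory_order_acquire,
                                                      memory_order_relaxed))
      ;
  }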

> -    = catomic_exchange_and_add (&current_heap, len - old_len) + len - old_len;
> -  catomic_max (&peak_heap, heap);
> +    = atomic_fetch_add_acquire (&current_heap, len - old_len) + len - old_len;
> +  peak_atomic_max (&peak_heap, heap);

Ok.

> -  catomic_max (&peak_stack, current_stack);
> +  peak_atomic_max (&peak_stack, current_stack);

Ok.

> -  catomic_max (&peak_total, heap + current_stack);
> +  peak_atomic_max (&peak_total, heap + current_stack);

Ok.

> -      uint32_t idx = catomic_exchange_and_add (&buffer_cnt, 1);
> +      uint32_t idx = atomic_fetch_add_acquire (&buffer_cnt, 1);

Ok.

> -          catomic_compare_and_exchange_val_acq (&buffer_cnt, reset, idx + 1);
> +	  uint32_t expected = idx + 1;
> +	  atomic_compare_exchange_weak_acquire (&buffer_cnt, &expected, reset);

Ok.

> -  catomic_increment (&calls[idx_malloc]);
> +  atomic_fetch_add_acquire (&calls[idx_malloc], 1);

Ok.

> -  catomic_add (&total[idx_malloc], len);
> +  atomic_fetch_add_acquire (&total[idx_malloc], len);

Ok.

> -  catomic_add (&grand_total, len);
> +  atomic_fetch_add_acquire (&grand_total, len);

Ok.

>    if (len < 65536)
> -    catomic_increment (&histogram[len / 16]);
> +    atomic_fetch_add_acquire (&histogram[len / 16], 1);
>    else
> -    catomic_increment (&large);
> +    atomic_fetch_add_acquire (&large, 1);

Ok.

> -  catomic_increment (&calls_total);
> +  atomic_fetch_add_acquire (&calls_total, 1);

Ok.

> -      catomic_increment (&failed[idx_malloc]);
> +      atomic_fetch_add_acquire (&failed[idx_malloc], 1);

Ok.

> -  catomic_increment (&calls[idx_realloc]);
> +  atomic_fetch_add_acquire (&calls[idx_realloc], 1);

Ok.

> -      catomic_add (&total[idx_realloc], len - old_len);
> +      atomic_fetch_add_acquire (&total[idx_realloc], len - old_len);

Ok.

> -      catomic_add (&grand_total, len - old_len);
> +      atomic_fetch_add_acquire (&grand_total, len - old_len);

Ok.

> -      catomic_increment (&realloc_free);
> +      atomic_fetch_add_acquire (&realloc_free, 1);

Ok.

> -      catomic_add (&total[idx_free], real->length);
> +      atomic_fetch_add_acquire (&total[idx_free], real->length);

Ok.

>    if (len < 65536)
> -    catomic_increment (&histogram[len / 16]);
> +    atomic_fetch_add_acquire (&histogram[len / 16], 1);
>    else
> -    catomic_increment (&large);
> +    atomic_fetch_add_acquire (&large, 1);

Ok.

> -  catomic_increment (&calls_total);
> +  atomic_fetch_add_acquire (&calls_total, 1);

Ok.

> -      catomic_increment (&failed[idx_realloc]);
> +      atomic_fetch_add_acquire (&failed[idx_realloc], 1);

Ok.

> -    catomic_increment (&inplace);
> +    atomic_fetch_add_acquire (&inplace, 1);

Ok.

> -    catomic_increment (&decreasing);
> +    atomic_fetch_add_acquire (&decreasing, 1);

Ok.

> -  catomic_increment (&calls[idx_calloc]);
> +  atomic_fetch_add_acquire (&calls[idx_calloc], 1);

Ok.

> -  catomic_add (&total[idx_calloc], size);
> +  atomic_fetch_add_acquire (&total[idx_calloc], size);

Ok.

> -  catomic_add (&grand_total, size);
> +  atomic_fetch_add_acquire (&grand_total, size);

Ok.

>    if (size < 65536)
> -    catomic_increment (&histogram[size / 16]);
> +    atomic_fetch_add_acquire (&histogram[size / 16], 1);
>    else
> -    catomic_increment (&large);
> +    atomic_fetch_add_acquire (&large, 1);

Ok.

> -      catomic_increment (&failed[idx_calloc]);
> +      atomic_fetch_add_acquire (&failed[idx_calloc], 1);

Ok.

> -      catomic_increment (&calls[idx_free]);
> +      atomic_fetch_add_acquire (&calls[idx_free], 1);

Ok.

> -  catomic_increment (&calls[idx_free]);
> +  atomic_fetch_add_acquire (&calls[idx_free], 1);

Ok.

> -  catomic_add (&total[idx_free], real->length);
> +  atomic_fetch_add_acquire (&total[idx_free], real->length);

Ok.

> -      catomic_increment (&calls[idx]);
> +      atomic_fetch_add_acquire (&calls[idx], 1);

Ok.

> -      catomic_add (&total[idx], len);
> +      atomic_fetch_add_acquire (&total[idx], len);

Ok.

> -      catomic_add (&grand_total, len);
> +      atomic_fetch_add_acquire (&grand_total, len);

Ok.

>        if (len < 65536)
> -        catomic_increment (&histogram[len / 16]);
> +        atomic_fetch_add_acquire (&histogram[len / 16], 1);
>        else
> -        catomic_increment (&large);
> +        atomic_fetch_add_acquire (&large, 1);

Ok.

> -      catomic_increment (&calls_total);
> +      atomic_fetch_add_acquire (&calls_total, 1);

Ok.

> -        catomic_increment (&failed[idx]);
> +        atomic_fetch_add_acquire (&failed[idx], 1);

Ok.

> -      catomic_increment (&calls[idx]);
> +      atomic_fetch_add_acquire (&calls[idx], 1);

Ok.

> -      catomic_add (&total[idx], len);
> +      atomic_fetch_add_acquire (&total[idx], len);

Ok.

> -      catomic_add (&grand_total, len);
> +      atomic_fetch_add_acquire (&grand_total, len);

Ok.

>        if (len < 65536)
> -        catomic_increment (&histogram[len / 16]);
> +        atomic_fetch_add_acquire (&histogram[len / 16], 1);
>        else
> -        catomic_increment (&large);
> +        atomic_fetch_add_acquire (&large, 1);

Ok.

> -      catomic_increment (&calls_total);
> +      atomic_fetch_add_acquire (&calls_total, 1);

Ok.

> -        catomic_increment (&failed[idx]);
> +        atomic_fetch_add_acquire (&failed[idx], 1);

Ok.

> -      catomic_increment (&calls[idx_mremap]);
> +      atomic_fetch_add_acquire (&calls[idx_mremap], 1);

Ok.

> -          catomic_add (&total[idx_mremap], len - old_len);
> +          atomic_fetch_add_acquire (&total[idx_mremap], len - old_len);

Ok.

> -          catomic_add (&grand_total, len - old_len);
> +          atomic_fetch_add_acquire (&grand_total, len - old_len);

Ok.


>        if (len < 65536)
> -        catomic_increment (&histogram[len / 16]);
> +        atomic_fetch_add_acquire (&histogram[len / 16], 1);
>        else
> -        catomic_increment (&large);
> +        atomic_fetch_add_acquire (&large, 1);

Ok.

> -      catomic_increment (&calls_total);
> +      atomic_fetch_add_acquire (&calls_total, 1);

Ok.

> -        catomic_increment (&failed[idx_mremap]);
> +        atomic_fetch_add_acquire (&failed[idx_mremap], 1);

Ok.

> -            catomic_increment (&inplace_mremap);
> +            atomic_fetch_add_acquire (&inplace_mremap, 1);

Ok.

> -            catomic_increment (&decreasing_mremap);
> +            atomic_fetch_add_acquire (&decreasing_mremap, 1);

Ok.

> -      catomic_increment (&calls[idx_munmap]);
> +      atomic_fetch_add_acquire (&calls[idx_munmap], 1);

Ok.

> -          catomic_add (&total[idx_munmap], len);
> +          atomic_fetch_add_acquire (&total[idx_munmap], len);

Ok.

> -        catomic_increment (&failed[idx_munmap]);
> +        atomic_fetch_add_acquire (&failed[idx_munmap], 1);

Ok.
  
Adhemerval Zanella Netto Feb. 23, 2023, 4:39 p.m. UTC | #3
Hi DJ,

Wilco has objected that these should use relaxed MO [1], so I plan to
send an updated version to fix it.
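
The change itself should be mechanical; for instance, the plain
statistic counters would become something like (sketch of the planned
v2, untested):

  atomic_fetch_add_relaxed (&calls[idx_malloc], 1);
  atomic_fetch_add_relaxed (&total[idx_malloc], len);
  atomic_fetch_add_relaxed (&grand_total, len);

since they only feed the printed statistics and do not order any other
memory accesses.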

[1] https://sourceware.org/pipermail/libc-alpha/2023-February/145665.html

On 23/02/23 01:20, DJ Delorie wrote:
> 
> LGTM.
> 
> Reviewed-by: DJ Delorie <dj@redhat.com>
  
DJ Delorie Feb. 23, 2023, 7:17 p.m. UTC | #4
Adhemerval Zanella Netto <adhemerval.zanella@linaro.org> writes:
> Wilco has objected that these should use relaxed MO [1], so I plan to
> send an updated version to fix it.

If the only difference is that change, my review still stands.  I was
mostly checking for API and "don't break".  Thanks!
  

Patch

diff --git a/malloc/memusage.c b/malloc/memusage.c
index f30906dffb..ddc487422c 100644
--- a/malloc/memusage.c
+++ b/malloc/memusage.c
@@ -134,6 +134,19 @@  gettime (struct entry *e)
 #endif
 }
 
+static inline void
+peak_atomic_max (size_t *peak, size_t val)
+{
+  size_t v;
+  do
+    {
+      v = atomic_load_relaxed (peak);
+      if (v >= val)
+	break;
+    }
+  while (! atomic_compare_exchange_weak_acquire (peak, &v, val));
+}
+
 /* Update the global data after a successful function call.  */
 static void
 update_data (struct header *result, size_t len, size_t old_len)
@@ -148,8 +161,8 @@  update_data (struct header *result, size_t len, size_t old_len)
 
   /* Compute current heap usage and compare it with the maximum value.  */
   size_t heap
-    = catomic_exchange_and_add (&current_heap, len - old_len) + len - old_len;
-  catomic_max (&peak_heap, heap);
+    = atomic_fetch_add_acquire (&current_heap, len - old_len) + len - old_len;
+  peak_atomic_max (&peak_heap, heap);
 
   /* Compute current stack usage and compare it with the maximum
      value.  The base stack pointer might not be set if this is not
@@ -172,15 +185,15 @@  update_data (struct header *result, size_t len, size_t old_len)
     start_sp = sp;
   size_t current_stack = start_sp - sp;
 #endif
-  catomic_max (&peak_stack, current_stack);
+  peak_atomic_max (&peak_stack, current_stack);
 
   /* Add up heap and stack usage and compare it with the maximum value.  */
-  catomic_max (&peak_total, heap + current_stack);
+  peak_atomic_max (&peak_total, heap + current_stack);
 
   /* Store the value only if we are writing to a file.  */
   if (fd != -1)
     {
-      uint32_t idx = catomic_exchange_and_add (&buffer_cnt, 1);
+      uint32_t idx = atomic_fetch_add_acquire (&buffer_cnt, 1);
       if (idx + 1 >= 2 * buffer_size)
         {
           /* We try to reset the counter to the correct range.  If
@@ -188,7 +201,8 @@  update_data (struct header *result, size_t len, size_t old_len)
              counter it does not matter since that thread will take
              care of the correction.  */
           uint32_t reset = (idx + 1) % (2 * buffer_size);
-          catomic_compare_and_exchange_val_acq (&buffer_cnt, reset, idx + 1);
+	  uint32_t expected = idx + 1;
+	  atomic_compare_exchange_weak_acquire (&buffer_cnt, &expected, reset);
           if (idx >= 2 * buffer_size)
             idx = reset - 1;
         }
@@ -362,24 +376,24 @@  malloc (size_t len)
     return (*mallocp)(len);
 
   /* Keep track of number of calls.  */
-  catomic_increment (&calls[idx_malloc]);
+  atomic_fetch_add_acquire (&calls[idx_malloc], 1);
   /* Keep track of total memory consumption for `malloc'.  */
-  catomic_add (&total[idx_malloc], len);
+  atomic_fetch_add_acquire (&total[idx_malloc], len);
   /* Keep track of total memory requirement.  */
-  catomic_add (&grand_total, len);
+  atomic_fetch_add_acquire (&grand_total, len);
   /* Remember the size of the request.  */
   if (len < 65536)
-    catomic_increment (&histogram[len / 16]);
+    atomic_fetch_add_acquire (&histogram[len / 16], 1);
   else
-    catomic_increment (&large);
+    atomic_fetch_add_acquire (&large, 1);
   /* Total number of calls of any of the functions.  */
-  catomic_increment (&calls_total);
+  atomic_fetch_add_acquire (&calls_total, 1);
 
   /* Do the real work.  */
   result = (struct header *) (*mallocp)(len + sizeof (struct header));
   if (result == NULL)
     {
-      catomic_increment (&failed[idx_malloc]);
+      atomic_fetch_add_acquire (&failed[idx_malloc], 1);
       return NULL;
     }
 
@@ -430,21 +444,21 @@  realloc (void *old, size_t len)
     }
 
   /* Keep track of number of calls.  */
-  catomic_increment (&calls[idx_realloc]);
+  atomic_fetch_add_acquire (&calls[idx_realloc], 1);
   if (len > old_len)
     {
       /* Keep track of total memory consumption for `realloc'.  */
-      catomic_add (&total[idx_realloc], len - old_len);
+      atomic_fetch_add_acquire (&total[idx_realloc], len - old_len);
       /* Keep track of total memory requirement.  */
-      catomic_add (&grand_total, len - old_len);
+      atomic_fetch_add_acquire (&grand_total, len - old_len);
     }
 
   if (len == 0 && old != NULL)
     {
       /* Special case.  */
-      catomic_increment (&realloc_free);
+      atomic_fetch_add_acquire (&realloc_free, 1);
       /* Keep track of total memory freed using `free'.  */
-      catomic_add (&total[idx_free], real->length);
+      atomic_fetch_add_acquire (&total[idx_free], real->length);
 
       /* Update the allocation data and write out the records if necessary.  */
       update_data (NULL, 0, old_len);
@@ -457,26 +471,26 @@  realloc (void *old, size_t len)
 
   /* Remember the size of the request.  */
   if (len < 65536)
-    catomic_increment (&histogram[len / 16]);
+    atomic_fetch_add_acquire (&histogram[len / 16], 1);
   else
-    catomic_increment (&large);
+    atomic_fetch_add_acquire (&large, 1);
   /* Total number of calls of any of the functions.  */
-  catomic_increment (&calls_total);
+  atomic_fetch_add_acquire (&calls_total, 1);
 
   /* Do the real work.  */
   result = (struct header *) (*reallocp)(real, len + sizeof (struct header));
   if (result == NULL)
     {
-      catomic_increment (&failed[idx_realloc]);
+      atomic_fetch_add_acquire (&failed[idx_realloc], 1);
       return NULL;
     }
 
   /* Record whether the reduction/increase happened in place.  */
   if (real == result)
-    catomic_increment (&inplace);
+    atomic_fetch_add_acquire (&inplace, 1);
   /* Was the buffer increased?  */
   if (old_len > len)
-    catomic_increment (&decreasing);
+    atomic_fetch_add_acquire (&decreasing, 1);
 
   /* Update the allocation data and write out the records if necessary.  */
   update_data (result, len, old_len);
@@ -508,16 +522,16 @@  calloc (size_t n, size_t len)
     return (*callocp)(n, len);
 
   /* Keep track of number of calls.  */
-  catomic_increment (&calls[idx_calloc]);
+  atomic_fetch_add_acquire (&calls[idx_calloc], 1);
   /* Keep track of total memory consumption for `calloc'.  */
-  catomic_add (&total[idx_calloc], size);
+  atomic_fetch_add_acquire (&total[idx_calloc], size);
   /* Keep track of total memory requirement.  */
-  catomic_add (&grand_total, size);
+  atomic_fetch_add_acquire (&grand_total, size);
   /* Remember the size of the request.  */
   if (size < 65536)
-    catomic_increment (&histogram[size / 16]);
+    atomic_fetch_add_acquire (&histogram[size / 16], 1);
   else
-    catomic_increment (&large);
+    atomic_fetch_add_acquire (&large, 1);
   /* Total number of calls of any of the functions.  */
   ++calls_total;
 
@@ -525,7 +539,7 @@  calloc (size_t n, size_t len)
   result = (struct header *) (*mallocp)(size + sizeof (struct header));
   if (result == NULL)
     {
-      catomic_increment (&failed[idx_calloc]);
+      atomic_fetch_add_acquire (&failed[idx_calloc], 1);
       return NULL;
     }
 
@@ -563,7 +577,7 @@  free (void *ptr)
   /* `free (NULL)' has no effect.  */
   if (ptr == NULL)
     {
-      catomic_increment (&calls[idx_free]);
+      atomic_fetch_add_acquire (&calls[idx_free], 1);
       return;
     }
 
@@ -577,9 +591,9 @@  free (void *ptr)
     }
 
   /* Keep track of number of calls.  */
-  catomic_increment (&calls[idx_free]);
+  atomic_fetch_add_acquire (&calls[idx_free], 1);
   /* Keep track of total memory freed using `free'.  */
-  catomic_add (&total[idx_free], real->length);
+  atomic_fetch_add_acquire (&total[idx_free], real->length);
 
   /* Update the allocation data and write out the records if necessary.  */
   update_data (NULL, 0, real->length);
@@ -614,22 +628,22 @@  mmap (void *start, size_t len, int prot, int flags, int fd, off_t offset)
                  ? idx_mmap_a : prot & PROT_WRITE ? idx_mmap_w : idx_mmap_r);
 
       /* Keep track of number of calls.  */
-      catomic_increment (&calls[idx]);
+      atomic_fetch_add_acquire (&calls[idx], 1);
       /* Keep track of total memory consumption for `malloc'.  */
-      catomic_add (&total[idx], len);
+      atomic_fetch_add_acquire (&total[idx], len);
       /* Keep track of total memory requirement.  */
-      catomic_add (&grand_total, len);
+      atomic_fetch_add_acquire (&grand_total, len);
       /* Remember the size of the request.  */
       if (len < 65536)
-        catomic_increment (&histogram[len / 16]);
+        atomic_fetch_add_acquire (&histogram[len / 16], 1);
       else
-        catomic_increment (&large);
+        atomic_fetch_add_acquire (&large, 1);
       /* Total number of calls of any of the functions.  */
-      catomic_increment (&calls_total);
+      atomic_fetch_add_acquire (&calls_total, 1);
 
       /* Check for failures.  */
       if (result == NULL)
-        catomic_increment (&failed[idx]);
+        atomic_fetch_add_acquire (&failed[idx], 1);
       else if (idx == idx_mmap_w)
         /* Update the allocation data and write out the records if
            necessary.  Note the first parameter is NULL which means
@@ -667,22 +681,22 @@  mmap64 (void *start, size_t len, int prot, int flags, int fd, off64_t offset)
                  ? idx_mmap_a : prot & PROT_WRITE ? idx_mmap_w : idx_mmap_r);
 
       /* Keep track of number of calls.  */
-      catomic_increment (&calls[idx]);
+      atomic_fetch_add_acquire (&calls[idx], 1);
       /* Keep track of total memory consumption for `malloc'.  */
-      catomic_add (&total[idx], len);
+      atomic_fetch_add_acquire (&total[idx], len);
       /* Keep track of total memory requirement.  */
-      catomic_add (&grand_total, len);
+      atomic_fetch_add_acquire (&grand_total, len);
       /* Remember the size of the request.  */
       if (len < 65536)
-        catomic_increment (&histogram[len / 16]);
+        atomic_fetch_add_acquire (&histogram[len / 16], 1);
       else
-        catomic_increment (&large);
+        atomic_fetch_add_acquire (&large, 1);
       /* Total number of calls of any of the functions.  */
-      catomic_increment (&calls_total);
+      atomic_fetch_add_acquire (&calls_total, 1);
 
       /* Check for failures.  */
       if (result == NULL)
-        catomic_increment (&failed[idx]);
+        atomic_fetch_add_acquire (&failed[idx], 1);
       else if (idx == idx_mmap_w)
         /* Update the allocation data and write out the records if
            necessary.  Note the first parameter is NULL which means
@@ -722,33 +736,33 @@  mremap (void *start, size_t old_len, size_t len, int flags, ...)
   if (!not_me && trace_mmap)
     {
       /* Keep track of number of calls.  */
-      catomic_increment (&calls[idx_mremap]);
+      atomic_fetch_add_acquire (&calls[idx_mremap], 1);
       if (len > old_len)
         {
           /* Keep track of total memory consumption for `malloc'.  */
-          catomic_add (&total[idx_mremap], len - old_len);
+          atomic_fetch_add_acquire (&total[idx_mremap], len - old_len);
           /* Keep track of total memory requirement.  */
-          catomic_add (&grand_total, len - old_len);
+          atomic_fetch_add_acquire (&grand_total, len - old_len);
         }
       /* Remember the size of the request.  */
       if (len < 65536)
-        catomic_increment (&histogram[len / 16]);
+        atomic_fetch_add_acquire (&histogram[len / 16], 1);
       else
-        catomic_increment (&large);
+        atomic_fetch_add_acquire (&large, 1);
       /* Total number of calls of any of the functions.  */
-      catomic_increment (&calls_total);
+      atomic_fetch_add_acquire (&calls_total, 1);
 
       /* Check for failures.  */
       if (result == NULL)
-        catomic_increment (&failed[idx_mremap]);
+        atomic_fetch_add_acquire (&failed[idx_mremap], 1);
       else
         {
           /* Record whether the reduction/increase happened in place.  */
           if (start == result)
-            catomic_increment (&inplace_mremap);
+            atomic_fetch_add_acquire (&inplace_mremap, 1);
           /* Was the buffer increased?  */
           if (old_len > len)
-            catomic_increment (&decreasing_mremap);
+            atomic_fetch_add_acquire (&decreasing_mremap, 1);
 
           /* Update the allocation data and write out the records if
              necessary.  Note the first parameter is NULL which means
@@ -783,19 +797,19 @@  munmap (void *start, size_t len)
   if (!not_me && trace_mmap)
     {
       /* Keep track of number of calls.  */
-      catomic_increment (&calls[idx_munmap]);
+      atomic_fetch_add_acquire (&calls[idx_munmap], 1);
 
       if (__glibc_likely (result == 0))
         {
           /* Keep track of total memory freed using `free'.  */
-          catomic_add (&total[idx_munmap], len);
+          atomic_fetch_add_acquire (&total[idx_munmap], len);
 
           /* Update the allocation data and write out the records if
              necessary.  */
           update_data (NULL, 0, len);
         }
       else
-        catomic_increment (&failed[idx_munmap]);
+        atomic_fetch_add_acquire (&failed[idx_munmap], 1);
     }
 
   return result;