Remove atomic_forced_read

Message ID: AM5PR0801MB16688EAC29BF06DEC4FCD9CC838E9@AM5PR0801MB1668.eurprd08.prod.outlook.com
State: New
Series: Remove atomic_forced_read

Checks

Context                             Check    Description
dj/TryBot-apply_patch               success  Patch applied to master at the time it was sent
dj/TryBot-32bit                     success  Build for i686
redhat-pt-bot/TryBot-still_applies  warning  Patch no longer applies to master

Commit Message

Wilco Dijkstra July 20, 2022, 4:50 p.m. UTC
  Remove the odd atomic_forced_read, which is neither atomic nor forced.
Some uses are completely redundant, so simply remove them. In other cases
the intended use is to force a memory ordering, so use an acquire load there.
In yet other cases the purpose is unclear; for example, __nscd_cache_search
appears to allow concurrent accesses to the cache while it is being garbage
collected by another thread! Use relaxed atomic loads there to prevent the
compiler from spilling and then reloading memory that is concurrently being
modified. However, given that there are multiple accesses without any
synchronization, it is unclear how this could ever work reliably...
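
For context, the macro being removed (see the include/atomic.h hunk below) is
only a compiler barrier that forces a single read into a register; it provides
no ordering at the CPU level:

  #define atomic_forced_read(x) \
    ({ __typeof (x) __x; __asm ("" : "=r" (__x) : "0" (x)); __x; })

Where real ordering is needed, as in do_lookup_x, an acquire load only helps
if every writer pairs it with a release store. A minimal sketch of the pairing
the new comment relies on (new_list and new_nlist are illustrative names, not
the actual dl-open.c code):

  /* Reader (do_lookup_x): must not observe the resized r_nlist together
     with the old r_list.  */
  size_t n = atomic_load_acquire (&scope->r_nlist);
  struct link_map **list = scope->r_list;

  /* Writer (scope resize): publish the new r_list before releasing the
     new r_nlist.  */
  scope->r_list = new_list;
  atomic_store_release (&scope->r_nlist, new_nlist);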

Passes regression testing on AArch64. OK for commit?

---
  

Comments

Carlos O'Donell Aug. 1, 2022, 1:18 p.m. UTC | #1
On 7/20/22 12:50, Wilco Dijkstra via Libc-alpha wrote:
> Remove the odd atomic_forced_read, which is neither atomic nor forced.
> Some uses are completely redundant, so simply remove them. In other cases
> the intended use is to force a memory ordering, so use an acquire load there.
> In yet other cases the purpose is unclear; for example, __nscd_cache_search
> appears to allow concurrent accesses to the cache while it is being garbage
> collected by another thread! Use relaxed atomic loads there to prevent the
> compiler from spilling and then reloading memory that is concurrently being
> modified. However, given that there are multiple accesses without any
> synchronization, it is unclear how this could ever work reliably...
> 
> Passes regression testing on AArch64. OK for commit?

This fails pre-commit CI in that it doesn't apply.

How are you generating these patches?
 
  
Wilco Dijkstra Aug. 1, 2022, 2:02 p.m. UTC | #2
Hi Carlos,

> This fails pre-commit CI in that it doesn't apply.
>
> How are you generating these patches?

These patches are independent, but they do edit adjacent lines in the same files.
So it sounds like the merge tool is getting confused (which is stupid, because no
context is needed when you remove lines; all you need is to match the actual
lines removed, and that's trivial since they didn't change). When the earlier
patches get committed, the context should match up again and then they should apply.

Cheers,
Wilco
  
Andreas Schwab Aug. 1, 2022, 2:08 p.m. UTC | #3
On Aug 01 2022, Wilco Dijkstra via Libc-alpha wrote:

> So it sounds like the merge tool is getting confused (which is stupid, because no
> context is needed when you remove lines; all you need is to match the actual
> lines removed, and that's trivial since they didn't change).

That's not true if the same lines occur several times in a file, in
different contexts.
  
Wilco Dijkstra Aug. 1, 2022, 2:28 p.m. UTC | #4
Hi Andreas,

> That's not true if the same lines occur several times in a file, in
> different contexts.

Sure, but that's not the case here (nor, I believe, for any of my other
patches). Anyway, it seems this patch did merge without issues:

https://patchwork.sourceware.org/project/glibc/list/?series=11045

Cheers,
Wilco
  
Andreas Schwab Aug. 1, 2022, 2:51 p.m. UTC | #5
On Aug 01 2022, Wilco Dijkstra wrote:

> Hi Andreas,
>
>> That's not true if the same lines occur several times in a file, in
>> different contexts.
>
> Sure, but that's not the case here (nor, I believe, for any of my other
> patches).

If the context changes, maybe the lines need to be handled differently.
There is nothing stupid about checking the context.
  

Patch

diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
index 8cb32321da33ea4f41e4a6cee038c2a6697ad817..02c63a7062b2be0f37a412160fdb2b3468cc70cf 100644
--- a/elf/dl-lookup.c
+++ b/elf/dl-lookup.c
@@ -346,12 +346,12 @@  do_lookup_x (const char *undef_name, unsigned int new_hash,
 	     const struct r_found_version *const version, int flags,
 	     struct link_map *skip, int type_class, struct link_map *undef_map)
 {
-  size_t n = scope->r_nlist;
-  /* Make sure we read the value before proceeding.  Otherwise we
+  /* Make sure we read r_nlist before r_list, or otherwise we
      might use r_list pointing to the initial scope and r_nlist being
      the value after a resize.  That is the only path in dl-open.c not
-     protected by GSCOPE.  A read barrier here might be to expensive.  */
-  __asm volatile ("" : "+r" (n), "+m" (scope->r_list));
+     protected by GSCOPE.  This works if all updates also use a store-
+     release or release barrier.  */
+  size_t n = atomic_load_acquire (&scope->r_nlist);
   struct link_map **list = scope->r_list;
 
   do
@@ -528,15 +528,13 @@  add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
   if (is_nodelete (map, flags))
     return 0;
 
-  struct link_map_reldeps *l_reldeps
-    = atomic_forced_read (undef_map->l_reldeps);
-
   /* Make sure l_reldeps is read before l_initfini.  */
-  atomic_read_barrier ();
+  struct link_map_reldeps *l_reldeps
+    = atomic_load_acquire (&undef_map->l_reldeps);
 
   /* Determine whether UNDEF_MAP already has a reference to MAP.  First
      look in the normal dependencies.  */
-  struct link_map **l_initfini = atomic_forced_read (undef_map->l_initfini);
+  struct link_map **l_initfini = undef_map->l_initfini;
   if (l_initfini != NULL)
     {
       for (i = 0; l_initfini[i] != NULL; ++i)
@@ -570,7 +568,7 @@  add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
 	 it can e.g. point to unallocated memory.  So avoid the optimizer
 	 treating the above read from MAP->l_serial as ensurance it
 	 can safely dereference it.  */
-      map = atomic_forced_read (map);
+      __asm ("" : "=r" (map) : "0" (map));
 
       /* From this point on it is unsafe to dereference MAP, until it
 	 has been found in one of the lists.  */
diff --git a/include/atomic.h b/include/atomic.h
index 53bbf0423344ceda6cf98653ffa90e8d4f5d81aa..8eb56362ba18eb4836070930d5f2e769fb6a0a1e 100644
--- a/include/atomic.h
+++ b/include/atomic.h
@@ -119,11 +119,6 @@ 
 #endif
 
 
-#ifndef atomic_forced_read
-# define atomic_forced_read(x) \
-  ({ __typeof (x) __x; __asm ("" : "=r" (__x) : "0" (x)); __x; })
-#endif
-
 /* This is equal to 1 iff the architecture supports 64b atomic operations.  */
 #ifndef __HAVE_64B_ATOMICS
 #error Unable to determine if 64-bit atomics are present.
diff --git a/malloc/malloc-debug.c b/malloc/malloc-debug.c
index 43604ac2641e2b80eb0e4f20747af895ab2e6d55..4e56ff71f0fd1895c770f58667db93c0372a5aee 100644
--- a/malloc/malloc-debug.c
+++ b/malloc/malloc-debug.c
@@ -168,7 +168,7 @@  static mchunkptr dumped_main_arena_end;   /* Exclusive.  */
 static void *
 __debug_malloc (size_t bytes)
 {
-  void *(*hook) (size_t, const void *) = atomic_forced_read (__malloc_hook);
+  void *(*hook) (size_t, const void *) = __malloc_hook;
   if (__builtin_expect (hook != NULL, 0))
     return (*hook)(bytes, RETURN_ADDRESS (0));
 
@@ -192,7 +192,7 @@  strong_alias (__debug_malloc, malloc)
 static void
 __debug_free (void *mem)
 {
-  void (*hook) (void *, const void *) = atomic_forced_read (__free_hook);
+  void (*hook) (void *, const void *) = __free_hook;
   if (__builtin_expect (hook != NULL, 0))
     {
       (*hook)(mem, RETURN_ADDRESS (0));
@@ -216,8 +216,7 @@  strong_alias (__debug_free, free)
 static void *
 __debug_realloc (void *oldmem, size_t bytes)
 {
-  void *(*hook) (void *, size_t, const void *) =
-    atomic_forced_read (__realloc_hook);
+  void *(*hook) (void *, size_t, const void *) = __realloc_hook;
   if (__builtin_expect (hook != NULL, 0))
     return (*hook)(oldmem, bytes, RETURN_ADDRESS (0));
 
@@ -270,8 +269,7 @@  strong_alias (__debug_realloc, realloc)
 static void *
 _debug_mid_memalign (size_t alignment, size_t bytes, const void *address)
 {
-  void *(*hook) (size_t, size_t, const void *) =
-    atomic_forced_read (__memalign_hook);
+  void *(*hook) (size_t, size_t, const void *) = __memalign_hook;
   if (__builtin_expect (hook != NULL, 0))
     return (*hook)(alignment, bytes, address);
 
@@ -363,7 +361,7 @@  __debug_calloc (size_t nmemb, size_t size)
       return NULL;
     }
 
-  void *(*hook) (size_t, const void *) = atomic_forced_read (__malloc_hook);
+  void *(*hook) (size_t, const void *) = __malloc_hook;
   if (__builtin_expect (hook != NULL, 0))
     {
       void *mem = (*hook)(bytes, RETURN_ADDRESS (0));
diff --git a/nptl/pthread_sigqueue.c b/nptl/pthread_sigqueue.c
index 48dc3ca4ee5673b7b4b2543b823d6e9354ae9849..f4149fb1779eacea0ead107f7bfce32b22114f3b 100644
--- a/nptl/pthread_sigqueue.c
+++ b/nptl/pthread_sigqueue.c
@@ -33,7 +33,7 @@  __pthread_sigqueue (pthread_t threadid, int signo, const union sigval value)
   /* Force load of pd->tid into local variable or register.  Otherwise
      if a thread exits between ESRCH test and tgkill, we might return
      EINVAL, because pd->tid would be cleared by the kernel.  */
-  pid_t tid = atomic_forced_read (pd->tid);
+  pid_t tid = atomic_load_relaxed (&pd->tid);
   if (__glibc_unlikely (tid <= 0))
     /* Not a valid thread handle.  */
     return ESRCH;
diff --git a/nscd/nscd_helper.c b/nscd/nscd_helper.c
index fc41bfdb6eebb880d6132ea5cf409ca657570f82..7a9a49955691e15079a94b78a12f9efed381ecb5 100644
--- a/nscd/nscd_helper.c
+++ b/nscd/nscd_helper.c
@@ -454,7 +454,6 @@  __nscd_cache_search (request_type type, const char *key, size_t keylen,
   size_t datasize = mapped->datasize;
 
   ref_t trail = mapped->head->array[hash];
-  trail = atomic_forced_read (trail);
   ref_t work = trail;
   size_t loop_cnt = datasize / (MINIMUM_HASHENTRY_SIZE
 				+ offsetof (struct datahead, data) / 2);
@@ -465,32 +464,29 @@  __nscd_cache_search (request_type type, const char *key, size_t keylen,
       struct hashentry *here = (struct hashentry *) (mapped->data + work);
       ref_t here_key, here_packet;
 
-#if !_STRING_ARCH_unaligned
       /* Although during garbage collection when moving struct hashentry
 	 records around we first copy from old to new location and then
 	 adjust pointer from previous hashentry to it, there is no barrier
-	 between those memory writes.  It is very unlikely to hit it,
-	 so check alignment only if a misaligned load can crash the
-	 application.  */
+	 between those memory writes!!! This is extremely risky on any
+	 modern CPU which can reorder memory accesses very aggressively.
+	 Check alignment, both as a partial consistency check and to avoid
+	 crashes on targets which require atomic loads to be aligned.  */
       if ((uintptr_t) here & (__alignof__ (*here) - 1))
 	return NULL;
-#endif
 
       if (type == here->type
 	  && keylen == here->len
-	  && (here_key = atomic_forced_read (here->key)) + keylen <= datasize
+	  && (here_key = atomic_load_relaxed (&here->key)) + keylen <= datasize
 	  && memcmp (key, mapped->data + here_key, keylen) == 0
-	  && ((here_packet = atomic_forced_read (here->packet))
+	  && ((here_packet = atomic_load_relaxed (&here->packet))
 	      + sizeof (struct datahead) <= datasize))
 	{
 	  /* We found the entry.  Increment the appropriate counter.  */
 	  struct datahead *dh
 	    = (struct datahead *) (mapped->data + here_packet);
 
-#if !_STRING_ARCH_unaligned
 	  if ((uintptr_t) dh & (__alignof__ (*dh) - 1))
 	    return NULL;
-#endif
 
 	  /* See whether we must ignore the entry or whether something
 	     is wrong because garbage collection is in progress.  */
@@ -501,7 +497,7 @@  __nscd_cache_search (request_type type, const char *key, size_t keylen,
 	    return dh;
 	}
 
-      work = atomic_forced_read (here->next);
+      work = atomic_load_relaxed (&here->next);
       /* Prevent endless loops.  This should never happen but perhaps
 	 the database got corrupted, accidentally or deliberately.  */
       if (work == trail || loop_cnt-- == 0)
@@ -511,16 +507,14 @@  __nscd_cache_search (request_type type, const char *key, size_t keylen,
 	  struct hashentry *trailelem;
 	  trailelem = (struct hashentry *) (mapped->data + trail);
 
-#if !_STRING_ARCH_unaligned
 	  /* We have to redo the checks.  Maybe the data changed.  */
 	  if ((uintptr_t) trailelem & (__alignof__ (*trailelem) - 1))
 	    return NULL;
-#endif
 
 	  if (trail + MINIMUM_HASHENTRY_SIZE > datasize)
 	    return NULL;
 
-	  trail = atomic_forced_read (trailelem->next);
+	  trail = atomic_load_relaxed (&trailelem->next);
 	}
       tick = 1 - tick;
     }
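
A note on the relaxed loads in __nscd_cache_search: a plain load from the
shared mapping may legally be performed more than once, since the compiler can
rematerialize the value under register pressure instead of spilling it, so a
bounds check and a later use could observe two different values. A minimal
sketch of the hazard, reusing the fields from the loop above:

  /* Racy: the compiler may reload here->key between the bounds check and
     the memcmp, so concurrent garbage collection can make the checked and
     the used value differ.  */
  ref_t k = here->key;
  if (k + keylen <= datasize
      && memcmp (key, mapped->data + k, keylen) == 0)
    /* ... */;

  /* atomic_load_relaxed guarantees exactly one load, so both uses see the
     same value, although it still provides no ordering against the
     concurrent writes themselves.  */
  ref_t here_key = atomic_load_relaxed (&here->key);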