[v3] Use atomic_thread_fence

Message ID AS4PR08MB7901C99B312CCDE06FDB66FD83259@AS4PR08MB7901.eurprd08.prod.outlook.com (mailing list archive)
State New
Delegated to: Adhemerval Zanella Netto
Headers
Series [v3] Use atomic_thread_fence |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Wilco Dijkstra Oct. 13, 2022, 4:12 p.m. UTC
  v3: rebased to latest GLIBC

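Replace the atomic barriers based on sync primitives with the equivalent
atomic_thread_fence variants: atomic_full_barrier becomes
atomic_thread_fence_seq_cst, atomic_read_barrier becomes
atomic_thread_fence_acquire, and atomic_write_barrier becomes
atomic_thread_fence_release.  Many of the existing uses appear suspect;
in the future it would be useful to fix them to use load_acquire or
store_release instead.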

Passes the GLIBC testsuite.

---
  

Patch

diff --git a/crypt/crypt_util.c b/crypt/crypt_util.c
index be925e3484e65d2180e07915f5d91b47f6b96393..a8c2b26ed13c27804a9465f2a54caa09aaec3814 100644
--- a/crypt/crypt_util.c
+++ b/crypt/crypt_util.c
@@ -453,14 +453,14 @@  __init_des_r (struct crypt_data * __restrict __data)
 	  efp[comes_from_word][word_value][o_long] |= mask2;
       }
     }
-    atomic_write_barrier ();
+    atomic_thread_fence_release ();
     small_tables_initialized = 1;
 #ifdef __GNU_LIBRARY__
 small_tables_done:
     __libc_lock_unlock(_ufc_tables_lock);
 #endif
   } else
-    atomic_read_barrier ();
+    atomic_thread_fence_acquire ();
 
   /*
    * Create the sb tables:
diff --git a/elf/dl-deps.c b/elf/dl-deps.c
index 06005a0cc8686cc7e63cd8e1b1e7deda01fe6688..11b3fda5fdeb3830d3d5a0031084b43847444e04 100644
--- a/elf/dl-deps.c
+++ b/elf/dl-deps.c
@@ -430,7 +430,7 @@  _dl_map_object_deps (struct link_map *map,
 	  memcpy (&l_initfini[1], needed, nneeded * sizeof needed[0]);
 	  memcpy (&l_initfini[nneeded + 1], l_initfini,
 		  nneeded * sizeof needed[0]);
-	  atomic_write_barrier ();
+	  atomic_thread_fence_release ();
 	  l->l_initfini = l_initfini;
 	  l->l_free_initfini = 1;
 	}
@@ -555,12 +555,12 @@  _dl_map_object_deps (struct link_map *map,
 
   /* Terminate the list of dependencies.  */
   l_initfini[nlist] = NULL;
-  atomic_write_barrier ();
+  atomic_thread_fence_release ();
   map->l_initfini = l_initfini;
   map->l_free_initfini = 1;
   if (l_reldeps != NULL)
     {
-      atomic_write_barrier ();
+      atomic_thread_fence_release ();
       void *old_l_reldeps = map->l_reldeps;
       map->l_reldeps = l_reldeps;
       _dl_scope_free (old_l_reldeps);
diff --git a/elf/dl-lookup.c b/elf/dl-lookup.c
index 4c86dc694e0eebb23421e08103ee7f3c0adaadf9..68c66f61bc57da2e4e8736bf171f59b3e1b9052f 100644
--- a/elf/dl-lookup.c
+++ b/elf/dl-lookup.c
@@ -532,7 +532,7 @@  add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
     = atomic_forced_read (undef_map->l_reldeps);
 
   /* Make sure l_reldeps is read before l_initfini.  */
-  atomic_read_barrier ();
+  atomic_thread_fence_acquire ();
 
   /* Determine whether UNDEF_MAP already has a reference to MAP.  First
      look in the normal dependencies.  */
@@ -697,7 +697,7 @@  marking %s [%lu] as NODELETE due to memory allocation failure\n",
 			l_reldepsact * sizeof (struct link_map *));
 	      newp->list[l_reldepsact] = map;
 	      newp->act = l_reldepsact + 1;
-	      atomic_write_barrier ();
+	      atomic_thread_fence_release ();
 	      void *old = undef_map->l_reldeps;
 	      undef_map->l_reldeps = newp;
 	      undef_map->l_reldepsmax = max;
@@ -708,7 +708,7 @@  marking %s [%lu] as NODELETE due to memory allocation failure\n",
       else
 	{
 	  undef_map->l_reldeps->list[l_reldepsact] = map;
-	  atomic_write_barrier ();
+	  atomic_thread_fence_release ();
 	  undef_map->l_reldeps->act = l_reldepsact + 1;
 	}
 
diff --git a/elf/dl-open.c b/elf/dl-open.c
index 46e8066fd89f0627d7456210f6cc15f85b0cef06..8cb1e9717019dbdb5bb2fb75f367fc0289211388 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -202,7 +202,7 @@  add_to_global_update (struct link_map *new)
   assert (added <= ns->_ns_global_scope_pending_adds);
   ns->_ns_global_scope_pending_adds -= added;
 
-  atomic_write_barrier ();
+  atomic_thread_fence_release ();
   ns->_ns_main_searchlist->r_nlist = new_nlist;
 }
 
@@ -342,7 +342,7 @@  update_scopes (struct link_map *new)
 	     might use the new last element and then use the garbage
 	     at offset IDX+1.  */
 	  imap->l_scope[cnt + 1] = NULL;
-	  atomic_write_barrier ();
+	  atomic_thread_fence_release ();
 	  imap->l_scope[cnt] = &new->l_searchlist;
 
 	  from_scope = cnt;
diff --git a/include/list.h b/include/list.h
index 7bea2c50a3759c0c8640971eff1e80874e3b543f..31a8a93fa2491b38c368b9e07dd65c473e4eb19f 100644
--- a/include/list.h
+++ b/include/list.h
@@ -43,7 +43,7 @@  list_add (list_t *newp, list_t *head)
   newp->next = head->next;
   newp->prev = head;
   head->next->prev = newp;
-  atomic_write_barrier ();
+  atomic_thread_fence_release ();
   head->next = newp;
 }
 
diff --git a/malloc/arena.c b/malloc/arena.c
index 074ecbc09f77b566df442d099b48444f2a22fa5c..36786530edafe02cab0177b6afb8bcbe32323a96 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -111,7 +111,7 @@  static mstate free_list;
    malloc_state objects.
 
    Read access to the next member is supposed to synchronize with the
-   atomic_write_barrier and the write to the next member in
+   atomic_thread_fence_release and the write to the next member in
    _int_new_arena.  This suffers from data races; see the FIXME
    comments in _int_new_arena and reused_arena.
 
@@ -770,7 +770,7 @@  _int_new_arena (size_t size)
   /* FIXME: The barrier is an attempt to synchronize with read access
      in reused_arena, which does not acquire list_lock while
      traversing the list.  */
-  atomic_write_barrier ();
+  atomic_thread_fence_release ();
   main_arena.next = a;
 
   __libc_lock_unlock (list_lock);
diff --git a/manual/llio.texi b/manual/llio.texi
index 1b801ee817db2935d8866894be23ffa516690ca3..eb8711a0f8e28281baf68206b7891f16f662de0b 100644
--- a/manual/llio.texi
+++ b/manual/llio.texi
@@ -2543,14 +2543,14 @@  aiocb64}, since the LFS transparently replaces the old interface.
 @c    deallocate_stack @asulock @ascuheap @aculock @acsmem
 @c     lll_lock (state_cache_lock) @asulock @aculock
 @c     stack_list_del ok
-@c      atomic_write_barrier ok
+@c      atomic_thread_fence_release ok
 @c      list_del ok
-@c      atomic_write_barrier ok
+@c      atomic_thread_fence_release ok
 @c     queue_stack @ascuheap @acsmem
 @c      stack_list_add ok
-@c       atomic_write_barrier ok
+@c       atomic_thread_fence_release ok
 @c       list_add ok
-@c       atomic_write_barrier ok
+@c       atomic_thread_fence_release ok
 @c      free_stacks @ascuheap @acsmem
 @c       list_for_each_prev_safe ok
 @c       list_entry ok
diff --git a/manual/memory.texi b/manual/memory.texi
index 9d3398a326300f9ae693fe00ff3fecaff5fce014..f69824841e5e352e8562f87b6c5dd4d96424f59f 100644
--- a/manual/memory.texi
+++ b/manual/memory.texi
@@ -395,7 +395,7 @@  this function is in @file{stdlib.h}.
 @c     mutex_init ok
 @c     mutex_lock (just-created mutex) ok, returns locked
 @c     mutex_lock (list_lock) dup @asulock @aculock
-@c     atomic_write_barrier ok
+@c     atomic_thread_fence_release ok
 @c     mutex_unlock (list_lock) @aculock
 @c    catomic_decrement ok
 @c    reused_arena @asulock @aculock
diff --git a/manual/startup.texi b/manual/startup.texi
index 9bf24123f562f75ba27a4770c69147e003b94755..4c7c2976a92d772909203bca028347e8d798b2d7 100644
--- a/manual/startup.texi
+++ b/manual/startup.texi
@@ -947,7 +947,7 @@  using @code{atexit} or @code{on_exit}.
 @c     __libc_lock_lock @asulock @aculock
 @c     calloc dup @ascuheap @acsmem
 @c     __libc_lock_unlock @aculock
-@c    atomic_write_barrier dup ok
+@c    atomic_thread_fence_release dup ok
 The @code{atexit} function registers the function @var{function} to be
 called at normal program termination.  The @var{function} is called with
 no arguments.
@@ -961,7 +961,7 @@  the function cannot be registered.
 @safety{@prelim{}@mtsafe{}@asunsafe{@ascuheap{} @asulock{}}@acunsafe{@aculock{} @acsmem{}}}
 @c on_exit @ascuheap @asulock @aculock @acsmem
 @c  new_exitfn dup @ascuheap @asulock @aculock @acsmem
-@c  atomic_write_barrier dup ok
+@c  atomic_thread_fence_release dup ok
 This function is a somewhat more powerful variant of @code{atexit}.  It
 accepts two arguments, a function @var{function} and an arbitrary
 pointer @var{arg}.  At normal program termination, the @var{function} is
diff --git a/nptl/nptl-stack.c b/nptl/nptl-stack.c
index 20ce78eddbf100833d453d7032f63bc2ba8f01c7..7c04e7faaae5c15bf5ad98c32935bcba4849c1c1 100644
--- a/nptl/nptl-stack.c
+++ b/nptl/nptl-stack.c
@@ -27,11 +27,11 @@  __nptl_stack_list_del (list_t *elem)
 {
   GL (dl_in_flight_stack) = (uintptr_t) elem;
 
-  atomic_write_barrier ();
+  atomic_thread_fence_release ();
 
   list_del (elem);
 
-  atomic_write_barrier ();
+  atomic_thread_fence_release ();
 
   GL (dl_in_flight_stack) = 0;
 }
@@ -42,11 +42,11 @@  __nptl_stack_list_add (list_t *elem, list_t *list)
 {
   GL (dl_in_flight_stack) = (uintptr_t) elem | 1;
 
-  atomic_write_barrier ();
+  atomic_thread_fence_release ();
 
   list_add (elem, list);
 
-  atomic_write_barrier ();
+  atomic_thread_fence_release ();
 
   GL (dl_in_flight_stack) = 0;
 }
diff --git a/nptl/pthread_mutex_setprioceiling.c b/nptl/pthread_mutex_setprioceiling.c
index 2d71a750c8981e8ca271c265031887e2c510583a..b574a77250664bbb1487c932b9a32a9dee415072 100644
--- a/nptl/pthread_mutex_setprioceiling.c
+++ b/nptl/pthread_mutex_setprioceiling.c
@@ -113,7 +113,7 @@  __pthread_mutex_setprioceiling (pthread_mutex_t *mutex, int prioceiling,
     newlock = (mutex->__data.__lock & ~PTHREAD_MUTEX_PRIO_CEILING_MASK);
   mutex->__data.__lock = newlock
 			 | (prioceiling << PTHREAD_MUTEX_PRIO_CEILING_SHIFT);
-  atomic_full_barrier ();
+  atomic_thread_fence_seq_cst ();
 
   futex_wake ((unsigned int *)&mutex->__data.__lock, INT_MAX,
 	      PTHREAD_MUTEX_PSHARED (mutex));
diff --git a/nptl/sem_post.c b/nptl/sem_post.c
index 7ec21e92eb4c71d7f17764e96bc7603837f7522d..d4e37cb0888cb1004881e608b82147900bc420a5 100644
--- a/nptl/sem_post.c
+++ b/nptl/sem_post.c
@@ -90,7 +90,7 @@  __old_sem_post (sem_t *sem)
 
   /* We must need to synchronize with consumers of this token, so the atomic
      increment must have release MO semantics.  */
-  atomic_write_barrier ();
+  atomic_thread_fence_release ();
   atomic_fetch_add_release (futex, 1);
   /* We always have to assume it is a shared semaphore.  */
   futex_wake (futex, 1, LLL_SHARED);
diff --git a/stdlib/msort.c b/stdlib/msort.c
index cbe9a4a8fdb38113a4c18976c9f297be103d458f..e2f1eca94ad7e9005145c376b0de3dbd1ca14f18 100644
--- a/stdlib/msort.c
+++ b/stdlib/msort.c
@@ -197,7 +197,7 @@  __qsort_r (void *b, size_t n, size_t s, __compar_d_fn_t cmp, void *arg)
 	  phys_pages /= 4;
 
 	  /* Make sure phys_pages is written to memory.  */
-	  atomic_write_barrier ();
+	  atomic_thread_fence_release ();
 
 	  pagesize = __sysconf (_SC_PAGESIZE);
 	}
diff --git a/sysdeps/aarch64/nptl/tls.h b/sysdeps/aarch64/nptl/tls.h
index 08aa2eff891b7be32243e9955d998892807c7b2e..0e5b4ece6a118b4b066bd2fd024dc85e978cc786 100644
--- a/sysdeps/aarch64/nptl/tls.h
+++ b/sysdeps/aarch64/nptl/tls.h
@@ -108,7 +108,7 @@  typedef struct
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)
 
diff --git a/sysdeps/alpha/nptl/tls.h b/sysdeps/alpha/nptl/tls.h
index 8f5b69ad3b1b0c557fa1bae55278547572a374cc..914dba422c50e4531d22eb459b41c8b958a75263 100644
--- a/sysdeps/alpha/nptl/tls.h
+++ b/sysdeps/alpha/nptl/tls.h
@@ -105,7 +105,7 @@  typedef struct
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)
 
diff --git a/sysdeps/arc/nptl/tls.h b/sysdeps/arc/nptl/tls.h
index 7fc6602b236fa2455f8de4a0540442ae85d27c98..b2749f81d2980502043f507bf7c81da48f17aa9f 100644
--- a/sysdeps/arc/nptl/tls.h
+++ b/sysdeps/arc/nptl/tls.h
@@ -113,7 +113,7 @@  typedef struct
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)
 
diff --git a/sysdeps/arm/nptl/tls.h b/sysdeps/arm/nptl/tls.h
index 7657ca3dccc2d929c71236d42fc060a4b4902e2b..b1389ba034966aff17692f2b6d0e7b04a0baf9a0 100644
--- a/sysdeps/arm/nptl/tls.h
+++ b/sysdeps/arm/nptl/tls.h
@@ -99,7 +99,7 @@  typedef struct
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)
 
diff --git a/sysdeps/csky/nptl/tls.h b/sysdeps/csky/nptl/tls.h
index 58d6ab0fb2ae90de50cffd5b4a98426c6a793050..ac54606c3c0e28c1c8d57a6475d9249ca3566abe 100644
--- a/sysdeps/csky/nptl/tls.h
+++ b/sysdeps/csky/nptl/tls.h
@@ -128,7 +128,7 @@  typedef struct
   do									      \
     {									      \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	      \
-      atomic_write_barrier ();						      \
+      atomic_thread_fence_release ();					      \
     }									      \
   while (0)
 
diff --git a/sysdeps/generic/malloc-machine.h b/sysdeps/generic/malloc-machine.h
index 001a8e7e606c584dabacc9cbf6713f137bb9b4a7..ebd6983ecc14b5b314f457fc1766a9f86561d32f 100644
--- a/sysdeps/generic/malloc-machine.h
+++ b/sysdeps/generic/malloc-machine.h
@@ -22,18 +22,6 @@ 
 
 #include <atomic.h>
 
-#ifndef atomic_full_barrier
-# define atomic_full_barrier() __asm ("" ::: "memory")
-#endif
-
-#ifndef atomic_read_barrier
-# define atomic_read_barrier() atomic_full_barrier ()
-#endif
-
-#ifndef atomic_write_barrier
-# define atomic_write_barrier() atomic_full_barrier ()
-#endif
-
 #ifndef DEFAULT_TOP_PAD
 # define DEFAULT_TOP_PAD 131072
 #endif
diff --git a/sysdeps/hppa/dl-fptr.c b/sysdeps/hppa/dl-fptr.c
index 9ed21602d6155d4b960278f8d1fac4ffa885b9d5..97882a8106a719642b2778d3ca8a69ef202cce2d 100644
--- a/sysdeps/hppa/dl-fptr.c
+++ b/sysdeps/hppa/dl-fptr.c
@@ -371,7 +371,7 @@  _dl_lookup_address (const void *address)
 
   /* First load the relocation offset.  */
   reloc_arg = (ElfW(Word)) desc[1];
-  atomic_full_barrier();
+  atomic_thread_fence_seq_cst ();
 
   /* Then load first word of candidate descriptor.  It should be a pointer
      with word alignment and point to memory that can be read.  */
diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h
index 1d5194856601e025cb4355c94c2b49358fc81076..7a729de7647db626e74c513b5819dddeac4c30c8 100644
--- a/sysdeps/hppa/dl-machine.h
+++ b/sysdeps/hppa/dl-machine.h
@@ -136,7 +136,7 @@  elf_machine_fixup_plt (struct link_map *map, lookup_t t,
       /* Need to ensure that the gp is visible before the code
          entry point is updated */
       rfdesc[1] = value.gp;
-      atomic_full_barrier();
+      atomic_thread_fence_seq_cst ();
       rfdesc[0] = value.ip;
     }
   else
diff --git a/sysdeps/hppa/nptl/tls.h b/sysdeps/hppa/nptl/tls.h
index e6b0bd5c7182b497aaf0d2bb08f62551a223c403..5b2495637b8aec9df3c8b3d47a2b46aa632c84da 100644
--- a/sysdeps/hppa/nptl/tls.h
+++ b/sysdeps/hppa/nptl/tls.h
@@ -133,7 +133,7 @@  static inline void __set_cr27(struct pthread *cr27)
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)
 
diff --git a/sysdeps/htl/pt-once.c b/sysdeps/htl/pt-once.c
index b85b196645958fc7f47b08b39e91077b82817cdc..55db6c3d7176a9ca6cf2d0caccc1cf01aeb5ea2f 100644
--- a/sysdeps/htl/pt-once.c
+++ b/sysdeps/htl/pt-once.c
@@ -33,7 +33,7 @@  __pthread_once (pthread_once_t *once_control, void (*init_routine) (void))
 {
   ASSERT_TYPE_SIZE (pthread_once_t, __SIZEOF_PTHREAD_ONCE_T);
 
-  atomic_full_barrier ();
+  atomic_thread_fence_seq_cst ();
   if (once_control->__run == 0)
     {
       __pthread_spin_wait (&once_control->__lock);
@@ -44,7 +44,7 @@  __pthread_once (pthread_once_t *once_control, void (*init_routine) (void))
 	  init_routine ();
 	  pthread_cleanup_pop (0);
 
-	  atomic_full_barrier ();
+	  atomic_thread_fence_seq_cst ();
 	  once_control->__run = 1;
 	}
 
diff --git a/sysdeps/ia64/nptl/tls.h b/sysdeps/ia64/nptl/tls.h
index d2411b3c1ac29733c0bb3683d83388e2e0e8e277..7709e644ee04ebc935dc659806481eebcb4129f0 100644
--- a/sysdeps/ia64/nptl/tls.h
+++ b/sysdeps/ia64/nptl/tls.h
@@ -157,7 +157,7 @@  register struct pthread *__thread_self __asm__("r13");
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)
 
diff --git a/sysdeps/loongarch/nptl/tls.h b/sysdeps/loongarch/nptl/tls.h
index 24bffe3a0adb91cda193463e2d0aa7d5a4d61b6f..65ae94cedf379d603d9a0c547f0cca3c4e272351 100644
--- a/sysdeps/loongarch/nptl/tls.h
+++ b/sysdeps/loongarch/nptl/tls.h
@@ -129,7 +129,7 @@  typedef struct
   do \
     { \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED; \
-      atomic_write_barrier (); \
+      atomic_thread_fence_release (); \
     } \
   while (0)
 
diff --git a/sysdeps/m68k/nptl/tls.h b/sysdeps/m68k/nptl/tls.h
index 742e1b6767d99fa6011ac1d207264c7b82e53787..dfba7a568016b8e10dac6c21d65c785eaab12a09 100644
--- a/sysdeps/m68k/nptl/tls.h
+++ b/sysdeps/m68k/nptl/tls.h
@@ -132,7 +132,7 @@  extern void * __m68k_read_tp (void);
   do									\
     {									\
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	\
-      atomic_write_barrier ();						\
+      atomic_thread_fence_release ();					\
     }									\
   while (0)
 
diff --git a/sysdeps/mach/hurd/htl/pt-mutex-destroy.c b/sysdeps/mach/hurd/htl/pt-mutex-destroy.c
index 71f789cbdfa20b79f9e3cef5a2523d4e243b4f19..feb9085af9499b5d954a1bf4f04d9cbcb804e8cf 100644
--- a/sysdeps/mach/hurd/htl/pt-mutex-destroy.c
+++ b/sysdeps/mach/hurd/htl/pt-mutex-destroy.c
@@ -26,7 +26,7 @@ 
 int
 __pthread_mutex_destroy (pthread_mutex_t *mtxp)
 {
-  atomic_read_barrier ();
+  atomic_thread_fence_acquire ();
   if (*(volatile unsigned int *) &mtxp->__lock != 0)
     return EBUSY;
 
diff --git a/sysdeps/mach/hurd/htl/pt-mutex.h b/sysdeps/mach/hurd/htl/pt-mutex.h
index 4021e72a6e8d15316336296ff732a4e7fd1acdff..ebdf8a5fbde0f755c7625cd38ad185ee0c977b5f 100644
--- a/sysdeps/mach/hurd/htl/pt-mutex.h
+++ b/sysdeps/mach/hurd/htl/pt-mutex.h
@@ -54,7 +54,7 @@ 
           if (ret == EOWNERDEAD)   \
             {   \
               mtxp->__lock = mtxp->__lock | LLL_DEAD_OWNER;   \
-              atomic_write_barrier ();   \
+              atomic_thread_fence_release ();   \
             }   \
         }   \
     }   \
diff --git a/sysdeps/microblaze/nptl/tls.h b/sysdeps/microblaze/nptl/tls.h
index 588fd1c5d63ee4e6a1b284cc19e216b6730a2091..30e5d628be8b78cf9c7b8e9386ab1b2355819f4a 100644
--- a/sysdeps/microblaze/nptl/tls.h
+++ b/sysdeps/microblaze/nptl/tls.h
@@ -110,7 +110,7 @@  typedef struct
   do                                                                        \
     {                                                                       \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;            \
-      atomic_write_barrier ();                                              \
+      atomic_thread_fence_release ();                                       \
     }                                                                       \
   while (0)
 
diff --git a/sysdeps/mips/nptl/tls.h b/sysdeps/mips/nptl/tls.h
index 2aa7cb4bb8d0b5a31889aa33d5751104ff1e4f45..e4c5d2a876db7943b38daa270f4681d17b441c58 100644
--- a/sysdeps/mips/nptl/tls.h
+++ b/sysdeps/mips/nptl/tls.h
@@ -160,7 +160,7 @@  typedef struct
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)
 
diff --git a/sysdeps/nios2/nptl/tls.h b/sysdeps/nios2/nptl/tls.h
index cb231e2a4bbfa52495c4c017a7d3e1c6dd7937ca..50dbbef119af30112136a9bef187c79037d0849d 100644
--- a/sysdeps/nios2/nptl/tls.h
+++ b/sysdeps/nios2/nptl/tls.h
@@ -140,7 +140,7 @@  register struct pthread *__thread_self __asm__("r23");
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)
 
diff --git a/sysdeps/or1k/nptl/tls.h b/sysdeps/or1k/nptl/tls.h
index e82f444738de222c0e4866d5a2ab8191ce99ddc9..886c017be12d06ee63198a36917c59e24be77a60 100644
--- a/sysdeps/or1k/nptl/tls.h
+++ b/sysdeps/or1k/nptl/tls.h
@@ -175,7 +175,7 @@  register tcbhead_t *__thread_self __asm__("r10");
   do									\
     {									\
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	\
-      atomic_write_barrier ();						\
+      atomic_thread_fence_release ();					\
     }									\
   while (0)
 
diff --git a/sysdeps/powerpc/nptl/tls.h b/sysdeps/powerpc/nptl/tls.h
index e62a96238aa95c79ac1f749b4dbf03985b6e15d4..c8d233a7347f609b4cdbffb5dafa2f55e18ac18e 100644
--- a/sysdeps/powerpc/nptl/tls.h
+++ b/sysdeps/powerpc/nptl/tls.h
@@ -224,7 +224,7 @@  typedef struct
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)
 
diff --git a/sysdeps/riscv/nptl/tls.h b/sysdeps/riscv/nptl/tls.h
index 700c2f51899b0385d7ebaa4810c84de4fa6f2b45..020a986ceee89e1feb8f76c51f224a8faea71bbb 100644
--- a/sysdeps/riscv/nptl/tls.h
+++ b/sysdeps/riscv/nptl/tls.h
@@ -123,7 +123,7 @@  typedef struct
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)
 
diff --git a/sysdeps/s390/nptl/tls.h b/sysdeps/s390/nptl/tls.h
index 98d7870148ce6bc1d6397b1465dfabe96f7280b2..f1664d9ade6fd562db38c3dddd3fa6237a47faea 100644
--- a/sysdeps/s390/nptl/tls.h
+++ b/sysdeps/s390/nptl/tls.h
@@ -167,7 +167,7 @@  typedef struct
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)
 
diff --git a/sysdeps/sh/nptl/tls.h b/sysdeps/sh/nptl/tls.h
index 1530489a6ce4286bc5146e6cd83e3b463b965467..00ae1b998b9e0b1e6c347c4be4e99a90e530e924 100644
--- a/sysdeps/sh/nptl/tls.h
+++ b/sysdeps/sh/nptl/tls.h
@@ -139,7 +139,7 @@  typedef struct
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)
 
diff --git a/sysdeps/sparc/nptl/tls.h b/sysdeps/sparc/nptl/tls.h
index 95a69cb8249dc79c3a063637a21d976d2660c48f..bc7ada0d3fe66751506e1cf5516f14ec2c205af8 100644
--- a/sysdeps/sparc/nptl/tls.h
+++ b/sysdeps/sparc/nptl/tls.h
@@ -140,7 +140,7 @@  register struct pthread *__thread_self __asm__("%g7");
   do									     \
     {									     \
       THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
-      atomic_write_barrier ();						     \
+      atomic_thread_fence_release ();					     \
     }									     \
   while (0)