libstdc++: Clear padding bits in atomic compare_exchange

Message ID 20210927141031.651313-1-rodgert@appliantology.com
State Committed
Delegated to: Jonathan Wakely
Headers
Series libstdc++: Clear padding bits in atomic compare_exchange |

Commit Message

Thomas Rodgers Sept. 27, 2021, 2:10 p.m. UTC
  From: Thomas Rodgers <rodgert@twrodgers.com>

Now with checks for __has_builtin(__builtin_clear_padding)

This change implements P0528 which requires that padding bits not
participate in atomic compare exchange operations. All arguments to the
generic template are 'sanitized' by the __builtin_clearpadding intrisic
before they are used in comparisons. This alrequires that any stores
also sanitize the incoming value.

Signed-off-by: Thomas Rodgers <trodgers@redhat.com>

libstdc++=v3/ChangeLog:

	* include/std/atomic (atomic<T>::atomic(_Tp) clear padding for
	__cplusplus > 201703L.
	(atomic<T>::store()) Clear padding.
	(atomic<T>::exchange()) Likewise.
	(atomic<T>::compare_exchange_weak()) Likewise.
	(atomic<T>::compare_exchange_strong()) Likewise.
	* testsuite/29_atomics/atomic/compare_exchange_padding.cc: New
	test.
---
 libstdc++-v3/include/std/atomic               | 41 +++++++++++++++++-
 .../atomic/compare_exchange_padding.cc        | 42 +++++++++++++++++++
 2 files changed, 81 insertions(+), 2 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
  

Comments

Jonathan Wakely Sept. 29, 2021, 12:13 p.m. UTC | #1
On Mon, 27 Sept 2021 at 15:11, Thomas Rodgers <rodgert@appliantology.com> wrote:
>
> From: Thomas Rodgers <rodgert@twrodgers.com>
>
> Now with checks for __has_builtin(__builtin_clear_padding)
>
> This change implements P0528 which requires that padding bits not
> participate in atomic compare exchange operations. All arguments to the
> generic template are 'sanitized' by the __builtin_clearpadding intrisic
> before they are used in comparisons. This alrequires that any stores
> also sanitize the incoming value.
>
> Signed-off-by: Thomas Rodgers <trodgers@redhat.com>
>
> libstdc++=v3/ChangeLog:
>
>         * include/std/atomic (atomic<T>::atomic(_Tp) clear padding for
>         __cplusplus > 201703L.
>         (atomic<T>::store()) Clear padding.
>         (atomic<T>::exchange()) Likewise.
>         (atomic<T>::compare_exchange_weak()) Likewise.
>         (atomic<T>::compare_exchange_strong()) Likewise.

Don't we also need this for std::atomic_ref, i.e. for the
__atomic_impl free functions in <bits/atomic_base.h>?

There we don't have any distinction between atomic_ref<integral type>
and atomic_ref<struct with possible padding>, they both use the same
implementations. But I think that's OK, as I think the built-in is
smart enough to be a no-op for types with no padding.

>         * testsuite/29_atomics/atomic/compare_exchange_padding.cc: New
>         test.
> ---
>  libstdc++-v3/include/std/atomic               | 41 +++++++++++++++++-
>  .../atomic/compare_exchange_padding.cc        | 42 +++++++++++++++++++
>  2 files changed, 81 insertions(+), 2 deletions(-)
>  create mode 100644 libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
>
> diff --git a/libstdc++-v3/include/std/atomic b/libstdc++-v3/include/std/atomic
> index 936dd50ba1c..4ac9ccdc1ab 100644
> --- a/libstdc++-v3/include/std/atomic
> +++ b/libstdc++-v3/include/std/atomic
> @@ -228,7 +228,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        atomic& operator=(const atomic&) = delete;
>        atomic& operator=(const atomic&) volatile = delete;
>
> -      constexpr atomic(_Tp __i) noexcept : _M_i(__i) { }
> +#if __cplusplus > 201703L && __has_builtin(__builtin_clear_padding)
> +      constexpr atomic(_Tp __i) noexcept : _M_i(__i)
> +      { __builtin_clear_padding(std::__addressof(_M_i)); }
> +#else
> +      constexpr atomic(_Tp __i) noexcept : _M_i(__i)
> +      { }
> +#endif

Please write this as a single function with the preprocessor
conditions in the body:

      constexpr atomic(_Tp __i) noexcept : _M_i(__i)
      {
#if __cplusplus > 201703L && __has_builtin(__builtin_clear_padding)
        __builtin_clear_padding(std::__addressof(_M_i)); }
#endif
      }

This not only avoids duplication of the identical parts, but it avoids
warnings from ld.gold if you use --detect-odr-violations. Otherwise,
the linker can see a definition of that constructor on two different
lines (233 and 236), and so warns about possible ODR violations,
something like "warning: while linking foo: symbol
'std::atomic<int>::atomic(int)' defined in multiple places (possible
ODR violation): ...atomic:233 ... atomic:236"

Can't we clear the padding for >= 201402L instead of only C++20? Only
C++11 has a problem with the built-in in a constexpr function, right?
So we can DTRT for C++14 upwards.


>
>        operator _Tp() const noexcept
>        { return load(); }
> @@ -268,12 +274,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        void
>        store(_Tp __i, memory_order __m = memory_order_seq_cst) noexcept
>        {
> +#if __has_builtin(__builtin_clear_padding)
> +       __builtin_clear_padding(std::__addressof(__i));
> +#endif

We repeat this *a lot*. When I started work on this I defined a
non-member function in the __atomic_impl namespace:

    template<typename _Tp>
      _GLIBCXX_ALWAYS_INLINE void
      __clear_padding(_Tp& __val) noexcept
      {
#if __has_builtin(__builtin_clear_padding)
       __builtin_clear_padding(std::__addressof(__val));
#endif
      }

Then you can just use that everywhere (except the constexpr
constructor), without all the #if checks.



>         __atomic_store(std::__addressof(_M_i), std::__addressof(__i), int(__m));
>        }
>
>        void
>        store(_Tp __i, memory_order __m = memory_order_seq_cst) volatile noexcept
>        {
> +#if __has_builtin(__builtin_clear_padding)
> +       __builtin_clear_padding(std::__addressof(__i));
> +#endif
>         __atomic_store(std::__addressof(_M_i), std::__addressof(__i), int(__m));
>        }
>
> @@ -300,6 +312,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        {
>          alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
>         _Tp* __ptr = reinterpret_cast<_Tp*>(__buf);
> +#if __has_builtin(__builtin_clear_padding)
> +       __builtin_clear_padding(std::__addressof(__i));
> +#endif
>         __atomic_exchange(std::__addressof(_M_i), std::__addressof(__i),
>                           __ptr, int(__m));
>         return *__ptr;
> @@ -311,6 +326,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        {
>          alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
>         _Tp* __ptr = reinterpret_cast<_Tp*>(__buf);
> +#if __has_builtin(__builtin_clear_padding)
> +       __builtin_clear_padding(std::__addressof(__i));
> +#endif
>         __atomic_exchange(std::__addressof(_M_i), std::__addressof(__i),
>                           __ptr, int(__m));
>         return *__ptr;
> @@ -322,6 +340,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>        {
>         __glibcxx_assert(__is_valid_cmpexch_failure_order(__f));
>
> +#if __has_builtin(__builtin_clear_padding)
> +       __builtin_clear_padding(std::__addressof(__e));

This unconditionally clears the padding of __e, which I don't think is
allowed. It potentially introduces a data race if another thread is
doing the CAS at the same time, and the program assumes that only the
CAS that fails will update expected.

See the thread I started at https://lists.isocpp.org/parallel/2020/12/3443.php
("atomic compare_exchange and padding bits", 2020-12-03)

The conclusion was that writing to __e is not allowed in the failure
case, so you need to make a copy of it (into a buffer, using memcpy),
then clear the padding in the copy, then try the
__atomic_compare_exchange and if it fails, copy back from the buffer
to __e. If all that extra work doesn't get inlined then we want to
only do it for types which might have padding bits, so I had
__atomic_impl::__maybe_has_padding in my unfinished patch:

   template<typename _Tp>
     constexpr bool
     __maybe_has_padding()
     {
#if __has_builtin(__has_unique_object_representations)
      return !__has_unique_object_representations(_Tp);
#else
      return true;
#endif
     }

The MSVC implementation uses !__has_unique_object_representations(_Tp)
&& !is_floating_point<_Tp>::value here, which is better than mine
above (FP types don't have unique object reps, but also don't have
padding bits).

And then do something like this in compare_exchange_weak:


+      {
+#if __has_builtin(__builtin_clear_padding)
+       if _GLIBCXX_CONSTEXPR17 (__maybe_has_padding<_Tp>())
+         {
+           _Val<_Tp> __expected0 = __expected; // XXX should use memcpy
+           auto* __exp = __atomic_impl::__clear_padding(__expected0);
+           auto* __des = __atomic_impl::__clear_padding(__desired);
+           if (__atomic_compare_exchange(__ptr, __exp, __des, true,
+                                         int(__success), int(__failure)))
+             return true;
+           __builtin_memcpy(std::__addressof(__expected), __exp, sizeof(_Tp));
+           return false;
+         }
+#endif
       return __atomic_compare_exchange(__ptr, std::__addressof(__expected),

And similarly for compare_exchange_strong (or refactor them into one
function that takes a bool for weak/strong).

If you do all that in __atomic_impl::compare_exchange_weak (making it
take a bool for weak/strong) then you can reuse it from
__atomic_impl:compare_exchange_strong, and then change the gneric
atomic<T>::compare_exchange_{weak,strong} to use that as well.




> diff --git a/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc b/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> new file mode 100644
> index 00000000000..0875f168097
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> @@ -0,0 +1,42 @@
> +// { dg-options "-std=gnu++2a" }
> +// { dg-do run { target c++2a } }

We can (and should) use "20" not "2a".

Does it need to be C++20 though, aren't all the clearings that are
being tested going to happen unconditionally? (well ... as long as the
builtin exists, which is true for GCC).

> +// { dg-add-options libatomic }
> +
> +#include <atomic>
> +
> +#include <testsuite_hooks.h>
> +
> +struct S { char c; short s; };
> +
> +void __attribute__((noinline,noipa))
> +fill_struct(S& s)
> +{ __builtin_memset(&s, 0xff, sizeof(S)); }
> +
> +bool
> +compare_struct(const S& a, const S& b)
> +{ return __builtin_memcmp(&a, &b, sizeof(S)) == 0; }
> +
> +int
> +main ()
> +{
> +  S s;
> +  fill_struct(s);
> +  s.c = 'a';
> +  s.s = 42;
> +
> +  std::atomic<S> as{ s };
> +  auto ts = as.load();
> +  VERIFY( !compare_struct(s, ts) ); // padding cleared on construction
> +  as.exchange(s);
> +  auto es = as.load();
> +  VERIFY( compare_struct(ts, es) ); // padding cleared on exchange
> +
> +  S n;
> +  fill_struct(n);
> +  n.c = 'b';
> +  n.s = 71;
> +  // padding cleared on compexchg
> +  VERIFY( as.compare_exchange_weak(s, n) );

Is it safe assume this won't fail spuriously? There is only one thread
doing the RMW operation, is that enough to avoid spurious failures?

> +  VERIFY( as.compare_exchange_strong(n, s) );
> +  return 0;
> +}
> --
> 2.31.1
>
  
Jonathan Wakely Sept. 29, 2021, 12:18 p.m. UTC | #2
On Wed, 29 Sept 2021 at 13:13, Jonathan Wakely wrote:
> We repeat this *a lot*. When I started work on this I defined a
> non-member function in the __atomic_impl namespace:

I've attached my incomplete patch from when I was working on this.
diff --cc libstdc++-v3/include/bits/atomic_base.h
index 71e1de078b5,35023ad30a8..00000000000
--- a/libstdc++-v3/include/bits/atomic_base.h
+++ b/libstdc++-v3/include/bits/atomic_base.h
@@@ -944,6 -952,28 +944,27 @@@ _GLIBCXX_BEGIN_NAMESPACE_VERSIO
      template<typename _Tp>
        using _Val = remove_volatile_t<_Tp>;
  
+     template<typename _Tp>
+       constexpr bool
+       __maybe_has_padding()
+       {
 -#if _GLIBCXX_HAVE_BUILTIN_HAS_UNIQ_OBJ_REP
 -	return !__has_unique_object_representations(_Tp);
++#if __has_builtin(__has_unique_object_representations)
++	return !__has_unique_object_representations(_Tp)
++	  && !is_floating_point<_Tp>::value;
+ #else
+ 	return true;
+ #endif
+       }
+ 
+     template<typename _Tp>
+       _GLIBCXX_ALWAYS_INLINE void
+       __clear_padding(_Tp& __val) noexcept
+       {
 -	auto* __ptr = std::__addressof(__val);
+ #if __has_builtin(__builtin_clear_padding)
 -	__builtin_clear_padding(__ptr);
++	__builtin_clear_padding(std::__addressof(__val));
+ #endif
 -	return __ptr;
+       }
+ 
      // As above, but for difference_type arguments.
      template<typename _Tp>
        using _Diff = conditional_t<is_pointer_v<_Tp>, ptrdiff_t, _Val<_Tp>>;
@@@ -984,13 -1015,26 +1006,26 @@@
      template<typename _Tp>
        _GLIBCXX_ALWAYS_INLINE bool
        compare_exchange_weak(_Tp* __ptr, _Val<_Tp>& __expected,
- 			    _Val<_Tp> __desired, memory_order __success,
- 			    memory_order __failure) noexcept
+ 			    _Val<_Tp> __desired,
+ 			    memory_order __success, memory_order __failure,
+ 			    bool __weak = true) noexcept
        {
 +	__glibcxx_assert(__is_valid_cmpexch_failure_order(__failure));
- 
+ #if __has_builtin(__builtin_clear_padding)
+ 	if _GLIBCXX_CONSTEXPR17 (__maybe_has_padding<_Tp>())
+ 	  {
+ 	    _Val<_Tp> __expected0 = __expected;
+ 	    auto* __exp = __atomic_impl::__clear_padding(__expected0);
+ 	    auto* __des = __atomic_impl::__clear_padding(__desired);
+ 	    if (__atomic_compare_exchange(__ptr, __exp, __des, __weak,
+ 					  int(__success), int(__failure)))
+ 	      return true;
+ 	    __builtin_memcpy(std::__addressof(__expected), __exp, sizeof(_Tp));
+ 	    return false;
+ 	  }
 -	else
+ #endif
  	return __atomic_compare_exchange(__ptr, std::__addressof(__expected),
- 					 std::__addressof(__desired), true,
+ 					 std::__addressof(__desired), __weak,
  					 int(__success), int(__failure));
        }
  
@@@ -1000,11 -1044,8 +1035,9 @@@
  			      _Val<_Tp> __desired, memory_order __success,
  			      memory_order __failure) noexcept
        {
 +	__glibcxx_assert(__is_valid_cmpexch_failure_order(__failure));
- 
- 	return __atomic_compare_exchange(__ptr, std::__addressof(__expected),
- 					 std::__addressof(__desired), false,
- 					 int(__success), int(__failure));
+ 	return compare_exchange_weak(__ptr, __expected, __desired, __success,
+ 				     __failure, false);
        }
  
  #if __cpp_lib_atomic_wait
  
Jakub Jelinek Sept. 29, 2021, 12:28 p.m. UTC | #3
On Wed, Sep 29, 2021 at 01:13:46PM +0100, Jonathan Wakely via Gcc-patches wrote:
> But I think that's OK, as I think the built-in is
> smart enough to be a no-op for types with no padding.

Yes.  The only effect it will have is that during the initial optimization
passes the variable/parameter will be addressable where without the call and
__addressof it wouldn't be; but as soon as __builtin_clear_padding is folded
to nothing if there is no padding or something if there is (which happens
quite early) and TODO_update_address_taken is done at the end
of some pass, it will no longer be addressable (unless something different
keeps taking its address).

	Jakub
  
Thomas Rodgers Sept. 29, 2021, 6:22 p.m. UTC | #4
On Wed, Sep 29, 2021 at 5:14 AM Jonathan Wakely <jwakely@redhat.com> wrote:

> On Mon, 27 Sept 2021 at 15:11, Thomas Rodgers <rodgert@appliantology.com>
> wrote:
> >
> > From: Thomas Rodgers <rodgert@twrodgers.com>
> >
> > Now with checks for __has_builtin(__builtin_clear_padding)
> >
> > This change implements P0528 which requires that padding bits not
> > participate in atomic compare exchange operations. All arguments to the
> > generic template are 'sanitized' by the __builtin_clearpadding intrisic
> > before they are used in comparisons. This alrequires that any stores
> > also sanitize the incoming value.
> >
> > Signed-off-by: Thomas Rodgers <trodgers@redhat.com>
> >
> > libstdc++=v3/ChangeLog:
> >
> >         * include/std/atomic (atomic<T>::atomic(_Tp) clear padding for
> >         __cplusplus > 201703L.
> >         (atomic<T>::store()) Clear padding.
> >         (atomic<T>::exchange()) Likewise.
> >         (atomic<T>::compare_exchange_weak()) Likewise.
> >         (atomic<T>::compare_exchange_strong()) Likewise.
>
> Don't we also need this for std::atomic_ref, i.e. for the
> __atomic_impl free functions in <bits/atomic_base.h>?
>
> There we don't have any distinction between atomic_ref<integral type>
> and atomic_ref<struct with possible padding>, they both use the same
> implementations. But I think that's OK, as I think the built-in is
> smart enough to be a no-op for types with no padding.
>
> >         * testsuite/29_atomics/atomic/compare_exchange_padding.cc: New
> >         test.
> > ---
> >  libstdc++-v3/include/std/atomic               | 41 +++++++++++++++++-
> >  .../atomic/compare_exchange_padding.cc        | 42 +++++++++++++++++++
> >  2 files changed, 81 insertions(+), 2 deletions(-)
> >  create mode 100644
> libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> >
> > diff --git a/libstdc++-v3/include/std/atomic
> b/libstdc++-v3/include/std/atomic
> > index 936dd50ba1c..4ac9ccdc1ab 100644
> > --- a/libstdc++-v3/include/std/atomic
> > +++ b/libstdc++-v3/include/std/atomic
> > @@ -228,7 +228,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >        atomic& operator=(const atomic&) = delete;
> >        atomic& operator=(const atomic&) volatile = delete;
> >
> > -      constexpr atomic(_Tp __i) noexcept : _M_i(__i) { }
> > +#if __cplusplus > 201703L && __has_builtin(__builtin_clear_padding)
> > +      constexpr atomic(_Tp __i) noexcept : _M_i(__i)
> > +      { __builtin_clear_padding(std::__addressof(_M_i)); }
> > +#else
> > +      constexpr atomic(_Tp __i) noexcept : _M_i(__i)
> > +      { }
> > +#endif
>
> Please write this as a single function with the preprocessor
> conditions in the body:
>
>       constexpr atomic(_Tp __i) noexcept : _M_i(__i)
>       {
> #if __cplusplus > 201703L && __has_builtin(__builtin_clear_padding)
>         __builtin_clear_padding(std::__addressof(_M_i)); }
> #endif
>       }
>
> This not only avoids duplication of the identical parts, but it avoids
> warnings from ld.gold if you use --detect-odr-violations. Otherwise,
> the linker can see a definition of that constructor on two different
> lines (233 and 236), and so warns about possible ODR violations,
> something like "warning: while linking foo: symbol
> 'std::atomic<int>::atomic(int)' defined in multiple places (possible
> ODR violation): ...atomic:233 ... atomic:236"
>
> Can't we clear the padding for >= 201402L instead of only C++20? Only
> C++11 has a problem with the built-in in a constexpr function, right?
> So we can DTRT for C++14 upwards.
>
>
We can, I was being conservative expecting guiding elvish feedback :)


>
> >
> >        operator _Tp() const noexcept
> >        { return load(); }
> > @@ -268,12 +274,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >        void
> >        store(_Tp __i, memory_order __m = memory_order_seq_cst) noexcept
> >        {
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__i));
> > +#endif
>
> We repeat this *a lot*. When I started work on this I defined a
> non-member function in the __atomic_impl namespace:
>
>     template<typename _Tp>
>       _GLIBCXX_ALWAYS_INLINE void
>       __clear_padding(_Tp& __val) noexcept
>       {
> #if __has_builtin(__builtin_clear_padding)
>        __builtin_clear_padding(std::__addressof(__val));
> #endif
>       }
>
> Then you can just use that everywhere (except the constexpr
> constructor), without all the #if checks.
>
>
>
> >         __atomic_store(std::__addressof(_M_i), std::__addressof(__i),
> int(__m));
> >        }
> >
> >        void
> >        store(_Tp __i, memory_order __m = memory_order_seq_cst) volatile
> noexcept
> >        {
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__i));
> > +#endif
> >         __atomic_store(std::__addressof(_M_i), std::__addressof(__i),
> int(__m));
> >        }
> >
> > @@ -300,6 +312,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >        {
> >          alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
> >         _Tp* __ptr = reinterpret_cast<_Tp*>(__buf);
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__i));
> > +#endif
> >         __atomic_exchange(std::__addressof(_M_i), std::__addressof(__i),
> >                           __ptr, int(__m));
> >         return *__ptr;
> > @@ -311,6 +326,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >        {
> >          alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
> >         _Tp* __ptr = reinterpret_cast<_Tp*>(__buf);
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__i));
> > +#endif
> >         __atomic_exchange(std::__addressof(_M_i), std::__addressof(__i),
> >                           __ptr, int(__m));
> >         return *__ptr;
> > @@ -322,6 +340,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >        {
> >         __glibcxx_assert(__is_valid_cmpexch_failure_order(__f));
> >
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__e));
>
> This unconditionally clears the padding of __e, which I don't think is
> allowed. It potentially introduces a data race if another thread is
> doing the CAS at the same time, and the program assumes that only the
> CAS that fails will update expected.
>
> See the thread I started at
> https://lists.isocpp.org/parallel/2020/12/3443.php
> ("atomic compare_exchange and padding bits", 2020-12-03)
>
> The conclusion was that writing to __e is not allowed in the failure
> case, so you need to make a copy of it (into a buffer, using memcpy),
> then clear the padding in the copy, then try the
> __atomic_compare_exchange and if it fails, copy back from the buffer
> to __e. If all that extra work doesn't get inlined then we want to
> only do it for types which might have padding bits, so I had
> __atomic_impl::__maybe_has_padding in my unfinished patch:
>
>    template<typename _Tp>
>      constexpr bool
>      __maybe_has_padding()
>      {
> #if __has_builtin(__has_unique_object_representations)
>       return !__has_unique_object_representations(_Tp);
> #else
>       return true;
> #endif
>      }
>
> The MSVC implementation uses !__has_unique_object_representations(_Tp)
> && !is_floating_point<_Tp>::value here, which is better than mine
> above (FP types don't have unique object reps, but also don't have
> padding bits).
>
> And then do something like this in compare_exchange_weak:
>
>
> +      {
> +#if __has_builtin(__builtin_clear_padding)
> +       if _GLIBCXX_CONSTEXPR17 (__maybe_has_padding<_Tp>())
> +         {
> +           _Val<_Tp> __expected0 = __expected; // XXX should use memcpy
> +           auto* __exp = __atomic_impl::__clear_padding(__expected0);
> +           auto* __des = __atomic_impl::__clear_padding(__desired);
> +           if (__atomic_compare_exchange(__ptr, __exp, __des, true,
> +                                         int(__success), int(__failure)))
> +             return true;
> +           __builtin_memcpy(std::__addressof(__expected), __exp,
> sizeof(_Tp));
> +           return false;
> +         }
> +#endif
>        return __atomic_compare_exchange(__ptr,
> std::__addressof(__expected),
>
> And similarly for compare_exchange_strong (or refactor them into one
> function that takes a bool for weak/strong).
>
> If you do all that in __atomic_impl::compare_exchange_weak (making it
> take a bool for weak/strong) then you can reuse it from
> __atomic_impl:compare_exchange_strong, and then change the gneric
> atomic<T>::compare_exchange_{weak,strong} to use that as well.
>
>
>
>
> > diff --git
> a/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> b/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> > new file mode 100644
> > index 00000000000..0875f168097
> > --- /dev/null
> > +++
> b/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> > @@ -0,0 +1,42 @@
> > +// { dg-options "-std=gnu++2a" }
> > +// { dg-do run { target c++2a } }
>
> We can (and should) use "20" not "2a".
>
> Does it need to be C++20 though, aren't all the clearings that are
> being tested going to happen unconditionally? (well ... as long as the
> builtin exists, which is true for GCC).
>
> > +// { dg-add-options libatomic }
> > +
> > +#include <atomic>
> > +
> > +#include <testsuite_hooks.h>
> > +
> > +struct S { char c; short s; };
> > +
> > +void __attribute__((noinline,noipa))
> > +fill_struct(S& s)
> > +{ __builtin_memset(&s, 0xff, sizeof(S)); }
> > +
> > +bool
> > +compare_struct(const S& a, const S& b)
> > +{ return __builtin_memcmp(&a, &b, sizeof(S)) == 0; }
> > +
> > +int
> > +main ()
> > +{
> > +  S s;
> > +  fill_struct(s);
> > +  s.c = 'a';
> > +  s.s = 42;
> > +
> > +  std::atomic<S> as{ s };
> > +  auto ts = as.load();
> > +  VERIFY( !compare_struct(s, ts) ); // padding cleared on construction
> > +  as.exchange(s);
> > +  auto es = as.load();
> > +  VERIFY( compare_struct(ts, es) ); // padding cleared on exchange
> > +
> > +  S n;
> > +  fill_struct(n);
> > +  n.c = 'b';
> > +  n.s = 71;
> > +  // padding cleared on compexchg
> > +  VERIFY( as.compare_exchange_weak(s, n) );
>
> Is it safe assume this won't fail spuriously? There is only one thread
> doing the RMW operation, is that enough to avoid spurious failures?
>
> > +  VERIFY( as.compare_exchange_strong(n, s) );
> > +  return 0;
> > +}
> > --
> > 2.31.1
> >
>
>
  
Jakub Jelinek Sept. 29, 2021, 6:29 p.m. UTC | #5
On Wed, Sep 29, 2021 at 11:22:29AM -0700, Thomas Rodgers via Gcc-patches wrote:
> > The MSVC implementation uses !__has_unique_object_representations(_Tp)
> > && !is_floating_point<_Tp>::value here, which is better than mine
> > above (FP types don't have unique object reps, but also don't have
> > padding bits).

Except the 80-bit long double, that one has padding bits (similarly
_Complex long double).
As I said earlier, if you want a builtin using the __builtin_clear_padding
infrastructure that will return bool if __builtin_clear_padding expands to
something or not, it can be easily added.  GCC already uses something like
that internally.

	Jakub
  
Thomas Rodgers Nov. 2, 2021, 1:25 a.m. UTC | #6
This should address Jonathan's feedback and adds support for atomic_ref<T>

On Wed, Sep 29, 2021 at 5:14 AM Jonathan Wakely <jwakely@redhat.com> wrote:

> On Mon, 27 Sept 2021 at 15:11, Thomas Rodgers <rodgert@appliantology.com>
> wrote:
> >
> > From: Thomas Rodgers <rodgert@twrodgers.com>
> >
> > Now with checks for __has_builtin(__builtin_clear_padding)
> >
> > This change implements P0528 which requires that padding bits not
> > participate in atomic compare exchange operations. All arguments to the
> > generic template are 'sanitized' by the __builtin_clearpadding intrisic
> > before they are used in comparisons. This alrequires that any stores
> > also sanitize the incoming value.
> >
> > Signed-off-by: Thomas Rodgers <trodgers@redhat.com>
> >
> > libstdc++=v3/ChangeLog:
> >
> >         * include/std/atomic (atomic<T>::atomic(_Tp) clear padding for
> >         __cplusplus > 201703L.
> >         (atomic<T>::store()) Clear padding.
> >         (atomic<T>::exchange()) Likewise.
> >         (atomic<T>::compare_exchange_weak()) Likewise.
> >         (atomic<T>::compare_exchange_strong()) Likewise.
>
> Don't we also need this for std::atomic_ref, i.e. for the
> __atomic_impl free functions in <bits/atomic_base.h>?
>
> There we don't have any distinction between atomic_ref<integral type>
> and atomic_ref<struct with possible padding>, they both use the same
> implementations. But I think that's OK, as I think the built-in is
> smart enough to be a no-op for types with no padding.
>
> >         * testsuite/29_atomics/atomic/compare_exchange_padding.cc: New
> >         test.
> > ---
> >  libstdc++-v3/include/std/atomic               | 41 +++++++++++++++++-
> >  .../atomic/compare_exchange_padding.cc        | 42 +++++++++++++++++++
> >  2 files changed, 81 insertions(+), 2 deletions(-)
> >  create mode 100644
> libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> >
> > diff --git a/libstdc++-v3/include/std/atomic
> b/libstdc++-v3/include/std/atomic
> > index 936dd50ba1c..4ac9ccdc1ab 100644
> > --- a/libstdc++-v3/include/std/atomic
> > +++ b/libstdc++-v3/include/std/atomic
> > @@ -228,7 +228,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >        atomic& operator=(const atomic&) = delete;
> >        atomic& operator=(const atomic&) volatile = delete;
> >
> > -      constexpr atomic(_Tp __i) noexcept : _M_i(__i) { }
> > +#if __cplusplus > 201703L && __has_builtin(__builtin_clear_padding)
> > +      constexpr atomic(_Tp __i) noexcept : _M_i(__i)
> > +      { __builtin_clear_padding(std::__addressof(_M_i)); }
> > +#else
> > +      constexpr atomic(_Tp __i) noexcept : _M_i(__i)
> > +      { }
> > +#endif
>
> Please write this as a single function with the preprocessor
> conditions in the body:
>
>       constexpr atomic(_Tp __i) noexcept : _M_i(__i)
>       {
> #if __cplusplus > 201703L && __has_builtin(__builtin_clear_padding)
>         __builtin_clear_padding(std::__addressof(_M_i)); }
> #endif
>       }
>
> This not only avoids duplication of the identical parts, but it avoids
> warnings from ld.gold if you use --detect-odr-violations. Otherwise,
> the linker can see a definition of that constructor on two different
> lines (233 and 236), and so warns about possible ODR violations,
> something like "warning: while linking foo: symbol
> 'std::atomic<int>::atomic(int)' defined in multiple places (possible
> ODR violation): ...atomic:233 ... atomic:236"
>
> Can't we clear the padding for >= 201402L instead of only C++20? Only
> C++11 has a problem with the built-in in a constexpr function, right?
> So we can DTRT for C++14 upwards.
>
>
> >
> >        operator _Tp() const noexcept
> >        { return load(); }
> > @@ -268,12 +274,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >        void
> >        store(_Tp __i, memory_order __m = memory_order_seq_cst) noexcept
> >        {
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__i));
> > +#endif
>
> We repeat this *a lot*. When I started work on this I defined a
> non-member function in the __atomic_impl namespace:
>
>     template<typename _Tp>
>       _GLIBCXX_ALWAYS_INLINE void
>       __clear_padding(_Tp& __val) noexcept
>       {
> #if __has_builtin(__builtin_clear_padding)
>        __builtin_clear_padding(std::__addressof(__val));
> #endif
>       }
>
> Then you can just use that everywhere (except the constexpr
> constructor), without all the #if checks.
>
>
>
> >         __atomic_store(std::__addressof(_M_i), std::__addressof(__i),
> int(__m));
> >        }
> >
> >        void
> >        store(_Tp __i, memory_order __m = memory_order_seq_cst) volatile
> noexcept
> >        {
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__i));
> > +#endif
> >         __atomic_store(std::__addressof(_M_i), std::__addressof(__i),
> int(__m));
> >        }
> >
> > @@ -300,6 +312,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >        {
> >          alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
> >         _Tp* __ptr = reinterpret_cast<_Tp*>(__buf);
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__i));
> > +#endif
> >         __atomic_exchange(std::__addressof(_M_i), std::__addressof(__i),
> >                           __ptr, int(__m));
> >         return *__ptr;
> > @@ -311,6 +326,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >        {
> >          alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
> >         _Tp* __ptr = reinterpret_cast<_Tp*>(__buf);
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__i));
> > +#endif
> >         __atomic_exchange(std::__addressof(_M_i), std::__addressof(__i),
> >                           __ptr, int(__m));
> >         return *__ptr;
> > @@ -322,6 +340,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >        {
> >         __glibcxx_assert(__is_valid_cmpexch_failure_order(__f));
> >
> > +#if __has_builtin(__builtin_clear_padding)
> > +       __builtin_clear_padding(std::__addressof(__e));
>
> This unconditionally clears the padding of __e, which I don't think is
> allowed. It potentially introduces a data race if another thread is
> doing the CAS at the same time, and the program assumes that only the
> CAS that fails will update expected.
>
> See the thread I started at
> https://lists.isocpp.org/parallel/2020/12/3443.php
> ("atomic compare_exchange and padding bits", 2020-12-03)
>
> The conclusion was that writing to __e is not allowed in the failure
> case, so you need to make a copy of it (into a buffer, using memcpy),
> then clear the padding in the copy, then try the
> __atomic_compare_exchange and if it fails, copy back from the buffer
> to __e. If all that extra work doesn't get inlined then we want to
> only do it for types which might have padding bits, so I had
> __atomic_impl::__maybe_has_padding in my unfinished patch:
>
>    template<typename _Tp>
>      constexpr bool
>      __maybe_has_padding()
>      {
> #if __has_builtin(__has_unique_object_representations)
>       return !__has_unique_object_representations(_Tp);
> #else
>       return true;
> #endif
>      }
>
> The MSVC implementation uses !__has_unique_object_representations(_Tp)
> && !is_floating_point<_Tp>::value here, which is better than mine
> above (FP types don't have unique object reps, but also don't have
> padding bits).
>
> And then do something like this in compare_exchange_weak:
>
>
> +      {
> +#if __has_builtin(__builtin_clear_padding)
> +       if _GLIBCXX_CONSTEXPR17 (__maybe_has_padding<_Tp>())
> +         {
> +           _Val<_Tp> __expected0 = __expected; // XXX should use memcpy
> +           auto* __exp = __atomic_impl::__clear_padding(__expected0);
> +           auto* __des = __atomic_impl::__clear_padding(__desired);
> +           if (__atomic_compare_exchange(__ptr, __exp, __des, true,
> +                                         int(__success), int(__failure)))
> +             return true;
> +           __builtin_memcpy(std::__addressof(__expected), __exp,
> sizeof(_Tp));
> +           return false;
> +         }
> +#endif
>        return __atomic_compare_exchange(__ptr,
> std::__addressof(__expected),
>
> And similarly for compare_exchange_strong (or refactor them into one
> function that takes a bool for weak/strong).
>
> If you do all that in __atomic_impl::compare_exchange_weak (making it
> take a bool for weak/strong) then you can reuse it from
> __atomic_impl:compare_exchange_strong, and then change the gneric
> atomic<T>::compare_exchange_{weak,strong} to use that as well.
>
>
>
>
> > diff --git
> a/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> b/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> > new file mode 100644
> > index 00000000000..0875f168097
> > --- /dev/null
> > +++
> b/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
> > @@ -0,0 +1,42 @@
> > +// { dg-options "-std=gnu++2a" }
> > +// { dg-do run { target c++2a } }
>
> We can (and should) use "20" not "2a".
>
> Does it need to be C++20 though, aren't all the clearings that are
> being tested going to happen unconditionally? (well ... as long as the
> builtin exists, which is true for GCC).
>
> > +// { dg-add-options libatomic }
> > +
> > +#include <atomic>
> > +
> > +#include <testsuite_hooks.h>
> > +
> > +struct S { char c; short s; };
> > +
> > +void __attribute__((noinline,noipa))
> > +fill_struct(S& s)
> > +{ __builtin_memset(&s, 0xff, sizeof(S)); }
> > +
> > +bool
> > +compare_struct(const S& a, const S& b)
> > +{ return __builtin_memcmp(&a, &b, sizeof(S)) == 0; }
> > +
> > +int
> > +main ()
> > +{
> > +  S s;
> > +  fill_struct(s);
> > +  s.c = 'a';
> > +  s.s = 42;
> > +
> > +  std::atomic<S> as{ s };
> > +  auto ts = as.load();
> > +  VERIFY( !compare_struct(s, ts) ); // padding cleared on construction
> > +  as.exchange(s);
> > +  auto es = as.load();
> > +  VERIFY( compare_struct(ts, es) ); // padding cleared on exchange
> > +
> > +  S n;
> > +  fill_struct(n);
> > +  n.c = 'b';
> > +  n.s = 71;
> > +  // padding cleared on compexchg
> > +  VERIFY( as.compare_exchange_weak(s, n) );
>
> Is it safe assume this won't fail spuriously? There is only one thread
> doing the RMW operation, is that enough to avoid spurious failures?
>
> > +  VERIFY( as.compare_exchange_strong(n, s) );
> > +  return 0;
> > +}
> > --
> > 2.31.1
> >
>
>
  
Jakub Jelinek Nov. 2, 2021, 7:49 a.m. UTC | #7
On Mon, Nov 01, 2021 at 06:25:45PM -0700, Thomas Rodgers via Gcc-patches wrote:
> +    template<typename _Tp>
> +      constexpr bool
> +      __maybe_has_padding()
> +      {
> +#if __has_builtin(__has_unique_object_representations)
> +	return !__has_unique_object_representations(_Tp)
> +	  && !is_floating_point<_Tp>::value;
> +#else
> +	return true;
> +#endif

I'm not sure I understand the && !is_floating_point<_Tp>::value.
Yes, float and double will never have padding, but long double often
will, e.g. on x86 or ia64 (but e.g. not on ppc, s390x, etc.).
So, unless we want to play with numeric_limits, it should be either
just return !__has_unique_object_representations(_Tp);
or return !__has_unique_object_representations(_Tp)
	  && (!is_floating_point<_Tp>::value
	      || is_same<__remove_cv_t<_Tp>,long double>::value);
or, with numeric_limits test numeric_limits<_Tp>::digits == 64
(but I'm sure Jonathan will not want including such a header dependency
unless it is already included).
Or I can always provide a new __builtin_clear_padding_p ...

	Jakub
  
Daniel Krügler Nov. 2, 2021, 8:49 a.m. UTC | #8
Am Di., 2. Nov. 2021 um 02:26 Uhr schrieb Thomas Rodgers via Libstdc++
<libstdc++@gcc.gnu.org>:
>
> This should address Jonathan's feedback and adds support for atomic_ref<T>
>

I'm wondering why __clear_padding doesn't refer to the computed __ptr
value in the case where __builtin_clear_padding is used?

Thanks,

- Daniel
  
Thomas Rodgers Nov. 3, 2021, 3:06 a.m. UTC | #9
This version of the patch specifically doesn’t deal with long double.
Mostly looking for Jonathan’s review to ensure his previous feedback is
addressed. I plan to rev the patch to handle long doubles after some
further discussion with you and Jonathan.

On Tue, Nov 2, 2021 at 12:49 AM Jakub Jelinek <jakub@redhat.com> wrote:

> On Mon, Nov 01, 2021 at 06:25:45PM -0700, Thomas Rodgers via Gcc-patches
> wrote:
> > +    template<typename _Tp>
> > +      constexpr bool
> > +      __maybe_has_padding()
> > +      {
> > +#if __has_builtin(__has_unique_object_representations)
> > +     return !__has_unique_object_representations(_Tp)
> > +       && !is_floating_point<_Tp>::value;
> > +#else
> > +     return true;
> > +#endif
>
> I'm not sure I understand the && !is_floating_point<_Tp>::value.
> Yes, float and double will never have padding, but long double often
> will, e.g. on x86 or ia64 (but e.g. not on ppc, s390x, etc.).
> So, unless we want to play with numeric_limits, it should be either
> just return !__has_unique_object_representations(_Tp);
> or return !__has_unique_object_representations(_Tp)
>           && (!is_floating_point<_Tp>::value
>               || is_same<__remove_cv_t<_Tp>,long double>::value);
> or, with numeric_limits test numeric_limits<_Tp>::digits == 64
> (but I'm sure Jonathan will not want including such a header dependency
> unless it is already included).
> Or I can always provide a new __builtin_clear_padding_p ...
>
>         Jakub
>
>
  
Jonathan Wakely Jan. 18, 2022, 9:48 p.m. UTC | #10
On Tue, 2 Nov 2021 at 01:26, Thomas Rodgers <trodgers@redhat.com> wrote:

> This should address Jonathan's feedback and adds support for atomic_ref<T>
>


>This change implements P0528 which requires that padding bits not
>participate in atomic compare exchange operations. All arguments to the
>generic template are 'sanitized' by the __builtin_clearpadding intrisic

The name of the intrinsic and the word "instrinsic" have typos.

>before they are used in comparisons. This alrequires that any stores
>also sanitize the incoming value.
>
>Signed-off-by: Thomas Rodgers <trodgers@redhat.com>
>
>libstdc++=v3/ChangeLog:

Typo

>
> * include/std/atomic (atomic<T>::atomic(_Tp): clear padding for

Unclosed paren.




>+#if __has_builtin(__builtin_clear_padding)

Instead of checking this built-in at every call site, can't we just make
__maybe_has_padding return false if the built-in isn't supported?
__clear_padding already handles the case where the built-in isn't supported.

>+    template<typename _Tp>
>+      constexpr bool
>+      __maybe_has_padding()
>+      {
>+#if __has_builtin(__has_unique_object_representations)
>+        return !__has_unique_object_representations(_Tp)
>+            && !is_floating_point<_Tp>::value;
>+#else
>+        return true;
>+#endif
>+      }

So make that:

    template<typename _Tp>
      constexpr bool
      __maybe_has_padding()
      {
#if ! __has_builtin(__builtin_clear_padding)
        return false;
#elif __has_builtin(__has_unique_object_representations)
        return !__has_unique_object_representations(_Tp)
            && !is_floating_point<_Tp>::value;
#else
        return true;
#endif
     }


>+ if _GLIBCXX14_CONSTEXPR (__atomic_impl::__maybe_has_padding<_Tp>())
>+  {

This needs to be _GLIBCXX17_CONSTEXPR (everywhere that `if constexpr` is
used).

>+  {
>+    alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
>+    __builtin_memcpy(__buf, std::__addressof(__e), sizeof(_Tp));

alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
_Tp* __exp = ::new((void*)__buf) _Tp(__e);

>+    auto* __exp =
__atomic_impl::__clear_padding(*reinterpret_cast<_Tp*>(__buf));

And then you don't need the reinterpret_cast:

__exp = __atomic_impl::__clear_padding(__exp);

>+    auto* __des = __atomic_impl::__clear_padding(__i);
>+    if (__atomic_compare_exchange(std::__addressof(__val), __exp, __des,
__weak,
>+  int(__s), int(__f)))
>+      return true;


>     template<typename _Tp>
>       _GLIBCXX_ALWAYS_INLINE void
>       store(_Tp* __ptr, _Val<_Tp> __t, memory_order __m) noexcept
>-      { __atomic_store(__ptr, std::__addressof(__t), int(__m)); }
>+      {
>+#if __has_builtin(__builtin_clear_padding)
>+ if _GLIBCXX14_CONSTEXPR (__maybe_has_padding<_Tp>())
>+  __clear_padding(__t);
>+#endif
>+ __atomic_store(__ptr, std::__addressof(__t), int(__m));
>+      }
>

All calls to __clear_padding need to be qualified.

>+ return __compare_exchange(*__ptr, __expected, __desired, true,
>+  __success, __failure);

So do calls to __compare_exchange.


>
>       explicit
>       __atomic_ref(_Tp& __t) : _M_ptr(std::__addressof(__t))
>-      { __glibcxx_assert(((uintptr_t)_M_ptr % required_alignment) == 0); }
>+      {
>+ __glibcxx_assert(((uintptr_t)_M_ptr % required_alignment) == 0);
>+#if __cplusplus > 201402L && __has_builtin(__builtin_clear_padding)
>+ __builtin_clear_padding(_M_ptr);
>+#endif
>+      }

Is this safe to do?

What if multiple threads all create a std::atomic_ref round the same object
at once, they'll all try to clear padding, and so race, won't they?
I don't think we can clear padding on atomic_ref construction, only on
store and RMW operations.


>--- a/libstdc++-v3/include/std/atomic
>+++ b/libstdc++-v3/include/std/atomic
>@@ -228,13 +228,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>       atomic& operator=(const atomic&) = delete;
>       atomic& operator=(const atomic&) volatile = delete;
>
>-#if __cplusplus > 201703L && __has_builtin(__builtin_clear_padding)
>       constexpr atomic(_Tp __i) noexcept : _M_i(__i)
>-      { __builtin_clear_padding(std::__addressof(_M_i)); }
>-#else
>-      constexpr atomic(_Tp __i) noexcept : _M_i(__i)
>-      { }
>+      {
>+#if __cplusplus > 201402L && __has_builtin(__builtin_clear_padding)
>+ __builtin_clear_padding(std::__addressof(_M_i));
> #endif
>+      }
>

Is this an incremental patch relative to the first one?
The changes to this file look correct.

>--- /dev/null
>+++
b/libstdc++-v3/testsuite/29_atomics/atomic_ref/compare_exchange_padding.cc
>@@ -0,0 +1,43 @@
>+// { dg-options "-std=gnu++2a" }
>+// { dg-do run { target c++2a } }

This new test is using "2a" not "20".
  

Patch

diff --git a/libstdc++-v3/include/std/atomic b/libstdc++-v3/include/std/atomic
index 936dd50ba1c..4ac9ccdc1ab 100644
--- a/libstdc++-v3/include/std/atomic
+++ b/libstdc++-v3/include/std/atomic
@@ -228,7 +228,13 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       atomic& operator=(const atomic&) = delete;
       atomic& operator=(const atomic&) volatile = delete;
 
-      constexpr atomic(_Tp __i) noexcept : _M_i(__i) { }
+#if __cplusplus > 201703L && __has_builtin(__builtin_clear_padding)
+      constexpr atomic(_Tp __i) noexcept : _M_i(__i)
+      { __builtin_clear_padding(std::__addressof(_M_i)); }
+#else
+      constexpr atomic(_Tp __i) noexcept : _M_i(__i)
+      { }
+#endif
 
       operator _Tp() const noexcept
       { return load(); }
@@ -268,12 +274,18 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       store(_Tp __i, memory_order __m = memory_order_seq_cst) noexcept
       {
+#if __has_builtin(__builtin_clear_padding)
+	__builtin_clear_padding(std::__addressof(__i));
+#endif
 	__atomic_store(std::__addressof(_M_i), std::__addressof(__i), int(__m));
       }
 
       void
       store(_Tp __i, memory_order __m = memory_order_seq_cst) volatile noexcept
       {
+#if __has_builtin(__builtin_clear_padding)
+	__builtin_clear_padding(std::__addressof(__i));
+#endif
 	__atomic_store(std::__addressof(_M_i), std::__addressof(__i), int(__m));
       }
 
@@ -300,6 +312,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
         alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
 	_Tp* __ptr = reinterpret_cast<_Tp*>(__buf);
+#if __has_builtin(__builtin_clear_padding)
+	__builtin_clear_padding(std::__addressof(__i));
+#endif
 	__atomic_exchange(std::__addressof(_M_i), std::__addressof(__i),
 			  __ptr, int(__m));
 	return *__ptr;
@@ -311,6 +326,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
         alignas(_Tp) unsigned char __buf[sizeof(_Tp)];
 	_Tp* __ptr = reinterpret_cast<_Tp*>(__buf);
+#if __has_builtin(__builtin_clear_padding)
+	__builtin_clear_padding(std::__addressof(__i));
+#endif
 	__atomic_exchange(std::__addressof(_M_i), std::__addressof(__i),
 			  __ptr, int(__m));
 	return *__ptr;
@@ -322,6 +340,10 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
 	__glibcxx_assert(__is_valid_cmpexch_failure_order(__f));
 
+#if __has_builtin(__builtin_clear_padding)
+	__builtin_clear_padding(std::__addressof(__e));
+	__builtin_clear_padding(std::__addressof(__i));
+#endif
 	return __atomic_compare_exchange(std::__addressof(_M_i),
 					 std::__addressof(__e),
 					 std::__addressof(__i),
@@ -334,6 +356,10 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
 	__glibcxx_assert(__is_valid_cmpexch_failure_order(__f));
 
+#if __has_builtin(__builtin_clear_padding)
+	__builtin_clear_padding(std::__addressof(__e));
+	__builtin_clear_padding(std::__addressof(__i));
+#endif
 	return __atomic_compare_exchange(std::__addressof(_M_i),
 					 std::__addressof(__e),
 					 std::__addressof(__i),
@@ -358,6 +384,10 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
 	__glibcxx_assert(__is_valid_cmpexch_failure_order(__f));
 
+#if __has_builtin(__builtin_clear_padding)
+	__builtin_clear_padding(std::__addressof(__e));
+	__builtin_clear_padding(std::__addressof(__i));
+#endif
 	return __atomic_compare_exchange(std::__addressof(_M_i),
 					 std::__addressof(__e),
 					 std::__addressof(__i),
@@ -370,6 +400,10 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
 	__glibcxx_assert(__is_valid_cmpexch_failure_order(__f));
 
+#if __has_builtin(__builtin_clear_padding)
+	__builtin_clear_padding(std::__addressof(__e));
+	__builtin_clear_padding(std::__addressof(__i));
+#endif
 	return __atomic_compare_exchange(std::__addressof(_M_i),
 					 std::__addressof(__e),
 					 std::__addressof(__i),
@@ -392,6 +426,9 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     void
     wait(_Tp __old, memory_order __m = memory_order_seq_cst) const noexcept
     {
+#if __has_builtin(__builtin_clear_padding)
+      __builtin_clear_padding(std::__addressof(__old));
+#endif
       std::__atomic_wait_address_v(&_M_i, __old,
 			 [__m, this] { return this->load(__m); });
     }
@@ -407,7 +444,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { std::__atomic_notify_address(&_M_i, true); }
 #endif // __cpp_lib_atomic_wait 
 
-    };
+   };
 #undef _GLIBCXX20_INIT
 
   /// Partial specialization for pointer types.
diff --git a/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc b/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
new file mode 100644
index 00000000000..0875f168097
--- /dev/null
+++ b/libstdc++-v3/testsuite/29_atomics/atomic/compare_exchange_padding.cc
@@ -0,0 +1,42 @@ 
+// { dg-options "-std=gnu++2a" }
+// { dg-do run { target c++2a } }
+// { dg-add-options libatomic }
+
+#include <atomic>
+
+#include <testsuite_hooks.h>
+
+struct S { char c; short s; };
+
+void __attribute__((noinline,noipa))
+fill_struct(S& s)
+{ __builtin_memset(&s, 0xff, sizeof(S)); }
+
+bool
+compare_struct(const S& a, const S& b)
+{ return __builtin_memcmp(&a, &b, sizeof(S)) == 0; }
+
+int
+main ()
+{
+  S s;
+  fill_struct(s);
+  s.c = 'a';
+  s.s = 42;
+
+  std::atomic<S> as{ s };
+  auto ts = as.load();
+  VERIFY( !compare_struct(s, ts) ); // padding cleared on construction
+  as.exchange(s);
+  auto es = as.load();
+  VERIFY( compare_struct(ts, es) ); // padding cleared on exchange
+
+  S n;
+  fill_struct(n);
+  n.c = 'b';
+  n.s = 71;
+  // padding cleared on compexchg
+  VERIFY( as.compare_exchange_weak(s, n) );
+  VERIFY( as.compare_exchange_strong(n, s) );
+  return 0;
+}