[5/8,RFC] Use new builtins in libstdc++

Message ID 20240919131204.3865854-6-mmalcomson@nvidia.com
State New
Series Introduce floating point fetch_add builtins

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Build failed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed

Commit Message

Matthew Malcomson Sept. 19, 2024, 1:12 p.m. UTC
  From: Matthew Malcomson <mmalcomson@nvidia.com>

The open questions here are:
1) Whether checking for this particular internal builtin is OK (this one
   happens to be the one implementing the operation for `double`; we
   would have to rely on the assumption that anyone who implements this
   operation for `double` also implements it for all the floating point
   types that their C++ frontend and libstdc++ handle).
2) Whether the `#if` should live somewhere other than the
   `__fetch_add_flt` function.  I put it there because that's where it
   seemed most natural, but I'm not familiar enough with libstdc++ to be
   confident in that decision.

We still need the CAS-loop fallback for any compiler that doesn't
implement this builtin, and hence still need to special-case the
floating point types.  Once all compilers we care about implement the
builtin we can remove this special handling and merge the floating
point and integral operations into the same template.
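
For reference, the detection-plus-fallback pattern used in the patch can
be written as a self-contained sketch against the public __atomic
builtins (illustration only, not part of the patch; the
__atomic_fetch_add_fp spelling is the internal builtin name assumed from
the earlier patches in this series):

  // Illustration only: probe the internal builtin implementing the
  // `double` operation, use the generic __atomic_fetch_add when it is
  // there, and fall back to a compare-exchange loop otherwise.
  template <typename T>
  T fetch_add_flt(T* ptr, T val, int order)
  {
  #if __has_builtin(__atomic_fetch_add_fp)
    return __atomic_fetch_add(ptr, val, order);
  #else
    T oldval;
    __atomic_load(ptr, &oldval, __ATOMIC_RELAXED);
    T newval = oldval + val;
    // On failure the compare-exchange updates oldval with the current
    // value, so recompute and retry until it succeeds.
    while (!__atomic_compare_exchange(ptr, &oldval, &newval,
                                      /*weak=*/true, order,
                                      __ATOMIC_RELAXED))
      newval = oldval + val;
    return oldval;
  #endif
  }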

Signed-off-by: Matthew Malcomson <mmalcomson@nvidia.com>
---
 libstdc++-v3/include/bits/atomic_base.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
  

Patch

diff --git a/libstdc++-v3/include/bits/atomic_base.h b/libstdc++-v3/include/bits/atomic_base.h
index 1c2367b39b6..d3b1a022db2 100644
--- a/libstdc++-v3/include/bits/atomic_base.h
+++ b/libstdc++-v3/include/bits/atomic_base.h
@@ -1217,30 +1217,41 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _Tp
       __fetch_add_flt(_Tp* __ptr, _Val<_Tp> __i, memory_order __m) noexcept
       {
+#if __has_builtin(__atomic_fetch_add_fp)
+	return __atomic_fetch_add(__ptr, __i, int(__m));
+#else
 	_Val<_Tp> __oldval = load(__ptr, memory_order_relaxed);
 	_Val<_Tp> __newval = __oldval + __i;
 	while (!compare_exchange_weak(__ptr, __oldval, __newval, __m,
 				      memory_order_relaxed))
 	  __newval = __oldval + __i;
 	return __oldval;
+#endif
       }
 
     template<typename _Tp>
       _Tp
       __fetch_sub_flt(_Tp* __ptr, _Val<_Tp> __i, memory_order __m) noexcept
       {
+#if __has_builtin(__atomic_fetch_add_fp)
+	return __atomic_fetch_sub(__ptr, __i, int(__m));
+#else
 	_Val<_Tp> __oldval = load(__ptr, memory_order_relaxed);
 	_Val<_Tp> __newval = __oldval - __i;
 	while (!compare_exchange_weak(__ptr, __oldval, __newval, __m,
 				      memory_order_relaxed))
 	  __newval = __oldval - __i;
 	return __oldval;
+#endif
       }
 
     template<typename _Tp>
       _Tp
       __add_fetch_flt(_Tp* __ptr, _Val<_Tp> __i) noexcept
       {
+#if __has_builtin(__atomic_fetch_add_fp)
+	return __atomic_add_fetch(__ptr, __i, __ATOMIC_SEQ_CST);
+#else
 	_Val<_Tp> __oldval = load(__ptr, memory_order_relaxed);
 	_Val<_Tp> __newval = __oldval + __i;
 	while (!compare_exchange_weak(__ptr, __oldval, __newval,
@@ -1248,12 +1259,16 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 				      memory_order_relaxed))
 	  __newval = __oldval + __i;
 	return __newval;
+#endif
       }
 
     template<typename _Tp>
       _Tp
       __sub_fetch_flt(_Tp* __ptr, _Val<_Tp> __i) noexcept
       {
+#if __has_builtin(__atomic_fetch_add_fp)
+	return __atomic_sub_fetch(__ptr, __i, __ATOMIC_SEQ_CST);
+#else
 	_Val<_Tp> __oldval = load(__ptr, memory_order_relaxed);
 	_Val<_Tp> __newval = __oldval - __i;
 	while (!compare_exchange_weak(__ptr, __oldval, __newval,
@@ -1261,6 +1276,7 @@  _GLIBCXX_BEGIN_NAMESPACE_VERSION
 				      memory_order_relaxed))
 	  __newval = __oldval - __i;
 	return __newval;
+#endif
       }
   } // namespace __atomic_impl
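
For context, a small illustrative user of the code paths touched above
(not part of the patch): in libstdc++ the C++20 floating point atomics
route through these helpers, so with the new builtins available the
operations below should lower to a single atomic RMW where the target
supports it, and to the CAS loop otherwise.

  // std::atomic<double>::fetch_add and operator+= end up in
  // __atomic_impl::__fetch_add_flt and __atomic_impl::__add_fetch_flt.
  #include <atomic>

  int main()
  {
    std::atomic<double> d{1.0};
    double prev = d.fetch_add(2.5);   // returns the old value, 1.0
    double next = (d += 0.5);         // returns the new value, 4.0
    return (prev == 1.0 && next == 4.0) ? 0 : 1;
  }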