[committed] libstdc++: Fix parallel std::exclusive_scan [PR108236]

Message ID 20241203213755.506489-1-jwakely@redhat.com
State New
Headers
Series [committed] libstdc++: Fix parallel std::exclusive_scan [PR108236] |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply

Commit Message

Jonathan Wakely Dec. 3, 2024, 9:37 p.m. UTC
  The standard says that std::exclusive_scan can be used to work in
place, i.e. where the output range is the same as the input range. This
means that the first sum cannot be written to the output until after
reading the first input value, otherwise we'll already have overwritten
the first input value.

While writing a new testcase I also realised that the serial version of
std::exclusive_scan uses copy construction for the accumulator variable,
but the standard only requires Cpp17MoveConstructible. We also require
move assignable, which is missing from the standard's requirements, but
we should at least use move construction not copy construction.

A similar problem exists for some other new C++17 numeric algos, but
I'll fix the others in a subsequent commit.

libstdc++-v3/ChangeLog:

	PR libstdc++/108236
	* include/pstl/glue_numeric_impl.h (exclusive_scan): Pass __init
	as rvalue.
	* include/pstl/numeric_impl.h (__brick_transform_scan): Do not
	write through __result until after reading through __first. Move
	__init into return value.
	(__pattern_transform_scan): Pass __init as rvalue.
	* include/std/numeric (exclusive_scan): Move construct instead
	of copy constructing.
	* testsuite/26_numerics/exclusive_scan/2.cc: New test.
	* testsuite/26_numerics/pstl/numeric_ops/108236.cc: New test.
---

Tested x86_64-linux. Pushed to trunk.

 libstdc++-v3/include/pstl/glue_numeric_impl.h |  2 +-
 libstdc++-v3/include/pstl/numeric_impl.h      |  9 ++--
 libstdc++-v3/include/std/numeric              |  4 +-
 .../testsuite/26_numerics/exclusive_scan/2.cc | 46 +++++++++++++++++
 .../26_numerics/pstl/numeric_ops/108236.cc    | 50 +++++++++++++++++++
 5 files changed, 104 insertions(+), 7 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/26_numerics/exclusive_scan/2.cc
 create mode 100644 libstdc++-v3/testsuite/26_numerics/pstl/numeric_ops/108236.cc
  

Patch

diff --git a/libstdc++-v3/include/pstl/glue_numeric_impl.h b/libstdc++-v3/include/pstl/glue_numeric_impl.h
index 490175c8b83..10d4912deed 100644
--- a/libstdc++-v3/include/pstl/glue_numeric_impl.h
+++ b/libstdc++-v3/include/pstl/glue_numeric_impl.h
@@ -108,7 +108,7 @@  exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIte
 
     using namespace __pstl;
     return __internal::__pattern_transform_scan(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
-                                                __result, __pstl::__internal::__no_op(), __init, __binary_op,
+                                                __result, __pstl::__internal::__no_op(), std::move(__init), __binary_op,
                                                 /*inclusive=*/std::false_type());
 }
 
diff --git a/libstdc++-v3/include/pstl/numeric_impl.h b/libstdc++-v3/include/pstl/numeric_impl.h
index 7ba83eeb714..e1ebec16039 100644
--- a/libstdc++-v3/include/pstl/numeric_impl.h
+++ b/libstdc++-v3/include/pstl/numeric_impl.h
@@ -160,11 +160,12 @@  __brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _Outpu
 {
     for (; __first != __last; ++__first, ++__result)
     {
-        *__result = __init;
+	_Tp __v = std::move(__init);
         _PSTL_PRAGMA_FORCEINLINE
-        __init = __binary_op(__init, __unary_op(*__first));
+        __init = __binary_op(__v, __unary_op(*__first));
+        *__result = std::move(__v);
     }
-    return std::make_pair(__result, __init);
+    return std::make_pair(__result, std::move(__init));
 }
 
 // Inclusive form
@@ -225,7 +226,7 @@  __pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Fo
                          _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
                          _Inclusive) noexcept
 {
-    return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
+    return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, std::move(__init), __binary_op, _Inclusive(),
                                               typename _Tag::__is_vector{})
         .first;
 }
diff --git a/libstdc++-v3/include/std/numeric b/libstdc++-v3/include/std/numeric
index dd98f40c7a8..37579c589e1 100644
--- a/libstdc++-v3/include/std/numeric
+++ b/libstdc++-v3/include/std/numeric
@@ -491,8 +491,8 @@  namespace __detail
     {
       while (__first != __last)
 	{
-	  auto __v = __init;
-	  __init = __binary_op(__init, *__first);
+	  _Tp __v = std::move(__init);
+	  __init = __binary_op(__v, *__first);
 	  ++__first;
 	  *__result++ = std::move(__v);
 	}
diff --git a/libstdc++-v3/testsuite/26_numerics/exclusive_scan/2.cc b/libstdc++-v3/testsuite/26_numerics/exclusive_scan/2.cc
new file mode 100644
index 00000000000..c6e28210ce5
--- /dev/null
+++ b/libstdc++-v3/testsuite/26_numerics/exclusive_scan/2.cc
@@ -0,0 +1,46 @@ 
+// { dg-do run { target c++17 } }
+
+// C++17 29.8.7 [exclusive.scan]
+
+#include <numeric>
+#include <testsuite_hooks.h>
+
+struct Mint
+{
+  Mint(int i = 0) : val(i) { }
+  Mint(Mint&&) = default;
+  Mint& operator=(Mint&&) = default;
+
+  operator int() const { return val; }
+
+private:
+  int val;
+};
+
+void
+test_move_only()
+{
+  const int input[]{10, 20, 30};
+  int output[3];
+  std::exclusive_scan(input, input+3, output, Mint(5), std::plus<int>{});
+  VERIFY( output[0] == 5 );
+  VERIFY( output[1] == 15 );
+  VERIFY( output[2] == 35 );
+}
+
+void
+test_pr108236()
+{
+  int vals[]{1, 2, 3};
+  // Output range is the same as the input range:
+  std::exclusive_scan(vals, vals+3, vals, 99);
+  VERIFY( vals[0] == 99 );
+  VERIFY( vals[1] == 100 );
+  VERIFY( vals[2] == 102 );
+}
+
+int main()
+{
+  test_move_only();
+  test_pr108236();
+}
diff --git a/libstdc++-v3/testsuite/26_numerics/pstl/numeric_ops/108236.cc b/libstdc++-v3/testsuite/26_numerics/pstl/numeric_ops/108236.cc
new file mode 100644
index 00000000000..e0e3027b321
--- /dev/null
+++ b/libstdc++-v3/testsuite/26_numerics/pstl/numeric_ops/108236.cc
@@ -0,0 +1,50 @@ 
+// { dg-options "-ltbb" }
+// { dg-do run { target c++17 } }
+// { dg-require-effective-target tbb_backend }
+
+// Bug 108236 std::exclusive_scan with execution policy does not work in-place
+
+#include <numeric>
+#include <execution>
+#include <testsuite_hooks.h>
+
+struct Mint
+{
+  Mint(int i = 0) : val(i) { }
+  Mint(Mint&&) = default;
+  Mint& operator=(Mint&&) = default;
+
+  operator int() const { return val; }
+
+private:
+  int val;
+};
+
+void
+test_move_only()
+{
+  const int input[]{10, 20, 30};
+  int output[3];
+  std::exclusive_scan(std::execution::seq, input, input+3, output, Mint(5),
+		      std::plus<int>{});
+  VERIFY( output[0] == 5 );
+  VERIFY( output[1] == 15 );
+  VERIFY( output[2] == 35 );
+}
+
+void
+test_pr108236()
+{
+  int vals[]{1, 2, 3};
+  // Output range is the same as the input range:
+  std::exclusive_scan(std::execution::seq, vals, vals+3, vals, 99);
+  VERIFY( vals[0] == 99 );
+  VERIFY( vals[1] == 100 );
+  VERIFY( vals[2] == 102 );
+}
+
+int main()
+{
+  test_move_only();
+  test_pr108236();
+}