libstdc++: Correct NTTP and simd_mask ctor call

Message ID 25353264.6Emhk5qWAg@minbar
State Committed
Commit 668d43502f465d48adbc1fe2956b979f36657e5f
Headers
Series libstdc++: Correct NTTP and simd_mask ctor call |

Commit Message

Matthias Kretz May 26, 2023, 11:10 a.m. UTC
  OK for master and all backports (after 11.4 is done)?

tested on powerpc64le-linux-gnu and x86_64-pc-linux-gnu

----------- 8< ------------

Signed-off-by: Matthias Kretz <m.kretz@gsi.de>

libstdc++-v3/ChangeLog:

	PR libstdc++/109822
	* include/experimental/bits/simd.h (to_native): Use int NTTP
	as specified in PTS2.
	(to_compatible): Likewise. Add missing tag to call mask
	generator ctor.
	* testsuite/experimental/simd/pr109822_cast_functions.cc: New
	test.
---
 libstdc++-v3/include/experimental/bits/simd.h |  7 ++-
 .../simd/pr109822_cast_functions.cc           | 63 +++++++++++++++++++
 2 files changed, 67 insertions(+), 3 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/experimental/simd/pr109822_cast_functions.cc


--
──────────────────────────────────────────────────────────────────────────
 Dr. Matthias Kretz                           https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research               https://gsi.de
 stdₓ::simd
──────────────────────────────────────────────────────────────────────────
  

Comments

Jonathan Wakely May 26, 2023, 3:46 p.m. UTC | #1
On Fri, 26 May 2023 at 12:11, Matthias Kretz via Libstdc++ <
libstdc++@gcc.gnu.org> wrote:

> OK for master and all backports (after 11.4 is done)?
>

OK



>
> tested on powerpc64le-linux-gnu and x86_64-pc-linux-gnu
>
> ----------- 8< ------------
>
> Signed-off-by: Matthias Kretz <m.kretz@gsi.de>
>
> libstdc++-v3/ChangeLog:
>
>         PR libstdc++/109822
>         * include/experimental/bits/simd.h (to_native): Use int NTTP
>         as specified in PTS2.
>         (to_compatible): Likewise. Add missing tag to call mask
>         generator ctor.
>         * testsuite/experimental/simd/pr109822_cast_functions.cc: New
>         test.
> ---
>  libstdc++-v3/include/experimental/bits/simd.h |  7 ++-
>  .../simd/pr109822_cast_functions.cc           | 63 +++++++++++++++++++
>  2 files changed, 67 insertions(+), 3 deletions(-)
>  create mode 100644 libstdc++-v3/testsuite/experimental/simd/pr109822_cast_functions.cc
>
>
> --
> ──────────────────────────────────────────────────────────────────────────
>  Dr. Matthias Kretz                           https://mattkretz.github.io
>  GSI Helmholtz Centre for Heavy Ion Research               https://gsi.de
>  stdₓ::simd
> ──────────────────────────────────────────────────────────────────────────
  
Alexandre Oliva June 2, 2023, 8:32 a.m. UTC | #2
Hello, Matthias,

On May 26, 2023, Matthias Kretz via Libstdc++ <libstdc++@gcc.gnu.org> wrote:

> OK for master and all backports (after 11.4 is done)?
> tested on powerpc64le-linux-gnu and x86_64-pc-linux-gnu

> 	* testsuite/experimental/simd/pr109822_cast_functions.cc: New
> 	test.

This testcase fails to compile on PowerPC targets without VSX: 64-bit
integer and floating-point types cannot be vectorized.

I wonder if the test is malformed (and should be amended to test for
available simd types), or whether a patch like this would be desirable
to make simd constructs more portable.  I'm not sure about the
requirements.


[libstdc++] [simd] [ppc] use nonvector intrinsic fallback types

From: Alexandre Oliva <oliva@adacore.com>

Compiling such tests as pr109822_cast_functions.cc on powerpc targets
that don't support VSX fails because some intrinsic types that are
expected to be vectorizable are not defined without VSX.

Introduce fallback non-vector types to enable the code to compile.


for  libstdc++-v3/ChangeLog

	* include/experimental/bits/simd.h: Introduce fallback
	non-vector intrinsic_type_impl specializations for PowerPC
	without VSX.
---
 libstdc++-v3/include/experimental/bits/simd.h |   12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 834fe923065bd..2691823e869e8 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -2431,9 +2431,14 @@ template <typename _Tp>
 #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp)                                          \
   template <>                                                                  \
     struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
+#define _GLIBCXX_SIMD_PPC_INTRIN_NOVEC(_Tp)				       \
+  template <>                                                                  \
+    struct __intrinsic_type_impl<_Tp> { using type = _Tp; }
 _GLIBCXX_SIMD_PPC_INTRIN(float);
 #ifdef __VSX__
 _GLIBCXX_SIMD_PPC_INTRIN(double);
+#else
+_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(double);
 #endif
 _GLIBCXX_SIMD_PPC_INTRIN(signed char);
 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
@@ -2444,12 +2449,19 @@ _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
 #if defined __VSX__ || __SIZEOF_LONG__ == 4
 _GLIBCXX_SIMD_PPC_INTRIN(signed long);
 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
+#else
+_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(signed long);
+_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(unsigned long);
 #endif
 #ifdef __VSX__
 _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
 _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
+#else
+_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(signed long long);
+_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(unsigned long long);
 #endif
 #undef _GLIBCXX_SIMD_PPC_INTRIN
+#undef _GLIBCXX_SIMD_PPC_INTRIN_NOVEC
 
 template <typename _Tp, size_t _Bytes>
   struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
  
Matthias Kretz June 2, 2023, 8:50 a.m. UTC | #3
Hello Alexandre,

On Friday, 2 June 2023 10:32:40 CEST Alexandre Oliva wrote:
> On May 26, 2023, Matthias Kretz via Libstdc++ <libstdc++@gcc.gnu.org> wrote:
> > OK for master and all backports (after 11.4 is done)?
> > tested on powerpc64le-linux-gnu and x86_64-pc-linux-gnu
> > 
> > 	* testsuite/experimental/simd/pr109822_cast_functions.cc: New
> > 	test.
> 
> This testcase fails to compile on PowerPC targets without VSX: 64-bit
> integer and floating-point types cannot be vectorized.

Yes, and the simd implementation already encodes that both in 
__vectorized_sizeof<T>() and __intrinsic_type<T>.

> I wonder if the test is malformed (and should be amended to test for
> available simd types), or whether a patch like this would be desirable
> to make simd constructs more portable.  I'm not sure about the
> requirements.

The test is correct. The stdx::simd implementation has a latent bug (my 
dejagnu boards included only POWER7-POWER9; I'm at POWER5-POWER10 by now). The 
_S_store function is trying to work around bad code-gen but fails to notice 
that long long vectors can't be used.

I'm looking at that function again, also in light of recent improvements wrt. 
code-gen, and will remove that assumption, that long long is vectorizable.

__intrinsic_type_t<T> should never be T, but always the type that can be 
passed to corresponding platform intrinsics. There are traits for the 
implementation to detect whether the intrinsics types are available.

- Matthias

> 
> 
> [libstdc++] [simd] [ppc] use nonvector intrinsic fallback types
> 
> From: Alexandre Oliva <oliva@adacore.com>
> 
> Compiling such tests as pr109822_cast_functions.cc on powerpc targets
> that don't support VSX fails because some intrinsic types that are
> expected to be vectorizable are not defined without VSX.
> 
> Introduce fallback non-vector types to enable the code to compile.
> 
> 
> for  libstdc++-v3/ChangeLog
> 
> 	* include/experimental/bits/simd.h: Introduce fallback
> 	non-vector intrinsic_type_impl specializations for PowerPC
> 	without VSX.
> ---
>  libstdc++-v3/include/experimental/bits/simd.h |   12 ++++++++++++
>  1 file changed, 12 insertions(+)
> 
> diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
> index 834fe923065bd..2691823e869e8 100644
> --- a/libstdc++-v3/include/experimental/bits/simd.h
> +++ b/libstdc++-v3/include/experimental/bits/simd.h
> @@ -2431,9 +2431,14 @@ template <typename _Tp>
>  #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp)                                          \
>    template <>                                                                  \
>      struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
> +#define _GLIBCXX_SIMD_PPC_INTRIN_NOVEC(_Tp)				       \
> +  template <>                                                                  \
> +    struct __intrinsic_type_impl<_Tp> { using type = _Tp; }
>  _GLIBCXX_SIMD_PPC_INTRIN(float);
>  #ifdef __VSX__
>  _GLIBCXX_SIMD_PPC_INTRIN(double);
> +#else
> +_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(double);
>  #endif
>  _GLIBCXX_SIMD_PPC_INTRIN(signed char);
>  _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
> @@ -2444,12 +2449,19 @@ _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
>  #if defined __VSX__ || __SIZEOF_LONG__ == 4
>  _GLIBCXX_SIMD_PPC_INTRIN(signed long);
>  _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
> +#else
> +_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(signed long);
> +_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(unsigned long);
>  #endif
>  #ifdef __VSX__
>  _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
>  _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
> +#else
> +_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(signed long long);
> +_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(unsigned long long);
>  #endif
>  #undef _GLIBCXX_SIMD_PPC_INTRIN
> +#undef _GLIBCXX_SIMD_PPC_INTRIN_NOVEC
> 
>  template <typename _Tp, size_t _Bytes>
>    struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
  
Alexandre Oliva June 2, 2023, 9:30 a.m. UTC | #4
On Jun  2, 2023, Matthias Kretz <m.kretz@gsi.de> wrote:

> I'm looking at that function again, also in light of recent improvements wrt. 
> code-gen, and will remove that assumption, that long long is vectorizable.

Thanks, I'll leave that to you, then.


I also noticed the same test is failing on rtems6 (at least with gcc
11).  AFAICT the problem is that _GLIBCXX_SIMD_MATH_CALL* macros in
simd_math.h expect the named functions to be in std::, but I get such
errors as:

.../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1299: error: 'remainder' is not a member of 'std'
.../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1299: note: suggested alternatives:
[...]
.../aarch64-rtems6/include/math.h:346: note:   'remainder'
[...]
.../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1299: note:   'std::experimental::parallelism_v2::remainder'
.../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1299: error: template argument 1 is invalid
[...]
.../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1328: error: 'fmin' is not a member of 'std'; did you mean 'min'?
.../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1328: error: 'fmin' is not a member of 'std'; did you mean 'min'?
.../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1328: error: template argument 1 is invalid

ISTM that rtems is missing some of the math.h functions expected by
libstdc++, but also that even those that are present are not visible in
namespace ::std::, where the macros reasonably expect to find them.  Is
this known?  Should I file a PR about it?
  
Matthias Kretz June 2, 2023, 9:41 a.m. UTC | #5
On Friday, 2 June 2023 11:30:17 CEST Alexandre Oliva wrote:
> I also noticed the same test is failing on rtems6 (at least with gcc
> 11).  AFAICT the problem is that _GLIBCXX_SIMD_MATH_CALL* macros in
> simd_math.h expect the named functions to be in std::, but I get such
> errors as:
> 
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1299: error: 'remainder' is not a member of 'std'
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1299: note: suggested alternatives:
> [...]
> .../aarch64-rtems6/include/math.h:346: note:   'remainder'
> [...]
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1299: note:   'std::experimental::parallelism_v2::remainder'
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1299: error: template argument 1 is invalid
> [...]
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1328: error: 'fmin' is not a member of 'std'; did you mean 'min'?
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1328: error: 'fmin' is not a member of 'std'; did you mean 'min'?
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1328: error: template argument 1 is invalid
> 
> ISTM that rtems is missing some of the math.h functions expected by
> libstdc++, but also that even those that are present are not visible in
> namespace ::std::, where the macros reasonably expect to find them.  Is
> this known?  Should I file a PR about it?

I had/have no idea. Is rtems6 using the "freestanding" subset of C++? In which 
case simd shouldn't be there at all. Otherwise <cmath> should work, no?

- Matthias
  
Jonathan Wakely June 2, 2023, 12:06 p.m. UTC | #6
On Fri, 2 Jun 2023 at 10:30, Alexandre Oliva via Libstdc++ <
libstdc++@gcc.gnu.org> wrote:

>
> ISTM that rtems is missing some of the math.h functions expected by
> libstdc++, but also that even those that are present are not visible in
> namespace ::std::, where the macros reasonably expect to find them.  Is
> this known?  Should I file a PR about it?
>

That looks like https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109818

We only import the C99 <math.h> functions into namespace std when the
target libc supports all of them.
  

Patch

diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 26f08f83ab0..f94b8361ab0 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -3304,7 +3304,7 @@  to_native(const fixed_size_simd<_Tp, _Np>& __x)
     return {__mem, vector_aligned};
   }
 
-template <typename _Tp, size_t _Np>
+template <typename _Tp, int _Np>
   _GLIBCXX_SIMD_INTRINSIC
   enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
   to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
@@ -3315,7 +3315,7 @@  to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
   }
 
 // to_compatible {{{2
-template <typename _Tp, size_t _Np>
+template <typename _Tp, int _Np>
   _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
   to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
   {
@@ -3324,12 +3324,13 @@  to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
     return {__mem, vector_aligned};
   }
 
-template <typename _Tp, size_t _Np>
+template <typename _Tp, int _Np>
   _GLIBCXX_SIMD_INTRINSIC
   enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
   to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
   {
     return simd_mask<_Tp>(
+	     __private_init,
 	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
   }
 
diff --git a/libstdc++-v3/testsuite/experimental/simd/pr109822_cast_functions.cc b/libstdc++-v3/testsuite/experimental/simd/pr109822_cast_functions.cc
new file mode 100644
index 00000000000..3deafbf7a1f
--- /dev/null
+++ b/libstdc++-v3/testsuite/experimental/simd/pr109822_cast_functions.cc
@@ -0,0 +1,63 @@ 
+// { dg-options "-std=gnu++17" }
+// { dg-do compile { target c++17 } }
+
+#include <experimental/simd>
+
+namespace stdx = std::experimental;
+
+template <typename T, typename V>
+  void
+  test01()
+  {
+    using M = typename V::mask_type;
+    [[maybe_unused]] auto x = to_fixed_size(V());
+    [[maybe_unused]] auto k = to_fixed_size(M());
+    if constexpr (stdx::simd<T>::size() == V::size())
+      {
+	[[maybe_unused]] auto xx = to_compatible(x);
+	[[maybe_unused]] auto kk = to_compatible(k);
+	x = to_fixed_size(xx);
+	k = to_fixed_size(kk);
+      }
+    if constexpr (stdx::native_simd<T>::size() == V::size())
+      {
+	[[maybe_unused]] auto xx = to_native(x);
+	[[maybe_unused]] auto kk = to_native(k);
+	x = to_fixed_size(xx);
+	k = to_fixed_size(kk);
+      }
+  }
+
+template <typename T>
+  void
+  iterate_abis()
+  {
+    test01<T, stdx::simd<T, stdx::simd_abi::scalar>>();
+    test01<T, stdx::simd<T>>();
+    test01<T, stdx::native_simd<T>>();
+    test01<T, stdx::fixed_size_simd<T, 3>>();
+    test01<T, stdx::fixed_size_simd<T, stdx::simd_abi::max_fixed_size<T> - 4>>();
+  }
+
+int
+main()
+{
+  iterate_abis<char>();
+  iterate_abis<wchar_t>();
+  iterate_abis<char16_t>();
+  iterate_abis<char32_t>();
+
+  iterate_abis<signed char>();
+  iterate_abis<unsigned char>();
+  iterate_abis<short>();
+  iterate_abis<unsigned short>();
+  iterate_abis<int>();
+  iterate_abis<unsigned int>();
+  iterate_abis<long>();
+  iterate_abis<unsigned long>();
+  iterate_abis<long long>();
+  iterate_abis<unsigned long long>();
+  iterate_abis<float>();
+  iterate_abis<double>();
+  iterate_abis<long double>();
+}