@@ -268,12 +268,15 @@ bits_headers = \
${bits_srcdir}/shared_ptr_atomic.h \
${bits_srcdir}/shared_ptr_base.h \
${bits_srcdir}/simd_alg.h \
+ ${bits_srcdir}/simd_bit.h \
+ ${bits_srcdir}/simd_complex.h \
${bits_srcdir}/simd_details.h \
${bits_srcdir}/simd_flags.h \
${bits_srcdir}/simd_iterator.h \
${bits_srcdir}/simd_loadstore.h \
${bits_srcdir}/simd_mask.h \
${bits_srcdir}/simd_mask_reductions.h \
+ ${bits_srcdir}/simd_math.h \
${bits_srcdir}/simd_reductions.h \
${bits_srcdir}/simd_vec.h \
${bits_srcdir}/simd_x86.h \
@@ -624,12 +624,15 @@ bits_freestanding = \
@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/shared_ptr_atomic.h \
@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/shared_ptr_base.h \
@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_alg.h \
+@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_bit.h \
+@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_complex.h \
@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_details.h \
@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_flags.h \
@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_iterator.h \
@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_loadstore.h \
@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_mask.h \
@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_mask_reductions.h \
+@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_math.h \
@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_reductions.h \
@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_vec.h \
@GLIBCXX_HOSTED_TRUE@ ${bits_srcdir}/simd_x86.h \
new file mode 100644
@@ -0,0 +1,192 @@
+// Implementation of <simd> -*- C++ -*-
+
+// Copyright The GNU Toolchain Authors.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+#ifndef _GLIBCXX_SIMD_BIT_H
+#define _GLIBCXX_SIMD_BIT_H 1
+
+#ifdef _GLIBCXX_SYSHDR
+#pragma GCC system_header
+#endif
+
+#if __cplusplus >= 202400L
+
+#include "simd_vec.h"
+
+// psabi warnings are bogus because the ABI of the internal types never leaks into user code
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpsabi"
+
+// [simd.bit] -----------------------------------------------------------------
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+namespace simd
+{
+ template<__simd_integral _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr _Vp
+ byteswap(const _Vp& __v) noexcept
+ {
+ if constexpr (sizeof(typename _Vp::value_type) == 1)
+ return __v;
+ else
+ return _Vp([&](int __i) { return std::byteswap(__v[__i]); });
+ }
+
+ template<__simd_unsigned_integer _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr _Vp
+ bit_ceil(const _Vp& __v)
+ {
+ using _Tp = typename _Vp::value_type;
+ constexpr _Tp __max = _Tp(1) << (sizeof(_Tp) * __CHAR_BIT__ - 1);
+ __glibcxx_simd_precondition(all_of(__v <= __max), "bit_ceil result is not representable");
+ return _Vp([&](int __i) { return std::bit_ceil(__v[__i]); });
+ }
+
+ template<__simd_unsigned_integer _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr _Vp
+ bit_floor(const _Vp& __v) noexcept
+ { return _Vp([&](int __i) { return std::bit_floor(__v[__i]); }); }
+
+ template<__simd_unsigned_integer _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr typename _Vp::mask_type
+ has_single_bit(const _Vp& __v) noexcept
+ { return typename _Vp::mask_type([&](int __i) { return std::has_single_bit(__v[__i]); }); }
+
+ template<__simd_unsigned_integer _V0, __simd_integral _V1>
+ requires (_V0::size() == _V1::size())
+ && (sizeof(typename _V0::value_type) == sizeof(typename _V1::value_type))
+ [[__gnu__::__always_inline__]]
+ constexpr _V0
+ rotl(const _V0& __v, const _V1& __s) noexcept
+ { return _V0([&](int __i) { return std::rotl(__v[__i], __s[__i]); }); }
+
+ template<__simd_unsigned_integer _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr _Vp
+ rotl(const _Vp& __v, int __s) noexcept
+ { return _Vp([&](int __i) { return std::rotl(__v[__i], __s); }); }
+
+ template<__simd_unsigned_integer _V0, __simd_integral _V1>
+ requires (_V0::size() == _V1::size())
+ && (sizeof(typename _V0::value_type) == sizeof(typename _V1::value_type))
+ [[__gnu__::__always_inline__]]
+ constexpr _V0
+ rotr(const _V0& __v, const _V1& __s) noexcept
+ { return _V0([&](int __i) { return std::rotr(__v[__i], __s[__i]); }); }
+
+ template<__simd_unsigned_integer _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr _Vp
+ rotr(const _Vp& __v, int __s) noexcept
+ { return _Vp([&](int __i) { return std::rotr(__v[__i], __s); }); }
+
+ template<__simd_unsigned_integer _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr rebind_t<make_signed_t<typename _Vp::value_type>, _Vp>
+ bit_width(const _Vp& __v) noexcept
+ {
+ using _Ip = make_signed_t<typename _Vp::value_type>;
+ return rebind_t<_Ip, _Vp>([&](int __i) {
+ return static_cast<_Ip>(std::bit_width(__v[__i]));
+ });
+ }
+
+ template<__simd_unsigned_integer _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr rebind_t<make_signed_t<typename _Vp::value_type>, _Vp>
+ countl_zero(const _Vp& __v) noexcept
+ {
+ using _Ip = make_signed_t<typename _Vp::value_type>;
+ return rebind_t<_Ip, _Vp>([&](int __i) {
+ return static_cast<_Ip>(std::countl_zero(__v[__i]));
+ });
+ }
+
+ template<__simd_unsigned_integer _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr rebind_t<make_signed_t<typename _Vp::value_type>, _Vp>
+ countl_one(const _Vp& __v) noexcept
+ {
+ using _Ip = make_signed_t<typename _Vp::value_type>;
+ return rebind_t<_Ip, _Vp>([&](int __i) {
+ return static_cast<_Ip>(std::countl_one(__v[__i]));
+ });
+ }
+
+ template<__simd_unsigned_integer _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr rebind_t<make_signed_t<typename _Vp::value_type>, _Vp>
+ countr_zero(const _Vp& __v) noexcept
+ {
+ using _Ip = make_signed_t<typename _Vp::value_type>;
+ return rebind_t<_Ip, _Vp>([&](int __i) {
+ return static_cast<_Ip>(std::countr_zero(__v[__i]));
+ });
+ }
+
+ template<__simd_unsigned_integer _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr rebind_t<make_signed_t<typename _Vp::value_type>, _Vp>
+ countr_one(const _Vp& __v) noexcept
+ {
+ using _Ip = make_signed_t<typename _Vp::value_type>;
+ return rebind_t<_Ip, _Vp>([&](int __i) {
+ return static_cast<_Ip>(std::countr_one(__v[__i]));
+ });
+ }
+
+ template<__simd_unsigned_integer _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr rebind_t<make_signed_t<typename _Vp::value_type>, _Vp>
+ popcount(const _Vp& __v) noexcept
+ {
+ using _Ip = make_signed_t<typename _Vp::value_type>;
+ return rebind_t<_Ip, _Vp>([&](int __i) {
+ return static_cast<_Ip>(std::popcount(__v[__i]));
+ });
+ }
+} // namespace simd
+
+ using simd::byteswap;
+ using simd::bit_ceil;
+ using simd::bit_floor;
+ using simd::has_single_bit;
+ using simd::rotl;
+ using simd::rotr;
+ using simd::bit_width;
+ using simd::countl_zero;
+ using simd::countl_one;
+ using simd::countr_zero;
+ using simd::countr_one;
+ using simd::popcount;
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace std
+
+#pragma GCC diagnostic pop
+#endif // C++26
+#endif // _GLIBCXX_SIMD_BIT_H
new file mode 100644
@@ -0,0 +1,2321 @@
+// Implementation of <simd> -*- C++ -*-
+
+// Copyright The GNU Toolchain Authors.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+#ifndef _GLIBCXX_SIMD_COMPLEX_H
+#define _GLIBCXX_SIMD_COMPLEX_H 1
+
+#ifdef _GLIBCXX_SYSHDR
+#pragma GCC system_header
+#endif
+
+#if __cplusplus >= 202400L
+
+#include "simd_vec.h"
+#include <complex>
+
+// psabi warnings are bogus because the ABI of the internal types never leaks into user code
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpsabi"
+
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+namespace simd
+{
+ /** @internal
+ * @brief Return a _CxIleav mask that holds @p __k as its data member.
+ *
+ * @note If the resulting mask type has size 1, then it will actually store a single bool, rather
+ * than the given mask object.
+ */
+ template <size_t _Bytes, typename _Ap, __abi_tag _Aret
+ = decltype(__abi_rebind<complex<__float_from<_Bytes>>, _Ap::_S_size / 2, _Ap>())>
+ [[__gnu__::__always_inline__]]
+ constexpr basic_mask<_Bytes * 2, _Aret>
+ __to_cx_ileav(const basic_mask<_Bytes, _Ap>& __k)
+ {
+ static_assert(_Ap::_S_size % 2 == 0
+ && (__filter_abi_variant(_Ap::_S_variant, _AbiVariant::_CxVariants)
+ == _AbiVariant()));
+ if constexpr (_Aret::_S_size == 1)
+ return basic_mask<_Bytes * 2, _Aret>(__k[0]);
+ else
+ return basic_mask<_Bytes * 2, _Aret>::_S_init(__k);
+ }
+
+ constexpr void
+ __check_hi_bits_for_zero(unsigned_integral auto __x)
+ {
+ __glibcxx_simd_precondition(__x == 0,
+ "to_ullong called on mask with 'true' elements at indices"
+ "higher than 64");
+ }
+
+ template <typename _T0, typename _T1>
+ constexpr void
+ __check_hi_bits_for_zero(const __trivial_pair<_T0, _T1>& __p)
+ {
+ __check_hi_bits_for_zero(__p._M_first);
+ __check_hi_bits_for_zero(__p._M_second);
+ }
+
+ constexpr unsigned long long
+ __unwrap_pairs_to_ullong(unsigned_integral auto __x)
+ { return __x; }
+
+ template <typename _T0, typename _T1>
+ constexpr unsigned long long
+ __unwrap_pairs_to_ullong(const __trivial_pair<_T0, _T1>& __p)
+ {
+ __check_hi_bits_for_zero(__p._M_second);
+ return __unwrap_pairs_to_ullong(__p._M_first);
+ }
+
+ template <int _Np>
+ constexpr bitset<_Np>
+ __unwrap_pairs_to_bitset(unsigned_integral auto __x)
+ {
+ static_assert(_Np <= 64);
+ return __x;
+ }
+
+ template <size_t _Np, typename _T0, typename _T1>
+ constexpr bitset<_Np>
+ __unwrap_pairs_to_bitset(const __trivial_pair<_T0, _T1>& __p)
+ {
+ constexpr size_t _N0 = __bit_floor(_Np);
+ constexpr size_t _N1 = _Np - _N0;
+ static_assert(_N0 % 64 == 0);
+ struct _Tmp
+ {
+ bitset<__bit_floor(_Np)> _M_lo;
+ bitset<_Np - __bit_floor(_Np)> _M_hi;
+ };
+ _Tmp __tmp = {__unwrap_pairs_to_bitset<_N0>(__p._M_first),
+ __unwrap_pairs_to_bitset<_N1>(__p._M_second)};
+ return __builtin_bit_cast(bitset<_Np>, __tmp);
+ }
+
+ template <size_t _Bytes>
+ consteval auto
+ __tree_of_ulong()
+ {
+ static constexpr size_t _N0 = __bit_floor(_Bytes - 1);
+ static constexpr size_t _N1 = _Bytes - _N0;
+ if constexpr (_Bytes <= sizeof(unsigned long))
+ return 0ul;
+ else
+ return __trivial_pair {__tree_of_ulong<_N0>(), __tree_of_ulong<_N1>()};
+ }
+
+ template <size_t _Bytes>
+ using __tree_of_ulong_t = decltype(__tree_of_ulong<_Bytes>());
+
+ template <size_t _Np>
+ constexpr auto
+ __bitset_to_pairs(const bitset<_Np>& __b) noexcept
+ {
+ if constexpr (_Np <= 64)
+ return __b.to_ullong();
+ else
+ return __builtin_bit_cast(__tree_of_ulong_t<__div_ceil(_Np, size_t(__CHAR_BIT__))>, __b);
+ }
+
+ // complex interleaved (_CxIleav) -------------------------------------------
+
+ /** @internal
+ * @brief Functions acting on / recursing into the non-complex fp vec objects, interpreting even
+ * elements as real and odd elements as imaginary.
+ */
+ namespace __cxileav
+ {
+ /** @internal
+ * @brief Set even (real) elements in @p __x to the values in @p __re.
+ */
+ template <typename _Tp, typename _Ap>
+ [[__gnu__::__always_inline__]]
+ constexpr void
+ __set_real(basic_vec<_Tp, _Ap>& __x,
+ const __similar_vec<_Tp, _Ap::_S_size / 2, _Ap>& __re) noexcept
+ {
+ if constexpr (__scalar_abi_tag<_Ap> && _Ap::_S_size == 2)
+ __x._M_get_low() = __re;
+ else if constexpr (_Ap::_S_nreg >= 2)
+ { // recurse
+ constexpr int __n0 = __x._M_get_low().size();
+ const auto& [__lo, __hi] = __re.template _M_chunk<
+ __similar_vec<_Tp, __n0 / 2, _Ap>>();
+ __set_real(__x._M_get_low(), __lo);
+ __set_real(__x._M_get_high(), __hi);
+ }
+ else
+ {
+ using _DataType = typename _Ap::template _DataType<_Tp>;
+ _DataType& __xv = __x._M_get();
+ const auto __rv = __re._M_get();
+ if constexpr (_Ap::_S_size == 2)
+ __vec_set(__xv, 0, __rv);
+ else if (__is_const_known(__x, __re))
+ {
+ constexpr auto [...__is] = _IotaArray<_Ap::_S_size>;
+ __xv = _DataType {((__is & 1) == 0 ? __rv[__is / 2] : __xv[__is])...};
+ }
+ else
+ _VecOps<_DataType>::_S_overwrite_even_elements(__xv, __rv);
+ }
+ }
+
+ /** @internal
+ * @brief Set odd (imaginary) elements in @p __x to the values in @p __im.
+ */
+ template <typename _Tp, typename _Ap>
+ [[__gnu__::__always_inline__]]
+ constexpr void
+ __set_imag(basic_vec<_Tp, _Ap>& __x,
+ const __similar_vec<_Tp, _Ap::_S_size / 2, _Ap>& __im) noexcept
+ {
+ if constexpr (__scalar_abi_tag<_Ap> && _Ap::_S_size == 2)
+ __x._M_get_high() = __im;
+ else if constexpr (_Ap::_S_nreg >= 2)
+ { // recurse
+ constexpr int __n0 = __x._M_get_low().size();
+ const auto& [__lo, __hi] = __im.template _M_chunk<
+ __similar_vec<_Tp, __n0 / 2, _Ap>>();
+ __set_imag(__x._M_get_low(), __lo);
+ __set_imag(__x._M_get_high(), __hi);
+ }
+ else
+ {
+ using _DataType = typename _Ap::template _DataType<_Tp>;
+ _DataType& __xv = __x._M_get();
+ const auto __iv = __im._M_get();
+ if constexpr (_Ap::_S_size == 2)
+ __vec_set(__xv, 1, __iv);
+ else if (__is_const_known(__x, __im))
+ {
+ constexpr auto [...__is] = _IotaArray<_Ap::_S_size>;
+ __xv = _DataType {((__is & 1) == 1 ? __iv[__is / 2] : __xv[__is])...};
+ }
+ else
+ _VecOps<_DataType>::_S_overwrite_odd_elements(__xv, __iv);
+ }
+ }
+
+ /** @internal
+ * @brief Return @p __x after flipping the sign of odd (imaginary) elements.
+ */
+ template <typename _Tp, typename _Ap>
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec<_Tp, _Ap>
+ __negate_imag(const basic_vec<_Tp, _Ap>& __x)
+ {
+ if constexpr (__scalar_abi_tag<_Ap> && _Ap::_S_size == 2)
+ return basic_vec<_Tp, _Ap>::_S_init(__x._M_get_low(), -__x._M_get_high());
+ else if constexpr (_Ap::_S_nreg >= 2) // recurse
+ return basic_vec<_Tp, _Ap>::_S_init(__negate_imag(__x._M_get_low()),
+ __negate_imag(__x._M_get_high()));
+ else
+ return _VecOps<typename _Ap::template _DataType<_Tp>>
+ ::_S_complex_negate_imag(__x._M_get());
+ }
+
+ /** @internal
+ * @brief Recompute all complex multiplications where @p __nan is true using @p _Cx's
+ * multiplication operator.
+ *
+ * @todo use coarser _TargetTraits and move into .so
+ */
+ template <typename _Cx, _TargetTraits, __vec_builtin _TV>
+ [[__gnu__::__cold__]]
+ constexpr _TV
+ __redo_mul(_TV __r, const _TV __x, const _TV __y, const auto __nan, const int __n)
+ {
+ // redo multiplication using scalar complex-mul on (NaN, NaN) results
+ for (int __i = 0; __i < __n; __i += 2)
+ {
+ if (__nan[__i] && __nan[__i + 1])
+ {
+ using _Tc = typename _Cx::value_type;
+ const _Cx __cx(_Tc(__x[__i]), _Tc(__x[__i + 1]));
+ const _Cx __cy(_Tc(__y[__i]), _Tc(__y[__i + 1]));
+ const _Cx __cr = __cx * __cy;
+ __vec_set(__r, __i, __cr.real());
+ __vec_set(__r, __i + 1, __cr.imag());
+ }
+ }
+ return __r;
+ }
+
+ /** @internal
+ * @brief Complex multiplication of @p __x and @p __y, returning the result in @p __x.
+ */
+ template <typename _Cx, _TargetTraits _Traits, typename _Tp, typename _Ap>
+ [[__gnu__::__always_inline__]]
+ constexpr void
+ __mul(basic_vec<_Tp, _Ap>& __x, const basic_vec<_Tp, _Ap>& __y)
+ {
+ static_assert(__complex_like<_Cx>);
+ if constexpr (__scalar_abi_tag<_Ap> && _Ap::_S_size == 2)
+ {
+ const _Cx __c = _Cx(__x[0], __x[1]) * _Cx(__y[0], __y[1]);
+ __x._M_get_low() = __c.real();
+ __x._M_get_high() = __c.imag();
+ }
+ else if constexpr (_Ap::_S_nreg >= 2)
+ { // recurse
+ __mul<_Cx, _Traits>(__x._M_get_low(), __y._M_get_low());
+ __mul<_Cx, _Traits>(__x._M_get_high(), __y._M_get_high());
+ }
+ else if constexpr (_Traits.template _M_eval_as_f32<_Tp>())
+ { // eval float16_t as float
+ using _Vf32 = rebind_t<float, basic_vec<_Tp, _Ap>>;
+ _Vf32 __xf32(__x);
+ __mul<_Cx, _Traits>(__xf32, _Vf32(__y));
+ __x = static_cast<basic_vec<_Tp, _Ap>>(__xf32);
+ }
+ else
+ {
+ using _DataType = typename _Ap::template _DataType<_Tp>;
+ const _DataType __xv = __x._M_get();
+ const _DataType __yv = __y._M_get();
+ using _VO = _VecOps<_DataType>; // don't care for actual numer of elements
+ using _VOS = _VecOps<_DataType, _Ap::_S_size>; // to check for const-prop values
+ if (_VOS::_S_complex_imag_is_const_known_zero(__xv))
+ {
+ if (_VOS::_S_complex_imag_is_const_known_zero(__yv))
+ __x = __xv * __yv;
+ else
+ {
+ if (_Traits._M_conforming_to_STDC_annex_G())
+ { // handle negative zero (0 * y can be -0)
+ auto __a = _VO::_S_dup_even(__xv) * __yv;
+ auto __b = _DataType() * _VO::_S_swap_neighbors(__yv);
+ __x = _VO::_S_addsub(__a, __b);
+ }
+ else
+ __x = _VO::_S_dup_even(__xv) * __yv;
+ }
+ }
+ else if (_VOS::_S_complex_imag_is_const_known_zero(__yv))
+ {
+ if (_Traits._M_conforming_to_STDC_annex_G())
+ __x = _VO::_S_addsub(_VO::_S_dup_even(__yv) * __xv,
+ _DataType() * _VO::_S_swap_neighbors(__xv));
+ else
+ __x = _VO::_S_dup_even(__yv) * __xv;
+ }
+ else if (_VOS::_S_complex_real_is_const_known_zero(__yv))
+ {
+ if (_Traits._M_conforming_to_STDC_annex_G())
+ __x = _VO::_S_addsub(_DataType(), _VO::_S_dup_odd(__yv)
+ * _VO::_S_swap_neighbors(__xv));
+ else
+ __x = _VO::_S_dup_odd(__yv)
+ * _VO::_S_complex_negate_real(_VO::_S_swap_neighbors(__xv));
+ }
+ else if (_VOS::_S_complex_real_is_const_known_zero(__xv))
+ {
+ if (_Traits._M_conforming_to_STDC_annex_G())
+ __x = _VO::_S_addsub(_DataType(), _VO::_S_dup_odd(__xv)
+ * _VO::_S_swap_neighbors(__yv));
+ else
+ __x = _VO::_S_dup_odd(__xv)
+ * _VO::_S_complex_negate_real(_VO::_S_swap_neighbors(__yv));
+ }
+ else
+ {
+#if _GLIBCXX_X86
+ if (_Traits._M_have_fma() && !__is_const_known(__xv, __yv))
+ {
+ if constexpr (_Traits._M_have_fma())
+ __x = __x86_complex_multiplies(__xv, __yv);
+ }
+ else
+#endif
+ __x = _VO::_S_addsub(_VO::_S_dup_even(__xv) * __yv,
+ _VO::_S_dup_odd(__xv) * _VO::_S_swap_neighbors(__yv));
+ const auto __nan = __x._M_isnan();
+ if (_Traits._M_conforming_to_STDC_annex_G() && __nan._M_any_of())
+ __x = __redo_mul<_Cx, _Traits>(__x._M_get(), __xv, __yv, __nan, _Ap::_S_size);
+ }
+ }
+ }
+ }
+
+ template <size_t _Bytes, __abi_tag _Ap>
+ requires _Ap::_S_is_cx_ileav && (_Ap::_S_size >= 2) // size 1 is in simd_mask.h
+ class basic_mask<_Bytes, _Ap>
+ : public _MaskBase<_Bytes, _Ap>
+ {
+ using _Base = _MaskBase<_Bytes, _Ap>;
+
+ using _VecType = _Base::_VecType;
+
+ template <size_t, typename>
+ friend class basic_mask;
+
+ template <typename, typename>
+ friend class basic_vec;
+
+ static constexpr int _S_size = _Ap::_S_size;
+
+ using _DataType = __component_mask_for_ileav<_Bytes, _Ap>;
+
+ static constexpr bool _S_is_scalar = _DataType::_S_is_scalar;
+
+ static constexpr bool _S_use_bitmask = _DataType::_S_use_bitmask;
+
+ static constexpr int _S_full_size = _DataType::_S_full_size / 2;
+
+ static constexpr bool _S_is_partial = _DataType::_S_is_partial;
+
+ static constexpr bool _S_has_bool_member = _DataType::_S_has_bool_member;
+
+ static constexpr size_t _S_padding_bytes = _DataType::_S_padding_bytes;
+
+ _DataType _M_data;
+
+ public:
+ using value_type = bool;
+
+ using abi_type = _Ap;
+
+ using iterator = _Base::iterator;
+
+ using const_iterator = _Base::const_iterator;
+
+ // internal but public API ----------------------------------------------
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_mask
+ _S_init(const _DataType& __x)
+ {
+ basic_mask __r;
+ __r._M_data = __x;
+ return __r;
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_concat_data() const
+ { return _M_data._M_concat_data(); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr const _DataType&
+ _M_get_ileav_data() const
+ { return _M_data; }
+
+ template <_ArchTraits _Traits = {}>
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_mask
+ _S_partial_mask_of_n(int __n)
+ { return _S_init(_DataType::_S_partial_mask_of_n(__n * 2)); }
+
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_mask
+ _S_and_neighbors(_DataType __k)
+ { return _S_init(__k._M_and_neighbors()); }
+
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_mask
+ _S_or_neighbors(_DataType __k)
+ { return _S_init(__k._M_or_neighbors()); }
+
+ template <typename _Mp>
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_chunk() const noexcept
+ {
+ if constexpr (_Mp::abi_type::_S_variant != _Ap::_S_variant)
+ {
+ using _M2 = resize_t<_S_size, _Mp>;
+ static_assert(!is_same_v<_M2, basic_mask>);
+ return static_cast<_M2>(*this).template _M_chunk<_Mp>();
+ }
+ else if constexpr (_Mp::_S_size == 1)
+ {
+ constexpr auto [...__is] = _IotaArray<_S_size>;
+ return array{_Mp(_M_data[__is])...};
+ }
+ else // _Mp is the same partial specialization
+ {
+ constexpr int __rem = _S_size % _Mp::_S_size;
+ const auto [...__xs] = _M_data.template _M_chunk<typename _Mp::_DataType>();
+ static_assert(is_same_v<decltype(__to_cx_ileav(__xs...[0])), _Mp>);
+ if constexpr (__rem == 0)
+ return array{__to_cx_ileav(__xs)...};
+ else
+ return tuple(__to_cx_ileav(__xs)...);
+ }
+ }
+
+ [[__gnu__::__always_inline__]]
+ static constexpr const basic_mask&
+ _S_concat(const basic_mask& __x0) noexcept
+ { return __x0; }
+
+ template <typename... _As>
+ requires (sizeof...(_As) > 1)
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_mask
+ _S_concat(const basic_mask<_Bytes, _As>&... __xs) noexcept
+ { return basic_mask::_S_init(_DataType::_S_concat(__xs._M_get_ileav_data()...)); }
+
+ // [simd.mask.overview] default constructor -----------------------------
+ basic_mask() = default;
+
+ // [simd.mask.overview] conversion extensions ---------------------------
+ template <__vec_builtin _TV>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_mask(const _TV& __x) requires convertible_to<_TV, _DataType>
+ : _M_data(__x)
+ {}
+
+ template <__vec_builtin _TV>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ operator _TV() requires convertible_to<_DataType, _TV>
+ { return _M_data; }
+
+ // [simd.mask.ctor] broadcast constructor -------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr explicit
+ basic_mask(same_as<bool> auto __x) noexcept // LWG 4382.
+ : _M_data(__x)
+ {}
+
+ // [simd.mask.ctor] conversion constructor ------------------------------
+ template <size_t _UBytes, typename _UAbi>
+ requires (_S_size == _UAbi::_S_size)
+ [[__gnu__::__always_inline__]]
+ constexpr explicit(__is_mask_conversion_explicit<_Ap, _UAbi>(_Bytes, _UBytes))
+ basic_mask(const basic_mask<_UBytes, _UAbi>& __x) noexcept
+ : _M_data([&] {
+ using _UV = basic_mask<_UBytes, _UAbi>;
+ if constexpr (_UAbi::_S_is_cx_ileav)
+ // _CxIleav -> _CxIleav => we can simply convert the contained mask
+ return __x._M_data; // calls conversion ctor on _DataType
+
+ // __x is not _CxIleav from here on
+ else if constexpr (_S_use_bitmask || _UV::_S_use_bitmask)
+ return _DataType::_S_init(__duplicate_each_bit<_S_size>(__x._M_to_uint()));
+
+ // vec-mask to vec-mask from here on
+ else if constexpr (_UAbi::_S_is_cx_ctgus)
+ // unwrap _CxCtgus mask and recurse
+ return basic_mask(__x._M_data)._M_data;
+
+ else if constexpr (_UV::_S_is_scalar || _S_is_scalar)
+ // need to duplicate & convert one vector element into two bools
+ return _DataType([&](int __i) { return __x[__i / 2]; }); // TODO: optimize
+
+ else if constexpr (_Bytes == _UBytes)
+ return _DataType::_S_recursive_bit_cast(__x);
+ else if constexpr (_Bytes <= sizeof(0ll))
+ {
+ using _U2 = __similar_mask<__integer_from<_Bytes>, _S_size, _UAbi>;
+ return _DataType::_S_recursive_bit_cast(_U2(__x));
+ }
+ else if constexpr (_UBytes > 1)
+ {
+ using _U2 = __similar_mask<__integer_from<_UBytes / 2>, _S_size * 2, _UAbi>;
+ return _U2::_S_recursive_bit_cast(__x); // calls conversion ctor on _DataType
+ }
+ else // _Bytes == 16 && _UBytes == 1
+ // convert twice (1 -> 2 -> 16)
+ // The conversion to short keeps the intermediate mask as small as possible and thus
+ // requires fewer across-128bit boundary shuffles.
+ return basic_mask(__similar_mask<short, _UV::_S_size, _UAbi>(__x))._M_data;
+ }())
+ {}
+
+ using _Base::_MaskBase;
+
+ // [simd.mask.ctor] generator constructor -------------------------------
+ template <__simd_generator_invokable<bool, _S_size> _Fp>
+ [[__gnu__::__always_inline__]]
+ constexpr explicit
+ basic_mask(_Fp&& __gen)
+ : _M_data([&] [[__gnu__::__always_inline__]] {
+ // for _CxIleav, the results of each __gen call need to initialize two
+ // neighboring elements
+ constexpr auto [...__is] = _IotaArray<_S_size>;
+ bool __tmp[_S_size] = {__gen(__simd_size_c<__is>)...};
+ return _DataType([&] [[__gnu__::__always_inline__]] (size_t __i) {
+ return __tmp[__i / 2];
+ });
+ }())
+ {}
+
+ // [simd.mask.ctor] bitset constructor ----------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_mask(const same_as<bitset<_S_size>> auto& __b) noexcept // LWG 4382.
+ : _M_data(_DataType::_S_init(__duplicate_each_bit<_S_size>(__bitset_to_pairs(__b))))
+ {}
+
+ // [simd.mask.ctor] uint constructor ------------------------------------
+ template <unsigned_integral _Tp>
+ requires (!same_as<_Tp, bool>) // LWG 4382.
+ [[__gnu__::__always_inline__]]
+ constexpr explicit
+ basic_mask(_Tp __val) noexcept
+ : _M_data(__duplicate_each_bit<_S_size>(__val))
+ {}
+
+ // [simd.mask.subscr] ---------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr value_type
+ operator[](__simd_size_type __i) const
+ { return _M_data[__i * 2]; }
+
+ // [simd.mask.unary] ----------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr basic_mask
+ operator!() const noexcept
+ { return _S_init(!_M_data); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr _VecType
+ operator+() const noexcept requires destructible<_VecType>
+ { return operator _VecType(); }
+
+ constexpr _VecType
+ operator+() const noexcept = delete;
+
+ [[__gnu__::__always_inline__]]
+ constexpr _VecType
+ operator-() const noexcept requires destructible<_VecType>
+ {
+ using _Ip = typename _VecType::value_type;
+ if constexpr (_S_use_bitmask)
+ return __select_impl(*this, _Ip(-1), _Ip());
+ else
+ return __builtin_bit_cast(_VecType, -_M_data);
+ }
+
+ constexpr _VecType
+ operator-() const noexcept = delete;
+
+ [[__gnu__::__always_inline__]]
+ constexpr _VecType
+ operator~() const noexcept requires destructible<_VecType>
+ {
+ using _Ip = typename _VecType::value_type;
+ if constexpr (_S_use_bitmask)
+ return __select_impl(*this, _Ip(-2), _Ip(-1));
+ else
+ return __builtin_bit_cast(_VecType, _M_data) - _Ip(1);
+ }
+
+ constexpr _VecType
+ operator~() const noexcept = delete;
+
+ // [simd.mask.conv] -----------------------------------------------------
+ template <typename _Up, typename _UAbi>
+ requires (_UAbi::_S_size == _S_size)
+ [[__gnu__::__always_inline__]]
+ constexpr explicit(sizeof(_Up) != _Bytes)
+ operator basic_vec<_Up, _UAbi>() const noexcept
+ {
+ using _Mp = typename basic_vec<_Up, _UAbi>::mask_type;
+ return __select_impl(_Mp(*this), basic_vec<_Up, _UAbi>(1), basic_vec<_Up, _UAbi>(0));
+ }
+
+ using _Base::operator basic_vec;
+
+ // [simd.mask.namedconv] ------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr bitset<_S_size>
+ to_bitset() const noexcept
+ { return __unwrap_pairs_to_bitset<_S_size>(_M_to_uint()); }
+
+ template <int _Offset = 0, _ArchTraits _Traits = {}>
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_to_uint() const
+ { return _M_data.template _M_to_uint<_Offset, true>(); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr unsigned long long
+ to_ullong() const
+ { return __unwrap_pairs_to_ullong(_M_to_uint()); }
+
+ // [simd.mask.binary] ---------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator&&(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data & __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator||(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data | __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator&(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data & __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator|(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data | __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator^(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data ^ __y._M_data); }
+
+ // [simd.mask.cassign] --------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask&
+ operator&=(basic_mask& __x, const basic_mask& __y) noexcept
+ {
+ __x._M_data &= __y._M_data;
+ return __x;
+ }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask&
+ operator|=(basic_mask& __x, const basic_mask& __y) noexcept
+ {
+ __x._M_data |= __y._M_data;
+ return __x;
+ }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask&
+ operator^=(basic_mask& __x, const basic_mask& __y) noexcept
+ {
+ __x._M_data ^= __y._M_data;
+ return __x;
+ }
+
+ // [simd.mask.comparison] -----------------------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator==(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data == __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator!=(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data != __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator>=(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data >= __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator<=(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data <= __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator>(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data > __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator<(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data < __y._M_data); }
+
+ // [simd.mask.cond] -----------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ __select_impl(const basic_mask& __k, const basic_mask& __t, const basic_mask& __f) noexcept
+ { return _S_init(__select_impl(__k._M_data, __t._M_data, __f._M_data)); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ __select_impl(const basic_mask& __k, same_as<bool> auto __t, same_as<bool> auto __f) noexcept
+ { return _S_init(__select_impl(__k._M_data, __t, __f)); }
+
+ template <__vectorizable _T0, same_as<_T0> _T1>
+ requires (sizeof(_T0) == _Bytes)
+ [[__gnu__::__always_inline__]]
+ friend constexpr vec<_T0, _S_size>
+ __select_impl(const basic_mask& __k, const _T0& __t, const _T1& __f) noexcept
+ {
+ using _Vp = vec<_T0, _S_size>;
+ return __select_impl(static_cast<typename _Vp::mask_type>(__k), _Vp(__t), _Vp(__f));
+ }
+
+ // [simd.mask.reductions] implementation --------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr bool
+ _M_all_of() const noexcept
+ { return _M_data._M_all_of(); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr bool
+ _M_any_of() const noexcept
+ { return _M_data._M_any_of(); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr bool
+ _M_none_of() const noexcept
+ { return _M_data._M_none_of(); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr __simd_size_type
+ _M_reduce_count() const noexcept
+ { return _M_data._M_reduce_count() / 2; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr __simd_size_type
+ _M_reduce_min_index() const
+ { return _M_data._M_reduce_min_index() / 2; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr __simd_size_type
+ _M_reduce_max_index() const
+ { return _M_data._M_reduce_max_index() / 2; }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr bool
+ __is_const_known(const basic_mask& __x)
+ { return __is_const_known(__x._M_data); }
+ };
+
+ template <__vectorizable _Tp, __abi_tag _Ap>
+ requires __complex_like<_Tp> && _Ap::_S_is_cx_ileav && (_Ap::_S_size >= 2) // size 1 is below
+ class basic_vec<_Tp, _Ap>
+ : public _VecBase<_Tp, _Ap>
+ {
+ template <typename, typename>
+ friend class basic_vec;
+
+ static constexpr int _S_size = _Ap::_S_size;
+
+ static constexpr int _S_full_size = __bit_ceil(unsigned(_S_size));
+
+ using _T0 = typename _Tp::value_type;
+
+ using _TSimd = __similar_vec<_T0, 2 * _S_size, _Ap>;
+
+ using _RealSimd = __similar_vec<_T0, _S_size, _Ap>;
+
+ _TSimd _M_data = {};
+
+ static constexpr bool _S_use_bitmask = _TSimd::_S_use_bitmask;
+
+ static constexpr bool _S_is_partial = sizeof(_M_data) > sizeof(_Tp) * _S_size;
+
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_vec
+ _S_init(const _TSimd& __x)
+ {
+ basic_vec __r;
+ __r._M_data = __x;
+ return __r;
+ }
+
+ public:
+ using value_type = _Tp;
+
+ using mask_type = _VecBase<_Tp, _Ap>::mask_type;
+
+ // internal but public API ----------------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr const _TSimd&
+ _M_get_ileav_data() const
+ { return _M_data; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr const auto&
+ _M_get_low() const requires (_Ap::_S_nreg >= 2)
+ { return _M_data._M_get_low(); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr const auto&
+ _M_get_high() const requires (_Ap::_S_nreg >= 2)
+ { return _M_data._M_get_high(); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr bool
+ __is_const_known(const basic_vec& __x)
+ { return __is_const_known(__x._M_data); }
+
+ template <typename _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_chunk() const noexcept
+ {
+ if constexpr (_Vp::abi_type::_S_is_cx_ileav)
+ {
+ constexpr int __n = _S_size / _Vp::_S_size;
+ constexpr int __rem = _S_size % _Vp::_S_size;
+ const auto __chunked = _M_data.template _M_chunk<resize_t<_Vp::_S_size * 2,
+ _TSimd>>();
+ constexpr auto [...__is] = _IotaArray<__n>;
+ if constexpr (__rem == 0)
+ return array<_Vp, __n> {_Vp::_S_init(__chunked[__is])...};
+ else
+ {
+ using _Rest = resize_t<__rem, _Vp>;
+ return tuple(_Vp::_S_init(get<__is>(__chunked))...,
+ _Rest::_S_init(get<__n>(__chunked)));
+ }
+ }
+ else
+ return resize_t<_S_size, _Vp>(*this).template _M_chunk<_Vp>();
+ }
+
+ [[__gnu__::__always_inline__]]
+ static constexpr const basic_vec&
+ _S_concat(const basic_vec& __x0) noexcept
+ { return __x0; }
+
+ template <typename... _As>
+ requires (sizeof...(_As) > 1)
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_vec
+ _S_concat(const basic_vec<value_type, _As>&... __xs) noexcept
+ { return basic_vec::_S_init(_TSimd::_S_concat(__xs._M_get_ileav_data()...)); }
+
+ template <typename _BinaryOp>
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_reduce_to_register(_BinaryOp __binary_op) const
+ {
+ if constexpr (_TSimd::abi_type::_S_nreg == 1)
+ return *this;
+ else
+ {
+ auto [__lo, __hi] = _M_chunk<resize_t<__bit_ceil(unsigned(_S_size)) / 2,
+ basic_vec>>();
+ auto __a = __lo._M_reduce_to_register(__binary_op);
+ auto __b = __hi._M_reduce_to_register(__binary_op);
+ if constexpr (__a._S_size == __b._S_size)
+ return __binary_op(__a, __b);
+ else
+ {
+ using _V1 = resize_t<1, basic_vec>;
+ return __binary_op(_V1(__a._M_reduce(__binary_op)),
+ _V1(__b._M_reduce(__binary_op)));
+ }
+ }
+ }
+
+ template <typename _BinaryOp, _ArchTraits _Traits = {}>
+ [[__gnu__::__always_inline__]]
+ constexpr value_type
+ _M_reduce(_BinaryOp __binary_op) const
+ {
+ if constexpr (_S_size == 1)
+ return operator[](0);
+ else if constexpr (_Traits.template _M_eval_as_f32<_T0>())
+ return value_type(rebind_t<complex<float>, basic_vec>(*this)._M_reduce(__binary_op));
+ else if constexpr (_TSimd::abi_type::_S_nreg >= 2)
+ return _M_reduce_to_register(__binary_op)._M_reduce(__binary_op);
+ else if constexpr (__has_single_bit(unsigned(_S_size)))
+ {
+ const auto [__a, __b] = _M_chunk<resize_t<_S_size / 2, basic_vec>>();
+ return __binary_op(__a, __b)._M_reduce(__binary_op);
+ }
+ else
+ {
+ const auto [__a, __b, __c, ...__rest]
+ = _M_chunk<resize_t<__bit_floor(unsigned(_S_size)) / 2, basic_vec>>();
+ const auto __ab = __binary_op(__a, __b);
+ static_assert(sizeof...(__rest) <= 1);
+ if constexpr (__a._S_size != __c._S_size)
+ return cat(__ab, __c)._M_reduce(__binary_op);
+ else
+ return cat(__binary_op(__ab, __c), __rest...)._M_reduce(__binary_op);
+ }
+ }
+
+ template <typename _Up>
+ [[__gnu__::__always_inline__]]
+ static inline basic_vec
+ _S_partial_load(const _Up* __mem, size_t __n)
+ {
+ if constexpr (__complex_like<_Up>)
+ return _S_init(_TSimd::_S_partial_load(
+ reinterpret_cast<const typename _Up::value_type*>(__mem), __n * 2));
+ else
+ return basic_vec(_RealSimd::_S_partial_load(__mem, __n));
+ }
+
+ template <typename _Up, _ArchTraits _Traits = {}>
+ static inline basic_vec
+ _S_masked_load(const _Up* __mem, mask_type __k)
+ {
+ if constexpr (__complex_like<_Up>)
+ return _S_init(_TSimd::_S_masked_load(
+ reinterpret_cast<const typename _Up::value_type*>(__mem),
+ __k._M_data));
+ else
+ return basic_vec(_RealSimd::_S_masked_load(__mem, typename _RealSimd::mask_type(__k)));
+ }
+
+ template <typename _Up>
+ [[__gnu__::__always_inline__]]
+ inline void
+ _M_store(_Up* __mem) const
+ {
+ static_assert(__complex_like<_Up>);
+ _M_data._M_store(reinterpret_cast<typename _Up::value_type*>(__mem));
+ }
+
+ template <typename _Up>
+ [[__gnu__::__always_inline__]]
+ static inline void
+ _S_partial_store(const basic_vec& __v, _Up* __mem, size_t __n)
+ {
+ static_assert(__complex_like<_Up>);
+ _TSimd::_S_partial_store(__v._M_data, reinterpret_cast<typename _Up::value_type*>(__mem),
+ __n * 2);
+ }
+
+ template <typename _Up>
+ [[__gnu__::__always_inline__]]
+ static inline void
+ _S_masked_store(const basic_vec& __v, _Up* __mem, const mask_type& __k)
+ {
+ static_assert(__complex_like<_Up>);
+ _TSimd::_S_masked_store(__v._M_data, reinterpret_cast<typename _Up::value_type*>(__mem),
+ __k._M_data);
+ }
+
+ basic_vec() = default;
+
+ // TODO: conversion extensions
+
+ // [simd.ctor] broadcast constructor ------------------------------------
+ template <__broadcast_constructible<value_type> _Up>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_vec(_Up&& __x) noexcept
+ : _M_data([&](int __i) {
+ if constexpr (__complex_like<_Up>)
+ return (__i & 1) == 0 ? __x.real() : __x.imag();
+ else
+ return (__i & 1) == 0 ? __x : _T0();
+ })
+ {}
+
+ // [simd.ctor] conversion constructor -----------------------------------
+ template <__complex_like _Up, typename _UAbi>
+ requires (_S_size == _UAbi::_S_size)
+ && __explicitly_convertible_to<_Up, value_type>
+ && _UAbi::_S_is_cx_ileav
+ [[__gnu__::__always_inline__]]
+ constexpr
+ explicit(!convertible_to<_Up, value_type>)
+ basic_vec(const basic_vec<_Up, _UAbi>& __x) noexcept
+ : _M_data(__x._M_data)
+ {}
+
+ template <__complex_like _Up, typename _UAbi>
+ requires (_S_size == _UAbi::_S_size)
+ && __explicitly_convertible_to<_Up, value_type>
+ && (!_UAbi::_S_is_cx_ileav)
+ [[__gnu__::__always_inline__]]
+ constexpr
+ explicit(!convertible_to<_Up, value_type>)
+ basic_vec(const basic_vec<_Up, _UAbi>& __x) noexcept
+ : basic_vec(static_cast<_RealSimd>(__x._M_real), static_cast<_RealSimd>(__x._M_imag))
+ {}
+
+ template <typename _Up, typename _UAbi>
+ requires (!__complex_like<_Up>)
+ && (_S_size == _UAbi::_S_size)
+ && __explicitly_convertible_to<_Up, value_type>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ explicit(!convertible_to<_Up, value_type>)
+ basic_vec(const basic_vec<_Up, _UAbi>& __x) noexcept
+ : basic_vec(_RealSimd(__x))
+ {}
+
+ using _VecBase<_Tp, _Ap>::_VecBase;
+
+ // [simd.ctor] generator constructor ------------------------------------
+ template <__simd_generator_invokable<value_type, _S_size> _Fp>
+ [[__gnu__::__always_inline__]]
+ constexpr explicit
+ basic_vec(_Fp&& __gen)
+ : _M_data([&] {
+ using _Arr = std::array<value_type, sizeof(_TSimd) / sizeof(value_type)>;
+ constexpr auto [...__is] = _IotaArray<_S_size>;
+ const _Arr __tmp = { static_cast<value_type>(__gen(__simd_size_c<__is>))... };
+ return __builtin_bit_cast(_TSimd, __tmp);
+ }())
+ {}
+
+ // [simd.ctor] load constructor -----------------------------------------
+ template <__complex_like _Up>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_vec(_LoadCtorTag, const _Up* __ptr)
+ : _M_data([&] {
+ if consteval
+ {
+ return _TSimd([&](int __i) {
+ const _Up& __cx = __ptr[__i / 2];
+ return static_cast<_T0>(__i % 2 == 0 ? __cx.real() : __cx.imag());
+ });
+ }
+ else
+ {
+ return _TSimd(_LoadCtorTag(),
+ reinterpret_cast<const typename _Up::value_type*>(__ptr));
+ }
+ }())
+ {}
+
+ template <typename _Up>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_vec(_LoadCtorTag, const _Up* __ptr)
+ : basic_vec(_RealSimd(_LoadCtorTag(), __ptr))
+ {}
+
+ template <ranges::contiguous_range _Rg, typename... _Flags>
+ requires __static_sized_range<_Rg, _S_size>
+ && __vectorizable<ranges::range_value_t<_Rg>>
+ && __explicitly_convertible_to<ranges::range_value_t<_Rg>, value_type>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_vec(_Rg&& __range, flags<_Flags...> __flags = {})
+ : basic_vec(_LoadCtorTag(), __flags.template _S_adjust_pointer<basic_vec>(
+ ranges::data(__range)))
+ {
+ static_assert(__loadstore_convertible_to<ranges::range_value_t<_Rg>, value_type,
+ _Flags...>);
+ }
+
+ // [simd.ctor] complex init ---------------------------------------------
+ // This uses _RealSimd as proposed in LWG4230
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_vec(const _RealSimd& __re, const _RealSimd& __im = {}) noexcept
+ {
+ __cxileav::__set_real(_M_data, __re);
+ __cxileav::__set_imag(_M_data, __im);
+ }
+
+ // [simd.subscr] --------------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr value_type
+ operator[](__simd_size_type __i) const
+ { return value_type(_M_data[__i * 2], _M_data[__i * 2 + 1]); }
+
+ // [simd.unary] unary operators -----------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec&
+ operator++() noexcept requires requires(value_type __a) { ++__a; }
+ {
+ _M_data += value_type(_T0(1));
+ return *this;
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec
+ operator++(int) noexcept requires requires(value_type __a) { __a++; }
+ {
+ basic_vec __r = *this;
+ _M_data += value_type(_T0(1));
+ return __r;
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec&
+ operator--() noexcept requires requires(value_type __a) { --__a; }
+ {
+ _M_data -= value_type(_T0(1));
+ return *this;
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec
+ operator--(int) noexcept requires requires(value_type __a) { __a--; }
+ {
+ basic_vec __r = *this;
+ _M_data -= value_type(_T0(1));
+ return __r;
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr mask_type
+ operator!() const noexcept requires requires(value_type __a) { !__a; }
+ { return _S_init(!_M_data); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec
+ operator+() const noexcept requires requires(value_type __a) { +__a; }
+ { return *this; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec
+ operator-() const noexcept requires requires(value_type __a) { -__a; }
+ {
+ basic_vec __r = *this;
+ __r._M_data = -_M_data;
+ return __r;
+ }
+
+ // [simd.cassign] compound assignment -----------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_vec&
+ operator+=(basic_vec& __x, const basic_vec& __y) noexcept
+ requires requires(value_type __a) { __a + __a; }
+ {
+ __x._M_data += __y._M_data;
+ return __x;
+ }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_vec&
+ operator-=(basic_vec& __x, const basic_vec& __y) noexcept
+ requires requires(value_type __a) { __a - __a; }
+ {
+ __x._M_data -= __y._M_data;
+ return __x;
+ }
+
+ template <_TargetTraits _Traits = {}>
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_vec&
+ operator*=(basic_vec& __x, const basic_vec& __y) noexcept
+ requires requires(value_type __a) { __a * __a; }
+ {
+ __cxileav::__mul<value_type, _Traits>(__x._M_data, __y._M_data);
+ return __x;
+ }
+
+ template <int _RemoveMe = 0>
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_vec&
+ operator/=(basic_vec& __x, const basic_vec& __y) noexcept
+ requires requires(value_type __a) { __a / __a; }
+ {
+ static_assert(false, "TODO");
+ }
+
+ // [simd.comparison] compare operators ----------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr mask_type
+ operator==(const basic_vec& __x, const basic_vec& __y) noexcept
+ { return mask_type::_S_and_neighbors(__x._M_data == __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr mask_type
+ operator!=(const basic_vec& __x, const basic_vec& __y) noexcept
+ { return mask_type::_S_or_neighbors(__x._M_data != __y._M_data); }
+
+ // [simd.complex.access] complex-value accessors ------------------------
+ // LWG4230: returns _RealSimd instead of auto
+ [[__gnu__::__always_inline__]]
+ constexpr _RealSimd
+ real() const noexcept
+ { return permute<_S_size>(_M_data, [](int __i) { return __i * 2; }); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr _RealSimd
+ imag() const noexcept
+ { return permute<_S_size>(_M_data, [](int __i) { return __i * 2 + 1; }); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr void
+ real(const _RealSimd& __x) noexcept
+ { __cxileav::__set_real(_M_data, __x); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr void
+ imag(const _RealSimd& __x) noexcept
+ { __cxileav::__set_imag(_M_data, __x); }
+
+ // [simd.cond] ---------------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_vec
+ __select_impl(const mask_type& __k, const basic_vec& __t, const basic_vec& __f) noexcept
+ { return _S_init(__select_impl(__k._M_data, __t._M_data, __f._M_data)); }
+
+ // [simd.complex.math] internals ---------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr _RealSimd
+ _M_abs() const; // TODO: depends on [simd.math]
+
+ // associated functions
+ [[__gnu__::__always_inline__]]
+ constexpr _RealSimd
+ _M_norm() const
+ {
+ auto __re = real();
+ auto __im = imag();
+ return __re * __re + __im * __im;
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec
+ _M_conj() const
+ { return _S_init(__cxileav::__negate_imag(_M_data)); }
+ };
+
+ // complex contiguous (_CxCtgus) --------------------------------------------
+ // (and _CxIleav basic_vec with size 1)
+
+ /** @internal
+ * @brief Functions acting on / recursing into the non-complex fp vec objects, where real and
+ * imaginary parts are stored in separate vec objects.
+ */
+ namespace __cxctgus
+ {
+ /** @internal
+ * @brief Recompute all complex multiplications where @p __nan is true using @p _Cx's
+ * multiplication operator.
+ *
+ * @todo use coarser _TargetTraits and move into .so
+ */
+ template <typename _Cx, _TargetTraits, __vec_builtin _TV, typename _Kp>
+ [[__gnu__::__cold__, __gnu__::__noinline__]]
+ constexpr void
+ __redo_mul(_TV& __re, _TV& __im, const _TV __re0, const _TV __im0,
+ const _TV __re1, const _TV __im1, const _Kp __nan, int __n)
+ {
+ for (int __i = 0; __i < __n; ++__i)
+ {
+ bool __isnan;
+ if constexpr (is_integral_v<_Kp>)
+ __isnan = (__nan & (_Kp(1) << __i)) != 0;
+ else
+ __isnan = __nan[__i] != 0;
+ if (__isnan)
+ {
+ const _Cx __c0(__re0[__i], __im0[__i]);
+ const _Cx __c1(__re1[__i], __im1[__i]);
+ const _Cx __cr = __c0 * __c1;
+ __vec_set(__re, __i, __cr.real());
+ __vec_set(__im, __i, __cr.imag());
+ }
+ }
+ }
+
+ /** @internal
+ * @brief Complex multiplication of (@p __re0, @p __im0) and (@p __re1, @p __im1), returning the
+ * result in @p __re0 and @p __im0.
+ */
+ template <typename _Cx, _TargetTraits _Traits, typename _Tp, typename _Ap>
+ [[__gnu__::__always_inline__]]
+ constexpr void
+ __mul(basic_vec<_Tp, _Ap>& __re0, basic_vec<_Tp, _Ap>& __im0,
+ const basic_vec<_Tp, _Ap>& __re1, const basic_vec<_Tp, _Ap>& __im1)
+ {
+ static_assert(__complex_like<_Cx>);
+ if constexpr (_Ap::_S_nreg >= 2)
+ {
+ __mul<_Cx, _Traits>(__re0._M_get_low(), __im0._M_get_low(),
+ __re1._M_get_low(), __im1._M_get_low());
+ __mul<_Cx, _Traits>(__re0._M_get_high(), __im0._M_get_high(),
+ __re1._M_get_high(), __im1._M_get_high());
+ }
+ else if constexpr (_Ap::_S_size == 1)
+ { // use _Cx::operator*
+ const _Cx __c0(__re0._M_get(), __im0._M_get());
+ const _Cx __c1(__re1._M_get(), __im1._M_get());
+ const _Cx __cr = __c0 * __c1;
+ __re0._M_get() = __cr.real();
+ __im0._M_get() = __cr.imag();
+ }
+ else if constexpr (_Traits.template _M_eval_as_f32<_Tp>())
+ {
+ using _Vf = rebind_t<float, basic_vec<_Tp, _Ap>>;
+ using _Cf = complex<float>;
+ _Vf __re0f = __re0;
+ _Vf __im0f = __im0;
+ __mul<_Cf, _Traits, float, typename _Vf::abi_type>(__re0f, __im0f, __re1, __im1);
+ __re0 = static_cast<basic_vec<_Tp, _Ap>>(__re0f);
+ __im0 = static_cast<basic_vec<_Tp, _Ap>>(__im0f);
+ }
+ else
+ {
+ basic_vec<_Tp, _Ap> __re = __re0 * __re1 - __im0 * __im1;
+ basic_vec<_Tp, _Ap> __im = __re0 * __im1 + __im0 * __re1;
+ const auto __nan = __re._M_isunordered(__im);
+ if (__nan._M_any_of()) [[unlikely]]
+ __redo_mul<_Cx, _Traits>(__re._M_get(), __im._M_get(), __re0._M_get(), __im0._M_get(),
+ __re1._M_get(), __im1._M_get(),
+ __nan._M_concat_data(), _Ap::_S_size);
+ __re0 = __re;
+ __im0 = __im;
+ }
+ }
+ }
+
+ template <size_t _Bytes, __abi_tag _Ap>
+ requires _Ap::_S_is_cx_ctgus && (_Ap::_S_size >= 2) // size 1 is in simd_mask.h
+ class basic_mask<_Bytes, _Ap>
+ : public _MaskBase<_Bytes, _Ap>
+ {
+ using _Base = _MaskBase<_Bytes, _Ap>;
+
+ using _VecType = _Base::_VecType;
+
+ template <size_t, typename>
+ friend class basic_mask;
+
+ template <typename, typename>
+ friend class basic_vec;
+
+ static constexpr int _S_size = _Ap::_S_size;
+
+ using _DataType = __component_mask_for_ctgus<_Bytes, _Ap>;
+
+ static_assert(_DataType::abi_type::_S_nreg == _Ap::_S_nreg);
+
+ static constexpr bool _S_is_scalar = _DataType::_S_is_scalar;
+
+ static constexpr bool _S_use_bitmask = _DataType::_S_use_bitmask;
+
+ static constexpr int _S_full_size = _DataType::_S_full_size;
+
+ static constexpr bool _S_is_partial = _DataType::_S_is_partial;
+
+ static constexpr bool _S_has_bool_member = _DataType::_S_has_bool_member;
+
+ static constexpr size_t _S_padding_bytes = _DataType::_S_padding_bytes;
+
+ _DataType _M_data;
+
+ public:
+ using value_type = bool;
+
+ using abi_type = _Ap;
+
+ // internal but public API ----------------------------------------------
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_mask
+ _S_init(const _DataType& __x)
+ {
+ basic_mask __r;
+ __r._M_data = __x;
+ return __r;
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr const _DataType&
+ _M_get() const
+ { return _M_data; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_concat_data() const
+ { return _M_data._M_concat_data(); }
+
+ template <_ArchTraits _Traits = {}>
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_mask
+ _S_partial_mask_of_n(int __n)
+ { return _S_init(_DataType::_S_partial_mask_of_n(__n)); }
+
+ template <typename _Mp>
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_chunk() const noexcept
+ {
+ if constexpr (_Mp::abi_type::_S_variant != _Ap::_S_variant)
+ {
+ using _M2 = resize_t<_S_size, _Mp>;
+ static_assert(!is_same_v<_M2, basic_mask>);
+ return static_cast<_M2>(*this).template _M_chunk<_Mp>();
+ }
+ else if constexpr (_Mp::_S_size == 1)
+ {
+ constexpr auto [...__is] = _IotaArray<_S_size>;
+ return array{_Mp(_M_data[__is])...};
+ }
+ else // _Mp is the same partial specialization
+ {
+ constexpr int __rem = _S_size % _Mp::_S_size;
+ const auto [...__xs, __last] = _M_data.template _M_chunk<typename _Mp::_DataType>();
+ if constexpr (__rem == 0)
+ return array{_Mp::_S_init(__xs)..., _Mp::_S_init(__last)};
+ else
+ return tuple(_Mp::_S_init(__xs)..., resize_t<__rem, _Mp>(__last));
+ }
+ }
+
+ [[__gnu__::__always_inline__]]
+ static constexpr const basic_mask&
+ _S_concat(const basic_mask& __x0) noexcept
+ { return __x0; }
+
+ /** @internal
+ * @brief Adjust the mask type to match _RealSimd.
+ *
+ * This is a trivial unwrap for this partial specialization of basic_mask. However, for
+ * _Abi<1, 1, _CxCtgus> _M_data is the bool object and needs to be converted.
+ */
+ [[__gnu__::__always_inline__]]
+ constexpr const _DataType&
+ _M_get_ctgus_data() const noexcept
+ { return _M_data; }
+
+ template <typename... _As>
+ requires (sizeof...(_As) > 1)
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_mask
+ _S_concat(const basic_mask<_Bytes, _As>&... __xs) noexcept
+ { return basic_mask::_S_init(_DataType::_S_concat(__xs._M_get_ctgus_data()...)); }
+
+ // [simd.mask.overview] default constructor -----------------------------
+ basic_mask() = default;
+
+ // [simd.mask.overview] conversion extensions ---------------------------
+ template <__vec_builtin _TV>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_mask(const _TV& __x) requires convertible_to<_TV, _DataType>
+ : _M_data(__x)
+ {}
+
+ template <__vec_builtin _TV>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ operator _TV() requires convertible_to<_DataType, _TV>
+ { return _M_data; }
+
+ // [simd.mask.ctor] broadcast constructor -------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr explicit
+ basic_mask(same_as<bool> auto __x) noexcept // LWG 4382.
+ : _M_data(__x)
+ {}
+
+ // [simd.mask.ctor] conversion constructor ------------------------------
+ template <size_t _UBytes, typename _UAbi>
+ requires (_S_size == _UAbi::_S_size)
+ [[__gnu__::__always_inline__]]
+ constexpr explicit(__is_mask_conversion_explicit<_Ap, _UAbi>(_Bytes, _UBytes))
+ basic_mask(const basic_mask<_UBytes, _UAbi>& __x) noexcept
+ : _M_data(__x)
+ {}
+
+ using _Base::_MaskBase;
+
+ // [simd.mask.ctor] generator constructor -------------------------------
+ template <__simd_generator_invokable<bool, _S_size> _Fp>
+ [[__gnu__::__always_inline__]]
+ constexpr explicit
+ basic_mask(_Fp&& __gen)
+ : _M_data(__gen)
+ {}
+
+ // [simd.mask.ctor] bitset constructor ----------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_mask(const same_as<bitset<_S_size>> auto& __b) noexcept // LWG 4382.
+ : _M_data(__b)
+ {}
+
+ // [simd.mask.ctor] uint constructor ------------------------------------
+ template <unsigned_integral _Tp>
+ requires (!same_as<_Tp, bool>) // LWG 4382.
+ [[__gnu__::__always_inline__]]
+ constexpr explicit
+ basic_mask(_Tp __val) noexcept
+ : _M_data(__val)
+ {}
+
+ // [simd.mask.subscr] ---------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr value_type
+ operator[](__simd_size_type __i) const
+ { return _M_data[__i]; }
+
+ // [simd.mask.unary] ----------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr basic_mask
+ operator!() const noexcept
+ { return _S_init(!_M_data); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr _VecType
+ operator+() const noexcept requires destructible<_VecType>
+ { return static_cast<_VecType>(_M_data); }
+
+ constexpr _VecType
+ operator+() const noexcept = delete;
+
+ [[__gnu__::__always_inline__]]
+ constexpr _VecType
+ operator-() const noexcept requires destructible<_VecType>
+ {
+ using _Ip = typename _VecType::value_type;
+ if constexpr (_S_use_bitmask)
+ return __select_impl(*this, _Ip(-1), _Ip());
+ else
+ return -_M_data; // sign-extends
+ }
+
+ constexpr _VecType
+ operator-() const noexcept = delete;
+
+ [[__gnu__::__always_inline__]]
+ constexpr _VecType
+ operator~() const noexcept requires destructible<_VecType>
+ {
+ using _Ip = typename _VecType::value_type;
+ if constexpr (_S_use_bitmask)
+ return __select_impl(*this, _Ip(-2), _Ip(-1));
+ else
+ return ~_M_data; // sign-extends
+ }
+
+ constexpr _VecType
+ operator~() const noexcept = delete;
+
+ // [simd.mask.conv] -----------------------------------------------------
+ template <typename _Up, typename _UAbi>
+ requires (_UAbi::_S_size == _S_size)
+ [[__gnu__::__always_inline__]]
+ constexpr explicit(sizeof(_Up) != _Bytes)
+ operator basic_vec<_Up, _UAbi>() const noexcept
+ {
+ using _UV = basic_vec<_Up, _UAbi>;
+ using _Mp = typename _UV::mask_type;
+ return __select_impl(static_cast<_Mp>(_M_data), _UV(1), _UV(0));
+ }
+
+ using _Base::operator basic_vec;
+
+ // [simd.mask.namedconv] ------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr bitset<_S_size>
+ to_bitset() const noexcept
+ { return _M_data.to_bitset(); }
+
+ template <int _Offset = 0, _ArchTraits _Traits = {}>
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_to_uint() const
+ { return _M_data.template _M_to_uint<_Offset>(); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr unsigned long long
+ to_ullong() const
+ { return _M_data.to_ullong(); }
+
+ // [simd.mask.binary] ---------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator&&(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data & __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator||(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data | __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator&(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data & __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator|(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data | __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator^(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data ^ __y._M_data); }
+
+ // [simd.mask.cassign] --------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask&
+ operator&=(basic_mask& __x, const basic_mask& __y) noexcept
+ {
+ __x._M_data &= __y._M_data;
+ return __x;
+ }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask&
+ operator|=(basic_mask& __x, const basic_mask& __y) noexcept
+ {
+ __x._M_data |= __y._M_data;
+ return __x;
+ }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask&
+ operator^=(basic_mask& __x, const basic_mask& __y) noexcept
+ {
+ __x._M_data ^= __y._M_data;
+ return __x;
+ }
+
+ // [simd.mask.comparison] -----------------------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator==(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data == __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator!=(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data != __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator>=(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data >= __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator<=(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data <= __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator>(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data > __y._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ operator<(const basic_mask& __x, const basic_mask& __y) noexcept
+ { return _S_init(__x._M_data < __y._M_data); }
+
+ // [simd.mask.cond] -----------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ __select_impl(const basic_mask& __k, const basic_mask& __t, const basic_mask& __f) noexcept
+ { return __select_impl(__k._M_data, __t._M_data, __f._M_data); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_mask
+ __select_impl(const basic_mask& __k, same_as<bool> auto __t, same_as<bool> auto __f) noexcept
+ { return _S_init(__select_impl(__k._M_data, __t, __f)); }
+
+ template <__vectorizable _T0, same_as<_T0> _T1>
+ requires (sizeof(_T0) == _Bytes)
+ [[__gnu__::__always_inline__]]
+ friend constexpr vec<_T0, _S_size>
+ __select_impl(const basic_mask& __k, const _T0& __t, const _T1& __f) noexcept
+ {
+ using _Vp = vec<_T0, _S_size>;
+ return __select_impl(static_cast<typename _Vp::mask_type>(__k), _Vp(__t), _Vp(__f));
+ }
+
+ // [simd.mask.reductions] implementation --------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr bool
+ _M_all_of() const noexcept
+ { return _M_data._M_all_of(); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr bool
+ _M_any_of() const noexcept
+ { return _M_data._M_any_of(); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr bool
+ _M_none_of() const noexcept
+ { return _M_data._M_none_of(); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr __simd_size_type
+ _M_reduce_count() const noexcept
+ { return _M_data._M_reduce_count(); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr __simd_size_type
+ _M_reduce_min_index() const
+ { return _M_data._M_reduce_min_index(); }
+
+ [[__gnu__::__always_inline__]]
+ constexpr __simd_size_type
+ _M_reduce_max_index() const
+ { return _M_data._M_reduce_max_index(); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr bool
+ __is_const_known(const basic_mask& __x)
+ { return __is_const_known(__x._M_data); }
+ };
+
+ template <__vectorizable _Tp, __abi_tag _Ap>
+ requires __complex_like<_Tp> && (_Ap::_S_is_cx_ctgus || _Ap::_S_size == 1)
+ class basic_vec<_Tp, _Ap>
+ : public _VecBase<_Tp, _Ap>
+ {
+ template <typename, typename>
+ friend class basic_vec;
+
+ static constexpr int _S_size = _Ap::_S_size;
+
+ static constexpr int _S_full_size = __bit_ceil(unsigned(_S_size));
+
+ using _T0 = typename _Tp::value_type;
+
+ using _RealSimd = __similar_vec<_T0, _S_size, _Ap>;
+
+ _RealSimd _M_real = {};
+
+ _RealSimd _M_imag = {};
+
+ static constexpr bool _S_is_scalar = _RealSimd::_S_is_scalar;
+
+ static constexpr bool _S_use_bitmask = _RealSimd::_S_use_bitmask;
+
+ static constexpr bool _S_is_partial = _RealSimd::_S_is_partial;
+
+ public:
+ using value_type = _Tp;
+
+ using mask_type = _VecBase<_Tp, _Ap>::mask_type;
+
+ // internal but public API ----------------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr _RealSimd&
+ _M_get_real() noexcept
+ { return _M_real; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr const _RealSimd&
+ _M_get_real() const noexcept
+ { return _M_real; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr _RealSimd&
+ _M_get_imag() noexcept
+ { return _M_imag; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr const _RealSimd&
+ _M_get_imag() const noexcept
+ { return _M_imag; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_get_low() const requires (_Ap::_S_nreg >= 2)
+ {
+ return resize_t<_M_real._N0, basic_vec>(
+ _M_real._M_get_low(), _M_imag._M_get_low());
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_get_high() const requires (_Ap::_S_nreg >= 2)
+ {
+ return resize_t<_M_real._N1, basic_vec>(
+ _M_real._M_get_high(), _M_imag._M_get_high());
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_concat_data(bool /*do_sanitize*/ = false) const
+ requires (_S_size == 1) // only for _CxCtgus of size 1
+ {
+ return __vec_builtin_type<__canonical_vec_type_t<_T0>, 2>{
+ _M_real._M_data, _M_imag._M_data
+ };
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_get_ileav_data() const
+ requires (_S_size == 1 && _Ap::_S_is_cx_ileav)
+ { return __builtin_bit_cast(__similar_vec<_T0, 2, _Ap>, *this); }
+
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_vec
+ _S_init(const __similar_vec<_T0, 2, _Ap>& __x)
+ requires (_S_size == 1 && _Ap::_S_is_cx_ileav)
+ { return __builtin_bit_cast(basic_vec, __x); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr bool
+ __is_const_known(const basic_vec& __x)
+ { return __is_const_known(__x._M_real) && __is_const_known(__x._M_imag); }
+
+ template <typename _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_chunk() const noexcept
+ {
+ constexpr int __n = _S_size / _Vp::_S_size;
+ constexpr int __rem = _S_size % _Vp::_S_size;
+ const auto [...__rs, __rN] = _M_real.template _M_chunk<typename _Vp::_RealSimd>();
+ const auto [...__is, __iN] = _M_imag.template _M_chunk<typename _Vp::_RealSimd>();
+ if constexpr (__rem == 0)
+ return array<_Vp, __n>{_Vp(__rs, __is)..., _Vp(__rN, __iN)};
+ else
+ return tuple(_Vp(__rs, __is)..., resize_t<__rem, _Vp>(__rN, __iN));
+ }
+
+ template <typename _A0>
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_vec
+ _S_concat(const basic_vec<value_type, _A0>& __x0) noexcept
+ { return static_cast<basic_vec>(__x0); }
+
+ template <typename... _As>
+ requires (sizeof...(_As) > 1)
+ [[__gnu__::__always_inline__]]
+ static constexpr basic_vec
+ _S_concat(const basic_vec<value_type, _As>&... __xs) noexcept
+ { return {_RealSimd::_S_concat(__xs._M_real...), _RealSimd::_S_concat(__xs._M_imag...) }; }
+
+ template <typename _BinaryOp>
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_reduce_to_register(_BinaryOp __binary_op) const
+ {
+ if constexpr (_RealSimd::abi_type::_S_nreg == 1)
+ return *this;
+ else
+ {
+ auto [__lo, __hi] = _M_chunk<resize_t<_RealSimd::_N0, basic_vec>>();
+ auto __a = __lo._M_reduce_to_register(__binary_op);
+ auto __b = __hi._M_reduce_to_register(__binary_op);
+ if constexpr (__a._S_size == __b._S_size)
+ return __binary_op(__a, __b);
+ else
+ {
+ using _V1 = resize_t<1, basic_vec>;
+ return __binary_op(_V1(__a._M_reduce(__binary_op)),
+ _V1(__b._M_reduce(__binary_op)));
+ }
+ }
+ }
+
+ template <typename _BinaryOp, _ArchTraits _Traits = {}>
+ [[__gnu__::__always_inline__]]
+ constexpr value_type
+ _M_reduce(_BinaryOp __binary_op) const
+ {
+ if constexpr (_S_size == 1)
+ return operator[](0);
+ else if constexpr (_Traits.template _M_eval_as_f32<_T0>())
+ return value_type(rebind_t<complex<float>, basic_vec>(*this)._M_reduce(__binary_op));
+ else if constexpr (_RealSimd::abi_type::_S_nreg >= 2)
+ return _M_reduce_to_register(__binary_op)._M_reduce(__binary_op);
+ else if constexpr (__has_single_bit(unsigned(_S_size)))
+ {
+ const auto [__a, __b] = _M_chunk<resize_t<_S_size / 2, basic_vec>>();
+ return __binary_op(__a, __b)._M_reduce(__binary_op);
+ }
+ else
+ {
+ const auto [__a, __b, __c, ...__rest]
+ = _M_chunk<resize_t<__bit_floor(unsigned(_S_size)) / 2, basic_vec>>();
+ const auto __ab = __binary_op(__a, __b);
+ static_assert(sizeof...(__rest) <= 1);
+ if constexpr (__a._S_size != __c._S_size)
+ return cat(__ab, __c)._M_reduce(__binary_op);
+ else
+ return cat(__binary_op(__ab, __c), __rest...)._M_reduce(__binary_op);
+ }
+ }
+
+ /** @internal
+ * Implementation of @ref partial_load.
+ *
+ * If @p __mem stores complex numbers, this needs to load @c abcdefgh from memory into two
+ * basic_vec: @c aceg and @c bdfh.
+ *
+ * @param __mem A pointer to an array of @p __n values. Can be complex or real.
+ * @param __n Read no more than @p __n values from memory.
+ *
+ * @todo Optimize with deinterleaving loads or loads + deinterleaving fixup.
+ */
+ template <typename _Up>
+ [[__gnu__::__always_inline__]]
+ static inline basic_vec
+ _S_partial_load(const _Up* __mem, size_t __n)
+ {
+ if constexpr (__complex_like<_Up>)
+ return basic_vec(
+ _RealSimd([&](size_t __i) -> _T0 {
+ return __i < __n ? __mem[__i].real() : _T0();
+ }),
+ _RealSimd([&](size_t __i) -> _T0 {
+ return __i < __n ? __mem[__i].imag() : _T0();
+ }));
+ else
+ return basic_vec(_RealSimd::_S_partial_load(__mem, __n));
+ }
+
+ /** @internal
+ *
+ * @todo Optimize with deinterleaving loads or loads + deinterleaving fixup.
+ */
+ template <typename _Up, _ArchTraits _Traits = {}>
+ static inline basic_vec
+ _S_masked_load(const _Up* __mem, mask_type __k)
+ {
+ if constexpr (__complex_like<_Up>)
+ { // TODO: optimize
+ return basic_vec(_RealSimd([&](int __i) {
+ return __k[__i] ? __mem[__i].real() : _T0();
+ }),
+ _RealSimd([&](int __i) {
+ return __k[__i] ? __mem[__i].imag() : _T0();
+ }));
+ }
+ else
+ return basic_vec(_RealSimd::_S_masked_load(__mem, typename _RealSimd::mask_type(__k)));
+ }
+
+ template <typename _Up>
+ [[__gnu__::__always_inline__]]
+ inline void
+ _M_store(_Up* __mem) const
+ {
+ static_assert(__complex_like<_Up>);
+ for (int __i = 0; __i < _S_size; ++__i)
+ {
+ __mem[__i].real(_M_real[__i]);
+ __mem[__i].imag(_M_imag[__i]);
+ }
+ }
+
+ template <typename _Up>
+ [[__gnu__::__always_inline__]]
+ static inline void
+ _S_partial_store(const basic_vec& __v, _Up* __mem, size_t __n)
+ {
+ static_assert(__complex_like<_Up>);
+ for (size_t __i = 0; __i < std::min(__n, size_t(_S_size)); ++__i)
+ {
+ __mem[__i].real(__v._M_real[__i]);
+ __mem[__i].imag(__v._M_imag[__i]);
+ }
+ }
+
+ template <typename _Up>
+ [[__gnu__::__always_inline__]]
+ static inline void
+ _S_masked_store(const basic_vec& __v, _Up* __mem, const mask_type& __k)
+ {
+ // TODO: optimize
+ static_assert(__complex_like<_Up>);
+ for (int __i = 0; __i < _S_size; ++__i)
+ {
+ if (__k[__i])
+ __mem[__i] = __v[__i];
+ }
+ }
+
+ basic_vec() = default;
+
+ // TODO: conversion extensions
+
+ // [simd.ctor] broadcast constructor ------------------------------------
+ template <__broadcast_constructible<value_type> _Up>
+ requires __complex_like<_Up>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_vec(_Up&& __x) noexcept
+ : _M_real(__x.real()), _M_imag(__x.imag())
+ {}
+
+ template <__broadcast_constructible<value_type> _Up>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_vec(_Up&& __x) noexcept
+ : _M_real(__x), _M_imag()
+ {}
+
+ // [simd.ctor] conversion constructor -----------------------------------
+ template <__complex_like _Up, typename _UAbi>
+ requires (_S_size == _UAbi::_S_size)
+ && __explicitly_convertible_to<_Up, value_type>
+ && _UAbi::_S_is_cx_ileav
+ [[__gnu__::__always_inline__]]
+ constexpr
+ explicit(!convertible_to<_Up, value_type>)
+ basic_vec(const basic_vec<_Up, _UAbi>& __x) noexcept
+ : _M_real(__x.real()), _M_imag(__x.imag())
+ {}
+
+ template <__complex_like _Up, typename _UAbi>
+ requires (_S_size == _UAbi::_S_size)
+ && __explicitly_convertible_to<_Up, value_type>
+ && (!_UAbi::_S_is_cx_ileav)
+ [[__gnu__::__always_inline__]]
+ constexpr
+ explicit(!convertible_to<_Up, value_type>)
+ basic_vec(const basic_vec<_Up, _UAbi>& __x) noexcept
+ : _M_real(__x._M_real), _M_imag(__x._M_imag) // using real() instead of _M_real is possible
+ // but potentially leads to memcpy because of oversized _M_real (likewise for imag)
+ {}
+
+ template <typename _Up, typename _UAbi> // _Up is not complex!
+ requires (!__complex_like<_Up>)
+ && (_S_size == _UAbi::_S_size)
+ && __explicitly_convertible_to<_Up, value_type>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ explicit(!convertible_to<_Up, value_type>)
+ basic_vec(const basic_vec<_Up, _UAbi>& __x) noexcept
+ : _M_real(__x), _M_imag()
+ {}
+
+ using _VecBase<_Tp, _Ap>::_VecBase;
+
+ // [simd.ctor] generator constructor ------------------------------------
+ template <__simd_generator_invokable<value_type, _S_size> _Fp>
+ [[__gnu__::__always_inline__]]
+ constexpr explicit
+ basic_vec(_Fp&& __gen)
+ : _M_real(),
+ _M_imag([&] {
+ _T0 __re[sizeof(_RealSimd) / sizeof(_T0)] = {};
+ _T0 __im[sizeof(_RealSimd) / sizeof(_T0)] = {};
+ template for (constexpr int __i : _IotaArray<_S_size>)
+ {
+ const value_type __c = static_cast<value_type>(__gen(__simd_size_c<__i>));
+ __re[__i] = __c.real();
+ __im[__i] = __c.imag();
+ }
+ _M_real = __builtin_bit_cast(_RealSimd, __re);
+ return __builtin_bit_cast(_RealSimd, __im);
+ }())
+ {}
+
+ // [simd.ctor] load constructor -----------------------------------------
+ template <__complex_like _Up>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_vec(_LoadCtorTag, const _Up* __ptr)
+ : _M_real([&](int __i) -> _T0 { return __ptr[__i].real(); }),
+ _M_imag([&](int __i) -> _T0 { return __ptr[__i].imag(); })
+ {}
+
+ template <typename _Up>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_vec(_LoadCtorTag, const _Up* __ptr)
+ : _M_real(_LoadCtorTag(), __ptr), _M_imag()
+ {}
+
+ template <ranges::contiguous_range _Rg, typename... _Flags>
+ requires __static_sized_range<_Rg, _S_size>
+ && __vectorizable<ranges::range_value_t<_Rg>>
+ && __explicitly_convertible_to<ranges::range_value_t<_Rg>, value_type>
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_vec(_Rg&& __range, flags<_Flags...> __flags = {})
+ : basic_vec(_LoadCtorTag(), __flags.template _S_adjust_pointer<basic_vec>(
+ ranges::data(__range)))
+ {
+ static_assert(__loadstore_convertible_to<ranges::range_value_t<_Rg>, value_type,
+ _Flags...>);
+ }
+
+ // [simd.ctor] complex init ---------------------------------------------
+ // This uses _RealSimd as proposed in LWG4230
+ [[__gnu__::__always_inline__]]
+ constexpr
+ basic_vec(const _RealSimd& __re, const _RealSimd& __im = {}) noexcept
+ : _M_real(__re), _M_imag(__im)
+ {}
+
+ // [simd.subscr] --------------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr value_type
+ operator[](__simd_size_type __i) const
+ { return value_type(_M_real[__i], _M_imag[__i]); }
+
+ // [simd.unary] unary operators -----------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec&
+ operator++() noexcept requires requires(value_type __a) { ++__a; }
+ {
+ ++_M_real;
+ return *this;
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec
+ operator++(int) noexcept requires requires(value_type __a) { __a++; }
+ {
+ basic_vec __r = *this;
+ ++_M_real;
+ return __r;
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec&
+ operator--() noexcept requires requires(value_type __a) { --__a; }
+ {
+ --_M_real;
+ return *this;
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec
+ operator--(int) noexcept requires requires(value_type __a) { __a--; }
+ {
+ basic_vec __r = *this;
+ --_M_real;
+ return __r;
+ }
+
+ [[__gnu__::__always_inline__]]
+ constexpr mask_type
+ operator!() const noexcept requires requires(value_type __a) { !__a; }
+ { return !_M_real && !_M_imag; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec
+ operator+() const noexcept requires requires(value_type __a) { +__a; }
+ { return *this; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec
+ operator-() const noexcept requires requires(value_type __a) { -__a; }
+ { return basic_vec(-_M_real, -_M_imag); }
+
+ // [simd.cassign] compound assignment -----------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_vec&
+ operator+=(basic_vec& __x, const basic_vec& __y) noexcept
+ requires requires(value_type __a) { __a + __a; }
+ {
+ __x._M_real += __y._M_real;
+ __x._M_imag += __y._M_imag;
+ return __x;
+ }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_vec&
+ operator-=(basic_vec& __x, const basic_vec& __y) noexcept
+ requires requires(value_type __a) { __a - __a; }
+ {
+ __x._M_real -= __y._M_real;
+ __x._M_imag -= __y._M_imag;
+ return __x;
+ }
+
+
+ template <_TargetTraits _Traits = {}>
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_vec&
+ operator*=(basic_vec& __x, const basic_vec& __y) noexcept
+ requires requires(value_type __a) { __a * __a; }
+ {
+ __cxctgus::__mul<value_type, _Traits>(__x._M_real, __x._M_imag, __y._M_real, __y._M_imag);
+ return __x;
+ }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_vec&
+ operator/=(basic_vec& __x, const basic_vec& __y) noexcept
+ requires requires(value_type __a) { __a / __a; }
+ {
+ const _RealSimd __r = __x._M_real * __y._M_real + __x._M_imag * __y._M_imag;
+ const _RealSimd __n = __y._M_norm();
+ __x._M_imag = (__x._M_imag * __y._M_real - __x._M_real * __y._M_imag) / __n;
+ __x._M_real = __r / __n;
+ return __x;
+ }
+
+ // [simd.comparison] compare operators ----------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr mask_type
+ operator==(const basic_vec& __x, const basic_vec& __y) noexcept
+ { return mask_type(__x._M_real == __y._M_real && __x._M_imag == __y._M_imag); }
+
+ [[__gnu__::__always_inline__]]
+ friend constexpr mask_type
+ operator!=(const basic_vec& __x, const basic_vec& __y) noexcept
+ { return mask_type(__x._M_real != __y._M_real || __x._M_imag != __y._M_imag); }
+
+ // [simd.complex.access] complex-value accessors ------------------------
+ // LWG4230: returns _RealSimd instead of auto
+ [[__gnu__::__always_inline__]]
+ constexpr _RealSimd
+ real() const noexcept
+ { return _M_real; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr _RealSimd
+ imag() const noexcept
+ { return _M_imag; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr void
+ real(const _RealSimd& __x) noexcept
+ { _M_real = __x; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr void
+ imag(const _RealSimd& __x) noexcept
+ { _M_imag = __x; }
+
+ // [simd.cond] ---------------------------------------------------------
+ [[__gnu__::__always_inline__]]
+ friend constexpr basic_vec
+ __select_impl(const mask_type& __k, const basic_vec& __t, const basic_vec& __f) noexcept
+ {
+ typename basic_vec::_RealSimd::mask_type __kk(__k);
+ return basic_vec(__select_impl(__kk, __t._M_real, __f._M_real),
+ __select_impl(__kk, __t._M_imag, __f._M_imag));
+ }
+
+ // [simd.complex.math] internals ---------------------------------------
+ [[__gnu__::__always_inline__]]
+ constexpr _RealSimd
+ _M_abs() const; // TODO: depends on [simd.math]
+
+ // associated functions
+ [[__gnu__::__always_inline__]]
+ constexpr _RealSimd
+ _M_norm() const
+ { return _M_real * _M_real + _M_imag * _M_imag; }
+
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec
+ _M_conj() const
+ { return basic_vec(_M_real, -_M_imag); }
+ };
+
+ // [P3319R5] (extension) ----------------------------------------------------
+ template <__complex_like _Tp, typename _Ap>
+ inline constexpr basic_vec<_Tp, _Ap>
+ __iota<basic_vec<_Tp, _Ap>> = basic_vec<_Tp, _Ap>([](typename _Tp::value_type __i)
+ -> typename _Tp::value_type {
+ static_assert(_Ap::_S_size - 1 <= numeric_limits<typename _Tp::value_type>::max(),
+ "iota object would overflow");
+ return __i;
+ });
+} // namespace simd
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace std
+
+#pragma GCC diagnostic pop
+#endif // C++26
+#endif // _GLIBCXX_SIMD_COMPLEX_H
@@ -75,6 +75,8 @@ namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
+ template<typename> class complex;
+
namespace simd
{
template <typename _Tp>
@@ -82,6 +84,15 @@ namespace simd
__iota = [] { static_assert(false, "invalid __iota specialization"); }();
// [simd.general] vectorizable types
+ template <typename _Tp>
+ concept __complex_like_impl
+ = same_as<_Tp, complex<typename _Tp::value_type>>;
+
+ /** @internal
+ * Satisfied if @p _Tp implements the std::complex interface.
+ */
+ template <typename _Tp>
+ concept __complex_like = __complex_like_impl<remove_cvref_t<_Tp>>;
template <typename _Tp>
concept __vectorizable_scalar
@@ -94,7 +105,10 @@ namespace simd
// [simd.general] p2
template <typename _Tp>
- concept __vectorizable = __vectorizable_scalar<_Tp>;
+ concept __vectorizable
+ = __vectorizable_scalar<_Tp>
+ || (__complex_like_impl<_Tp> && __vectorizable_scalar<typename _Tp::value_type>
+ && floating_point<typename _Tp::value_type>);
/** @internal
* Describes variants of _Abi.
@@ -103,6 +117,11 @@ namespace simd
{
_BitMask = 0x01, // AVX512 bit-masks
_MaskVariants = 0x0f, // vector masks if bits [0:3] are 0
+ _CxIleav = 0x10, // store complex components interleaved (ririri...)
+ // mask elements are stored for both (001122...)
+ _CxCtgus = 0x20, // ... or store complex components contiguously (rrrr iiii)
+ // mask elements are store for one component (0123)
+ _CxVariants = _CxIleav | _CxCtgus,
};
/** @internal
@@ -141,6 +160,16 @@ namespace simd
return _InvalidInteger();
}());
+ template <size_t _Bytes>
+ using __float_from = decltype([] consteval {
+ if constexpr (sizeof(double) == _Bytes)
+ return double();
+ else if constexpr (sizeof(float) == _Bytes)
+ return float();
+ else if constexpr (sizeof(_Float16) == _Bytes)
+ return _Float16();
+ }());
+
/** @internal
* Alias for an unsigned integer type T such that sizeof(T) equals _Bytes.
*/
@@ -271,12 +300,17 @@ namespace simd
* The number of registers needed to represent one basic_vec for the element type that was
* used on ABI deduction.
*
+ * For _CxCtgus the value applies twice, once per reals and once per imags.
+ *
* Examples:
* - '_Abi< 8, 2>' for 'int' is 2x 128-bit
* - '_Abi< 9, 3>' for 'int' is 2x 128-bit and 1x 32-bit
* - '_Abi<10, 3>' for 'int' is 2x 128-bit and 1x 64-bit
* - '_Abi<10, 1>' for 'int' is 1x 512-bit
* - '_Abi<10, 2>' for 'int' is 1x 256-bit and 1x 64-bit
+ * - '_Abi< 8, 2, _CxIleav>' for 'complex<float>' is 2x 256-bit
+ * - '_Abi< 9, 2, _CxIleav>' for 'complex<float>' is 1x 512-bit and 1x 64-bit
+ * - '_Abi< 8, 1, _CxCtgus>' for 'complex<float>' is 2x 256-bit
*/
static constexpr int _S_nreg = _Nreg;
@@ -285,6 +319,16 @@ namespace simd
static constexpr _AbiVariant _S_variant = static_cast<_AbiVariant>(_Var);
+ static constexpr bool _S_is_cx_ileav
+ = __filter_abi_variant(_S_variant, _AbiVariant::_CxIleav) == _AbiVariant::_CxIleav;
+
+ static constexpr bool _S_is_cx_ctgus
+ = __filter_abi_variant(_S_variant, _AbiVariant::_CxCtgus) == _AbiVariant::_CxCtgus;
+
+ static_assert(!(_S_is_cx_ileav && _S_is_cx_ctgus)); // can't be both
+
+ static_assert(_S_size >= _S_nreg || (_S_is_cx_ileav && _S_size * 2 >= _S_nreg));
+
static constexpr bool _S_is_bitmask
= __filter_abi_variant(_S_variant, _AbiVariant::_BitMask) == _AbiVariant::_BitMask;
@@ -725,6 +769,10 @@ namespace simd
consteval bool
_M_eval_as_f32() const
{ return is_same_v<_Tp, _Float16> && !_M_have_avx512fp16(); }
+
+ consteval bool
+ _M_have_addsub() const
+ { return _M_have_sse3(); }
};
template <typename _Tp, _ArchTraits _Traits = {}>
@@ -734,6 +782,12 @@ namespace simd
constexpr int __adj_sizeof = sizeof(_Tp) * (1 + is_same_v<_Tp, _Float16>);
if constexpr (!__vectorizable<_Tp>)
return _InvalidAbi();
+ else if constexpr (__complex_like<_Tp>)
+ {
+ constexpr auto __underlying = std::simd::__native_abi<typename _Tp::value_type>();
+ constexpr int __cx_size = __underlying._S_size / (__underlying._S_size == 1 ? 1 : 2);
+ return _Abi_t<__cx_size, 1, __underlying._S_variant, _AbiVariant::_CxIleav>();
+ }
else if constexpr (_Traits._M_have_avx512fp16())
return _Abi_t<64 / sizeof(_Tp), 1, _AbiVariant::_BitMask>();
else if constexpr (_Traits._M_have_avx512f())
@@ -772,6 +826,8 @@ namespace simd
{
if constexpr (!__vectorizable<_Tp>)
return _InvalidAbi();
+ else if constexpr (__complex_like<_Tp>)
+ return _Abi_t<1, 1, _AbiVariant::_CxIleav>();
else
return _Abi_t<1, 1>();
}
@@ -839,9 +895,40 @@ namespace simd
// an explicit __scalar_abi_tag before some resize_t)
if constexpr (__scalar_abi_tag<_Native> || (__scalar_abi_tag<_A0> && _A0::_S_size >= 2))
{
+ constexpr bool __remove_cx
+ = __filter_abi_variant(_A0::_S_variant, _AbiVariant::_CxVariants) != _AbiVariant()
+ && !__complex_like<_Tp>;
+ constexpr bool __add_cx
+ = __filter_abi_variant(_A0::_S_variant, _AbiVariant::_CxVariants) == _AbiVariant()
+ && __complex_like<_Tp>;
+
+ if constexpr (__remove_cx)
+ return _Abi_t<_Np, _Np,
+ __filter_abi_variant(_A0::_S_variant, _AbiVariant::_MaskVariants)>();
+ else if constexpr (__add_cx)
+ return _Abi_t<_Np, _Np, _A0::_S_variant,
+ __filter_abi_variant(_Native::_S_variant, _AbiVariant::_CxVariants)>();
+ else
return _A0::template _S_resize<_Np, _Np>();
}
+ else if constexpr (__complex_like<_Tp> && _A0::_S_is_cx_ctgus && _Native::_S_is_cx_ileav)
+ // we need half the number of registers since the number applies twice, to reals and
+ // imaginaries.
+ return _A0::template _S_resize<_Np, __div_ceil(__nreg, 2)>();
+
+ else if constexpr (__complex_like<_Tp> && _A0::_S_is_cx_ileav && _Native::_S_is_cx_ctgus)
+ return _A0::template _S_resize<_Np, __nreg * 2>();
+
+ else if constexpr (__complex_like<_Tp> && (_A0::_S_is_cx_ctgus || _A0::_S_is_cx_ileav))
+ return _A0::template _S_resize<_Np, __nreg>();
+
+ else if constexpr (__complex_like<_Tp>)
+ // Bit vs. Vec Mask determined by _A0, _CxVariant determined by _Native
+ return _Abi_t<_Native::_S_size, 1, _A0::_S_variant,
+ __filter_abi_variant(_Native::_S_variant, _AbiVariant::_CxVariants)>
+ ::template _S_resize<_Np, __nreg>();
+
else
return _Abi_t<_Native::_S_size, 1, __filter_abi_variant(_A0::_S_variant,
_AbiVariant::_MaskVariants)
@@ -863,9 +950,21 @@ namespace simd
consteval auto
__abi_rebind()
{
+ constexpr bool __from_cx = _A0::_S_is_cx_ctgus || _A0::_S_is_cx_ileav;
+
if constexpr (_Bytes == 0 || _Np <= 0)
return _InvalidAbi();
+ // If the source ABI is complex, _Bytes == sizeof(complex<float>) or
+ // sizeof(complex<float16_t>), and _IsOnlyResize is true, then it's a mask<complex<float>,
+ // _Np>
+ else if constexpr (__from_cx && _IsOnlyResize && _Bytes == 2 * sizeof(double))
+ return __abi_rebind<complex<double>, _Np, _A0>();
+ else if constexpr (__from_cx && _IsOnlyResize && _Bytes == 2 * sizeof(float))
+ return __abi_rebind<complex<float>, _Np, _A0>();
+ else if constexpr (__from_cx && _IsOnlyResize && _Bytes == 2 * sizeof(_Float16))
+ return __abi_rebind<complex<_Float16>, _Np, _A0>();
+
#if _GLIBCXX_X86
// AVX w/o AVX2:
// e.g. resize_t<8, mask<float, Whatever>> needs to be _Abi<8, 1> not _Abi<8, 2>
@@ -925,6 +1024,18 @@ namespace simd
else if constexpr (_From::_S_nreg != _To::_S_nreg)
return _From::_S_nreg < _To::_S_nreg;
+ // differ only on _Cx flags
+ // interleaved complex is worse
+ else if constexpr (_To::_S_is_cx_ileav)
+ return true;
+ else if constexpr (_From::_S_is_cx_ileav)
+ return false;
+
+ // prefer non-_Cx over _CxCtgus
+ else if constexpr (_To::_S_is_cx_ctgus)
+ return true;
+ else if constexpr (_From::_S_is_cx_ctgus)
+ return false;
else
__builtin_unreachable();
#endif
@@ -991,7 +1102,9 @@ namespace simd
*/
template <typename _From, typename _To>
concept __value_preserving_convertible_to
- = __arithmetic_only_value_preserving_convertible_to<_From, _To>;
+ = __arithmetic_only_value_preserving_convertible_to<_From, _To>
+ || (__complex_like<_To> && __arithmetic_only_value_preserving_convertible_to<
+ _From, typename _To::value_type>);
// LWG4420
template <typename _From, typename _To>
@@ -1119,6 +1232,17 @@ namespace simd
concept __simd_integral
= __simd_vec_type<_Vp> && integral<typename _Vp::value_type>;
+ template <typename _Vp>
+ concept __simd_unsigned_integer
+ = __simd_vec_type<_Vp> && __unsigned_integer<typename _Vp::value_type>;
+
+ template <typename _Vp>
+ using __simd_complex_value_type = typename _Vp::value_type::value_type;
+
+ template <typename _Vp>
+ concept __simd_complex
+ = __simd_vec_type<_Vp> && __complex_like_impl<typename _Vp::value_type>;
+
template <typename _Tp>
concept __converts_to_vec
= __simd_vec_type<decltype(declval<const _Tp&>() + declval<const _Tp&>())>;
@@ -1160,6 +1284,16 @@ namespace simd
template <__vectorizable _Tp, __simd_size_type _Np, __abi_tag _Ap>
using __similar_mask = basic_mask<sizeof(_Tp), decltype(__abi_rebind<_Tp, _Np, _Ap>())>;
+ template <size_t _Bytes, __abi_tag _Ap>
+ using __component_mask_for_ileav
+ = basic_mask<_Bytes / 2,
+ decltype(__abi_rebind<__float_from<_Bytes / 2>, _Ap::_S_size * 2, _Ap>())>;
+
+ template <size_t _Bytes, __abi_tag _Ap>
+ using __component_mask_for_ctgus
+ = basic_mask<_Bytes / 2,
+ decltype(__abi_rebind<__float_from<_Bytes / 2>, _Ap::_S_size, _Ap>())>;
+
// Allow _Tp to be _InvalidInteger for __integer_from<16>
template <typename _Tp, __simd_size_type _Np, __abi_tag _Ap>
using __similar_vec = basic_vec<_Tp, decltype(__abi_rebind<_Tp, _Np, _Ap>())>;
@@ -90,6 +90,8 @@ namespace simd
return _RV([&](size_t __i) -> _Rp {
if (__i >= __rg_size)
return _Rp();
+ else if constexpr (__complex_like<_Rp> && !__complex_like<_Tp>)
+ return static_cast<typename _Rp::value_type>(__r[__i]);
else
return static_cast<_Rp>(__r[__i]);
});
@@ -137,6 +139,8 @@ namespace simd
return _RV([&](size_t __i) -> _Rp {
if (__i >= __rg_size || !__mask[int(__i)])
return _Rp();
+ else if constexpr (__complex_like<_Rp> && !__complex_like<_Tp>)
+ return static_cast<typename _Rp::value_type>(__r[__i]);
else
return static_cast<_Rp>(__r[__i]);
});
@@ -522,6 +522,8 @@ namespace simd
template <size_t _Bytes, __abi_tag _Ap>
requires (_Ap::_S_nreg == 1)
+ && (__filter_abi_variant(_Ap::_S_variant, _AbiVariant::_CxVariants) == _AbiVariant()
+ || _Ap::_S_size == 1) // _Abi<1, 1, _CxIleav> and _Abi<1, 1, _CxCtgus> go here
class basic_mask<_Bytes, _Ap>
: public _MaskBase<_Bytes, _Ap>
{
@@ -605,6 +607,30 @@ namespace simd
_M_get() const
{ return _M_data; }
+ /** @internal
+ * @brief Converts the type of the mask without changing the data member.
+ *
+ * Since _Abi<1, 1, _CxCtgus> uses this partial specialization of basic_mask, the _M_data
+ * member cannot be used as mask that matches the basic_vec elements.
+ */
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_get_ileav_data() const noexcept
+ requires _Ap::_S_is_cx_ileav
+ { return __component_mask_for_ileav<_Bytes, _Ap>(_M_data); }
+
+ /** @internal
+ * @brief Converts the type of the mask from a scalar (bool) into a mask of 2 elements.
+ *
+ * Since _Abi<1, 1, _CxIleav> uses this partial specialization of basic_mask, the _M_data
+ * member cannot be used as mask that matches the basic_vec elements.
+ */
+ [[__gnu__::__always_inline__]]
+ constexpr auto
+ _M_get_ctgus_data() const noexcept
+ requires _Ap::_S_is_cx_ctgus
+ { return __component_mask_for_ctgus<_Bytes, _Ap>(_M_data); }
+
/** @internal
* Bit-cast the given object @p __x to basic_mask.
*
@@ -786,10 +812,34 @@ namespace simd
else if constexpr (_S_use_bitmask || _UV::_S_use_bitmask)
return basic_mask(__x.to_bitset())._M_data;
+ // _CxCtgus stores its masks matching the complex::value_type (_UBytes/2)
+ else if constexpr (_UAbi::_S_is_cx_ctgus)
+ return basic_mask(__x._M_data)._M_data;
+
// vec-mask to vec-mask
else if constexpr (_Bytes == _UBytes)
return _S_recursive_bit_cast(__x)._M_data;
+ // 2-mask-elements wrapper to plain mask
+ else if constexpr (_UAbi::_S_is_cx_ileav)
+ {
+ if constexpr (_UBytes <= sizeof(0ll))
+ // two step (bit-cast -> convert)
+ return basic_mask(__similar_mask<__integer_from<_UBytes>, _S_size, _UAbi>(__x))
+ ._M_data;
+ else if constexpr (_Bytes == 1)
+ { // 16 -> 1
+ constexpr auto [...__is] = _IotaArray<_S_size>;
+ using _Ip = __vec_value_type<_DataType>;
+ return _DataType {_Ip(__x._M_data._M_concat_data()[__is * 2])...};
+ }
+ else // from complex<double>
+ {
+ const auto __k2 = __similar_mask<__integer_from<_Bytes / 2>, 2 * _S_size,
+ _UAbi>(__x._M_data);
+ return _S_recursive_bit_cast(__k2);
+ }
+ }
else
{
#if _GLIBCXX_X86
@@ -1015,13 +1065,14 @@ namespace simd
* @tparam _Use_2_for_1 Store the value of every second element into one bit of the result.
* (precondition: each even/odd pair stores the same value)
*/
- template <int _Offset = 0, _ArchTraits _Traits = {}>
+ template <int _Offset = 0, bool _Use_2_for_1 = false, _ArchTraits _Traits = {}>
[[__gnu__::__always_inline__]]
- constexpr _Bitmask<_S_size + _Offset>
+ constexpr _Bitmask<_S_size / (_Use_2_for_1 + 1) + _Offset>
_M_to_uint() const
{
- constexpr int __nbits = _S_size;
+ constexpr int __nbits = _S_size / (_Use_2_for_1 + 1);
static_assert(__nbits + _Offset <= numeric_limits<unsigned long long>::digits);
+ static_assert(!(_S_is_scalar && _Use_2_for_1));
// before shifting
using _U0 = _Bitmask<__nbits>;
// potentially wider type needed for shift by _Offset
@@ -1031,35 +1082,56 @@ namespace simd
auto __bits = _M_data;
if constexpr (_S_is_partial)
__bits &= _S_implicit_mask;
+ if constexpr (_Use_2_for_1)
+ __bits = __bit_extract_even<__nbits>(__bits);
return _Ur(__bits) << _Offset;
}
+ else if constexpr (_Bytes == sizeof(0ll) && _Use_2_for_1)
+ {
+ const auto __u32 = __vec_bit_cast<unsigned>(_M_data);
+ if constexpr (sizeof(_M_data) == 16)
+ {
+ if constexpr (_Offset < 32)
+ return __u32[0] & (1u << _Offset);
+ else
+ return _M_data[0] & (1ull << _Offset);
+ }
+ else if constexpr (sizeof(_M_data) == 32)
+ {
+ if constexpr (_Offset < 31)
+ return (__u32[4] & (2u << _Offset)) | (__u32[0] & (1u << _Offset));
+ else
+ return (_M_data[2] & (2ull << _Offset)) | (_M_data[0] & (1ull << _Offset));
+ }
+ else
+ static_assert(false);
+ }
+ else if constexpr (_Use_2_for_1 && __nbits == 1)
+ return _Ur(operator[](0)) << _Offset;
else
{
#if _GLIBCXX_X86
if (!__is_const_known(*this))
{
_U0 __uint;
- if constexpr (_Bytes != 2) // movmskb would duplicate each bit
- __uint = _U0(__x86_movmsk(_M_data));
- else if constexpr (_Bytes == 2 && _Traits._M_have_bmi2())
- __uint = __bit_extract_even<__nbits>(__x86_movmsk(_M_data));
- else if constexpr (_Bytes == 2)
- return __similar_mask<char, __nbits, _Ap>(*this).template _M_to_uint<_Offset>();
+ if constexpr (_Use_2_for_1)
+ __uint = __x86_cvt_vecmask_to_bitmask<_Traits>(
+ __vec_bit_cast<__integer_from<_Bytes * 2>>(_M_data));
else
- static_assert(false);
- // TODO: with AVX512 use __builtin_ia32_cvt[bwdq]2mask(128|256|512)
- // TODO: Ask for compiler builtin to do the best of the above. This should also
- // combine with a preceding vector-mask compare to produce a bit-mask compare (on
- // AVX512)
+ __uint = __x86_cvt_vecmask_to_bitmask<_Traits>( _M_data);
if constexpr (_S_is_partial)
__uint &= (_U0(1) << _S_size) - 1;
return _Ur(__uint) << _Offset;
}
#endif
- using _IV = _VecType;
+ using _IV = conditional_t<_Use_2_for_1,
+ __similar_vec<__integer_from<_Bytes * 2>, __nbits, _Ap>,
+ _VecType>;
static_assert(destructible<_IV>);
const typename _IV::mask_type& __k = [&] [[__gnu__::__always_inline__]] () {
- if constexpr (is_same_v<typename _IV::mask_type, basic_mask>)
+ if constexpr (_Use_2_for_1)
+ return typename _IV::mask_type(__to_cx_ileav(*this));
+ else if constexpr (is_same_v<typename _IV::mask_type, basic_mask>)
return *this;
else
return typename _IV::mask_type(*this);
@@ -1075,8 +1147,8 @@ namespace simd
{ // recurse after splitting in two
constexpr int __n_lo = __n - __n % __CHAR_BIT__;
const auto [__lo, __hi] = chunk<__n_lo>(__k);
- _Ur __bits = __hi.template _M_to_uint<_Offset + __n_lo>();
- return __bits | __lo.template _M_to_uint<_Offset>();
+ _Ur __bits = __hi.template _M_to_uint<_Offset + __n_lo, _Use_2_for_1>();
+ return __bits | __lo.template _M_to_uint<_Offset, _Use_2_for_1>();
}
else
{ // limit powers_of_2 to 1, 2, 4, ..., 128
@@ -1345,6 +1417,7 @@ namespace simd
template <size_t _Bytes, __abi_tag _Ap>
requires (_Ap::_S_nreg > 1)
+ && (__filter_abi_variant(_Ap::_S_variant, _AbiVariant::_CxVariants) == _AbiVariant())
class basic_mask<_Bytes, _Ap>
: public _MaskBase<_Bytes, _Ap>
{
@@ -1599,13 +1672,25 @@ namespace simd
// [simd.mask.ctor] conversion constructor ------------------------------
template <size_t _UBytes, typename _UAbi>
- requires (_S_size == _UAbi::_S_size)
+ requires (_S_size == _UAbi::_S_size) && (_UAbi::_S_is_cx_ctgus)
+ [[__gnu__::__always_inline__]]
+ constexpr explicit(__is_mask_conversion_explicit<_Ap, _UAbi>(_Bytes, _UBytes))
+ basic_mask(const basic_mask<_UBytes, _UAbi>& __x) noexcept
+ : basic_mask(__x._M_data) // unwrap _CxCtgus basic_mask partial specialization
+ {}
+
+
+ template <size_t _UBytes, typename _UAbi>
+ requires (_S_size == _UAbi::_S_size) && (!_UAbi::_S_is_cx_ctgus)
[[__gnu__::__always_inline__]]
constexpr explicit(__is_mask_conversion_explicit<_Ap, _UAbi>(_Bytes, _UBytes))
basic_mask(const basic_mask<_UBytes, _UAbi>& __x) noexcept
: _M_data0([&] {
if constexpr (_UAbi::_S_nreg > 1)
{
+ if constexpr (_UAbi::_S_is_cx_ileav)
+ return __to_cx_ileav(__x._M_data._M_data0);
+ else
return __x._M_data0;
}
else if constexpr (_N0 == 1)
@@ -1616,6 +1701,9 @@ namespace simd
_M_data1([&] {
if constexpr (_UAbi::_S_nreg > 1)
{
+ if constexpr (_UAbi::_S_is_cx_ileav)
+ return __to_cx_ileav(__x._M_data._M_data1);
+ else
return __x._M_data1;
}
else if constexpr (_N1 == 1)
@@ -1744,29 +1832,31 @@ namespace simd
}
}
- template <int _Offset = 0, _ArchTraits _Traits = {}>
+ template <int _Offset = 0, bool _Use_2_for_1 = false, _ArchTraits _Traits = {}>
[[__gnu__::__always_inline__]]
constexpr auto
_M_to_uint() const
{
- constexpr int _N0x = _N0;
- if constexpr (_N0x >= numeric_limits<unsigned long long>::digits)
+ constexpr int _N0x = _Use_2_for_1 ? _N0 / 2 : _N0;
+ if constexpr (_Use_2_for_1 && _S_is_scalar && _S_size == 2)
+ return _M_data1.template _M_to_uint<_Offset>();
+ else if constexpr (_N0x >= numeric_limits<unsigned long long>::digits)
{
static_assert(_Offset == 0);
return __trivial_pair {
- _M_data0.template _M_to_uint<0>(),
- _M_data1.template _M_to_uint<0>()
+ _M_data0.template _M_to_uint<0, _Use_2_for_1>(),
+ _M_data1.template _M_to_uint<0, _Use_2_for_1>()
};
}
else
{
#if _GLIBCXX_X86
if constexpr (_Bytes == 2 && !_Traits._M_have_bmi2() && _Ap::_S_nreg == 2
- && !_S_use_bitmask)
+ && !_S_is_scalar && !_S_use_bitmask && !_Use_2_for_1)
return __similar_mask<char, _S_size, _Ap>(*this).template _M_to_uint<_Offset>();
#endif
- auto __uint = _M_data1.template _M_to_uint<_N0x + _Offset>();
- __uint |= _M_data0.template _M_to_uint<_Offset>();
+ auto __uint = _M_data1.template _M_to_uint<_N0x + _Offset, _Use_2_for_1>();
+ __uint |= _M_data0.template _M_to_uint<_Offset, _Use_2_for_1>();
return __uint;
}
}
@@ -1899,6 +1989,9 @@ namespace simd
using _Vp = vec<_T0, _S_size>;
if constexpr (!is_same_v<basic_mask, typename _Vp::mask_type>)
return __select_impl(static_cast<_Vp::mask_type>(__k), __t, __f);
+ else if constexpr (__complex_like<_T0>)
+ return _Vp::_S_concat(__select_impl(__k._M_data0, __t, __f),
+ __select_impl(__k._M_data1, __t, __f));
else
return _Vp::_S_init(__select_impl(__k._M_data0, __t, __f),
__select_impl(__k._M_data1, __t, __f));
@@ -1937,6 +2030,21 @@ namespace simd
return _M_data0._M_none_of() && _M_data1._M_none_of();
}
+ [[__gnu__::__always_inline__]]
+ constexpr __simd_size_type
+ _M_reduce_count() const noexcept
+ {
+ if constexpr (_S_is_scalar)
+ // SWAR could help. I don't think we care at the moment.
+ return _M_data0._M_reduce_count() + _M_data1._M_reduce_count();
+ else if constexpr (_S_size <= numeric_limits<unsigned>::digits)
+ return __builtin_popcount(_M_to_uint());
+ else if constexpr (_S_size <= numeric_limits<unsigned long long>::digits)
+ return __builtin_popcountll(to_ullong());
+ else
+ return _M_data0._M_reduce_count() + _M_data1._M_reduce_count();
+ }
+
[[__gnu__::__always_inline__]]
constexpr __simd_size_type
_M_reduce_min_index() const
@@ -68,7 +68,7 @@ namespace simd
{
if constexpr (_Ap::_S_size == 1)
return +__k[0];
- else if constexpr (_Ap::_S_is_vecmask)
+ else if constexpr (_Ap::_S_is_vecmask && _Bytes <= sizeof(0ll))
return -reduce(-__k);
else
return __k._M_reduce_count();
new file mode 100644
@@ -0,0 +1,125 @@
+// Implementation of <simd> -*- C++ -*-
+
+// Copyright The GNU Toolchain Authors.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
+
+#ifndef _GLIBCXX_SIMD_MATH_H
+#define _GLIBCXX_SIMD_MATH_H 1
+
+#ifdef _GLIBCXX_SYSHDR
+#pragma GCC system_header
+#endif
+
+#if __cplusplus >= 202400L
+
+#include "simd_vec.h"
+
+// psabi warnings are bogus because the ABI of the internal types never leaks into user code
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpsabi"
+
+// [simd.math] ----------------------------------------------------------------
+namespace std _GLIBCXX_VISIBILITY(default)
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+namespace simd
+{
+ template <signed_integral T, typename Abi>
+ [[__gnu__::__always_inline__]]
+ constexpr basic_vec<T, Abi>
+ abs(const basic_vec<T, Abi>& __x)
+ { return __x._M_abs(); }
+
+ template <__math_floating_point _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr typename __deduced_vec_t<_Vp>::mask_type
+ isinf(const _Vp& __x)
+ { return static_cast<const __deduced_vec_t<_Vp>&>(__x)._M_isinf(); }
+
+ template <__math_floating_point _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr typename __deduced_vec_t<_Vp>::mask_type
+ isnan(const _Vp& __x)
+ { return static_cast<const __deduced_vec_t<_Vp>&>(__x)._M_isnan(); }
+} // namespace simd
+
+
+// [simd.complex.math] --------------------------------------------------------
+namespace simd
+{
+ template <__simd_complex _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr rebind_t<__simd_complex_value_type<_Vp>, _Vp>
+ real(const _Vp& __x) noexcept
+ { return __x.real(); }
+
+ template <__simd_complex _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr rebind_t<__simd_complex_value_type<_Vp>, _Vp>
+ imag(const _Vp& __x) noexcept
+ { return __x.imag(); }
+
+ template <__simd_complex _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr rebind_t<__simd_complex_value_type<_Vp>, _Vp>
+ abs(const _Vp& __x) noexcept
+ { return __x._M_abs(); }
+
+ template <__simd_complex _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr rebind_t<__simd_complex_value_type<_Vp>, _Vp>
+ arg(const _Vp& __x) noexcept
+ { return __x._M_arg(); }
+
+ template <__simd_complex _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr rebind_t<__simd_complex_value_type<_Vp>, _Vp>
+ norm(const _Vp& __x) noexcept
+ { return __x._M_norm(); }
+
+ template <__simd_complex _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr _Vp
+ conj(const _Vp& __x) noexcept
+ { return __x._M_conj(); }
+
+ template <__simd_complex _Vp>
+ [[__gnu__::__always_inline__]]
+ constexpr _Vp
+ proj(const _Vp& __x) noexcept
+ { return __x._M_proj(); }
+} // namespace simd
+
+ using simd::real;
+ using simd::imag;
+ using simd::abs;
+ using simd::arg;
+ using simd::norm;
+ using simd::conj;
+ using simd::proj;
+
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace std
+
+#pragma GCC diagnostic pop
+#endif // C++26
+#endif // _GLIBCXX_SIMD_MATH_H
@@ -249,6 +249,7 @@ namespace simd
template <__vectorizable _Tp, __abi_tag _Ap>
requires (_Ap::_S_nreg == 1)
+ && (!__complex_like<_Tp>)
class basic_vec<_Tp, _Ap>
: public _VecBase<_Tp, _Ap>
{
@@ -295,9 +296,14 @@ namespace simd
return __r;
}
+ [[__gnu__::__always_inline__]]
+ constexpr _DataType&
+ _M_get() noexcept
+ { return _M_data; }
+
[[__gnu__::__always_inline__]]
constexpr const _DataType&
- _M_get() const
+ _M_get() const noexcept
{ return _M_data; }
[[__gnu__::__always_inline__]]
@@ -1746,6 +1752,7 @@ namespace simd
template <__vectorizable _Tp, __abi_tag _Ap>
requires (_Ap::_S_nreg > 1)
+ && (!__complex_like<_Tp>)
class basic_vec<_Tp, _Ap>
: public _VecBase<_Tp, _Ap>
{
@@ -1795,14 +1802,24 @@ namespace simd
return __r;
}
+ [[__gnu__::__always_inline__]]
+ constexpr _DataType0&
+ _M_get_low() noexcept
+ { return _M_data0; }
+
[[__gnu__::__always_inline__]]
constexpr const _DataType0&
- _M_get_low() const
+ _M_get_low() const noexcept
{ return _M_data0; }
+ [[__gnu__::__always_inline__]]
+ constexpr _DataType1&
+ _M_get_high() noexcept
+ { return _M_data1; }
+
[[__gnu__::__always_inline__]]
constexpr const _DataType1&
- _M_get_high() const
+ _M_get_high() const noexcept
{ return _M_data1; }
[[__gnu__::__always_inline__]]
@@ -1043,6 +1043,52 @@ namespace simd
static_assert(false, "TODO");
}
+ template <_ArchTraits _Traits = {}, __vec_builtin _TV>
+ [[__gnu__::__always_inline__]]
+ inline _Bitmask<__width_of<_TV>>
+ __x86_cvt_vecmask_to_bitmask(const _TV __k)
+ {
+ using _Tp = __vec_value_type<_TV>;
+ constexpr int _Bytes = sizeof(_Tp);
+ constexpr int _Np = __width_of<_TV>;
+ constexpr bool __vl = _Traits._M_have_avx512vl();
+ constexpr bool __bw = _Traits._M_have_avx512bw();
+ constexpr bool __dq = _Traits._M_have_avx512dq();
+ if constexpr (__vl && __bw && _Bytes == 1 && sizeof(__k) == 16)
+ return __builtin_ia32_cvtb2mask128(__k);
+ else if constexpr (__vl && __bw && _Bytes == 1 && sizeof(__k) == 32)
+ return __builtin_ia32_cvtb2mask256(__k);
+ else if constexpr (__bw && _Bytes == 1 && sizeof(__k) == 64)
+ return __builtin_ia32_cvtb2mask512(__k);
+ else if constexpr (__vl && __bw && _Bytes == 2 && sizeof(__k) == 16)
+ return __builtin_ia32_cvtw2mask128(__k);
+ else if constexpr (__vl && __bw && _Bytes == 2 && sizeof(__k) == 32)
+ return __builtin_ia32_cvtw2mask256(__k);
+ else if constexpr (__bw && _Bytes == 2 && sizeof(__k) == 64)
+ return __builtin_ia32_cvtw2mask512(__k);
+ else if constexpr (__vl && __dq && _Bytes == 4 && sizeof(__k) == 16)
+ return __builtin_ia32_cvtd2mask128(__k);
+ else if constexpr (__vl && __dq && _Bytes == 4 && sizeof(__k) == 32)
+ return __builtin_ia32_cvtd2mask256(__k);
+ else if constexpr (__dq && _Bytes == 4 && sizeof(__k) == 64)
+ return __builtin_ia32_cvtd2mask512(__k);
+ else if constexpr (__vl && __dq && _Bytes == 8 && sizeof(__k) == 16)
+ return __builtin_ia32_cvtq2mask128(__k);
+ else if constexpr (__vl && __dq && _Bytes == 8 && sizeof(__k) == 32)
+ return __builtin_ia32_cvtq2mask256(__k);
+ else if constexpr (__dq && _Bytes == 8 && sizeof(__k) == 64)
+ return __builtin_ia32_cvtq2mask512(__k);
+ else if constexpr (__vl && __dq && __bw && sizeof(__k) < 16)
+ return __x86_cvt_vecmask_to_bitmask(__vec_zero_pad_to_16(__k));
+ else if constexpr (_Bytes != 2) // movmskb would duplicate each bit
+ return __x86_movmsk(__k);
+ else if constexpr (_Traits._M_have_bmi2())
+ return __bit_extract_even<_Np>(__x86_movmsk(__k));
+ else
+ return __x86_cvt_vecmask_to_bitmask(
+ __x86_cvt_vecmask<__vec_builtin_type<char, _Np>>(__k));
+ }
+
/** @internal
* AVX512 masked (converting) loads
*
@@ -299,7 +299,10 @@ namespace simd
constexpr bool
__is_const_known(const _Tp& __x)
{
- return __builtin_constant_p(__x);
+ if constexpr (__complex_like<_Tp>)
+ return __is_const_known(__x.real()) && __is_const_known(__x.imag());
+ else
+ return __builtin_constant_p(__x);
}
[[__gnu__::__always_inline__]]
@@ -590,12 +593,71 @@ namespace simd
__xh = __builtin_shufflevector(__xh, __y, ((_Is & 1) == 1 ? __nh + _Is / 2 : _Is)...);
}
+ // negate every even element (real part of interleaved complex)
+ [[__gnu__::__always_inline__]]
+ static constexpr _TV
+ _S_complex_negate_real(_TV __x)
+ { return __vec_xor(_S_broadcast_to_even(_S_signmask<_TV>[0]), __x); }
+
+ // negate every odd element (imaginary part of interleaved complex)
+ [[__gnu__::__always_inline__]]
+ static constexpr _TV
+ _S_complex_negate_imag(_TV __x)
+ { return __vec_xor(_S_broadcast_to_odd(_S_signmask<_TV>[0]), __x); }
+
+ // Subtract elements with even index, add elements with odd index.
+ template <_ArchTraits _Traits = {}>
+ [[__gnu__::__always_inline__]]
+ static constexpr _TV
+ _S_addsub(_TV __x, _TV __y)
+ {
+ if constexpr (_Traits._M_have_addsub())
+ // GCC recognizes this pattern as addsub
+ return __builtin_shufflevector(__x - __y, __x + __y,
+ (_Is + (_Is & 1) * __width_of<_TV>)...);
+ else
+ return __x + _S_complex_negate_real(__y);
+ }
+
// true if all elements are know to be equal to __ref at compile time
[[__gnu__::__always_inline__]]
static constexpr bool
_S_is_const_known_equal_to(_TV __x, _Tp __ref)
{ return (__is_const_known_equal_to(__x[_Is], __ref) && ...); }
+ // True iff all elements at even indexes are zero. This includes signed zeros only when
+ // -fno-signed-zeros is in effect.
+ template <_OptTraits _Traits = {}>
+ [[__gnu__::__always_inline__]]
+ static constexpr bool
+ _S_complex_real_is_const_known_zero(_TV __x)
+ {
+ if constexpr (_Traits._M_conforming_to_STDC_annex_G())
+ {
+ using _Up = _UInt<sizeof(_Tp)>;
+ return (((_Is & 1) == 1 || __is_const_known_equal_to(
+ __builtin_bit_cast(_Up, __x[_Is]), _Up())) && ...);
+ }
+ else
+ return (((_Is & 1) == 1 || __is_const_known_equal_to(__x[_Is], _Tp())) && ...);
+ }
+
+ // True iff all elements at odd indexes are zero. This includes signed zeros only when
+ // -fno-signed-zeros is in effect.
+ template <_OptTraits _Traits = {}>
+ [[__gnu__::__always_inline__]]
+ static constexpr bool
+ _S_complex_imag_is_const_known_zero(_TV __x)
+ {
+ if constexpr (_Traits._M_conforming_to_STDC_annex_G())
+ {
+ using _Up = _UInt<sizeof(_Tp)>;
+ return (((_Is & 1) == 0 || __is_const_known_equal_to(
+ __builtin_bit_cast(_Up, __x[_Is]), _Up())) && ...);
+ }
+ else
+ return (((_Is & 1) == 0 || __is_const_known_equal_to(__x[_Is], _Tp())) && ...);
+ }
};
} // namespace simd
_GLIBCXX_END_NAMESPACE_VERSION
@@ -2401,6 +2401,17 @@ ftms = {
};
};
+ftms = {
+ name = simd_complex;
+ values = {
+ no_stdname = true; // TODO: change once complete
+ v = 202502;
+ cxxmin = 26;
+ extra_cond = "defined __glibcxx_simd";
+ hosted = yes;
+ };
+};
+
ftms = {
name = is_structural;
values = {
@@ -2665,6 +2665,15 @@
#endif /* !defined(__cpp_lib_simd) */
#undef __glibcxx_want_simd
+#if !defined(__cpp_lib_simd_complex)
+# if (__cplusplus > 202302L) && _GLIBCXX_HOSTED && (defined __glibcxx_simd)
+# define __glibcxx_simd_complex 202502L
+# if defined(__glibcxx_want_all) || defined(__glibcxx_want_simd_complex)
+# endif
+# endif
+#endif /* !defined(__cpp_lib_simd_complex) */
+#undef __glibcxx_want_simd_complex
+
#if !defined(__cpp_lib_is_structural)
# if (__cplusplus > 202302L) && (__has_builtin(__builtin_is_structural))
# define __glibcxx_is_structural 202603L
@@ -34,6 +34,7 @@
#endif
#define __glibcxx_want_simd
+#define __glibcxx_want_simd_complex
#include <bits/version.h>
#ifdef __glibcxx_simd
@@ -43,6 +44,11 @@
#include "bits/simd_mask_reductions.h"
#include "bits/simd_reductions.h"
#include "bits/simd_alg.h"
+#include "bits/simd_bit.h"
+#ifdef __glibcxx_simd_complex
+#include "bits/simd_complex.h"
+#endif
+#include "bits/simd_math.h"
#endif
#endif
@@ -3,6 +3,7 @@
// { dg-additional-options "-msse2" }
#include "test_setup.h"
+#include "complex_init.h"
static constexpr bool is_iec559 =
#ifdef __GCC_IEC_559
@@ -13,7 +14,6 @@ static constexpr bool is_iec559 =
false;
#endif
-#if VIR_NEXT_PATCH
template <typename V>
requires complex_like<typename V::value_type>
struct Tests<V>
@@ -135,7 +135,6 @@ template <typename V>
}
};
};
-#endif
template <typename V>
struct Tests
new file mode 100644
@@ -0,0 +1,17 @@
+#include <complex>
+
+/**
+ * This class is a workaround for std::complex not being allowed in template arguments.
+ *
+ * All it does is carry the real & imag values until it can "decay" into a std::complex.
+ * There's no other interface.
+ */
+template <typename T>
+ struct C
+ {
+ T re, im;
+
+ template <typename U>
+ constexpr operator std::complex<U>() const
+ { return {U(re), U(im)}; }
+ };
@@ -2,7 +2,11 @@
void create_tests()
{
- template for (auto t : {char(), short(), unsigned(), 0l, 0ull, float(), double()})
+ template for (auto t : {char(), short(), unsigned(), 0l, 0ull, float(), double(),
+#ifdef __STDCPP_FLOAT16_T__
+ std::float16_t(), std::complex<std::float16_t>(),
+#endif
+ std::complex<float>(), std::complex<double>()})
{
using T = decltype(t);
#ifndef EXPENSIVE_TESTS
@@ -18,7 +18,8 @@ template <typename V>
ADD_TEST(mask_conversion) {
std::array {alternating, k010, k00111},
[](auto& t, M k) {
- template for (auto tmp : {char(), short(), int(), double()})
+ template for (auto tmp : {char(), short(), int(), double(), std::complex<float>(),
+ std::complex<double>()})
{
using U = decltype(tmp);
using M2 = simd::rebind_t<U, M>;
new file mode 100644
@@ -0,0 +1,203 @@
+// { dg-do run { target c++26 } }
+// { dg-require-effective-target x86 }
+// { dg-additional-options "-msse2" }
+
+#include "test_setup.h"
+#include <climits>
+
+template <typename V>
+ struct CheckInvocable
+ {
+ using T = typename V::value_type;
+ static constexpr bool unsigned_integer
+ = any_type_of<T, unsigned char, unsigned short, unsigned int, unsigned long,
+ unsigned long long>;
+ static_assert(std::integral<T> == requires(V x) { std::byteswap(x); });
+ static_assert(unsigned_integer == requires(V x) { std::bit_ceil(x); });
+ static_assert(unsigned_integer == requires(V x) { std::bit_floor(x); });
+ static_assert(unsigned_integer == requires(V x) { std::has_single_bit(x); });
+ static_assert(unsigned_integer == requires(V x, V y) { std::rotl(x, y); });
+ static_assert(unsigned_integer == requires(V x, int y) { std::rotl(x, y); });
+ static_assert(unsigned_integer == requires(V x, V y) { std::rotr(x, y); });
+ static_assert(unsigned_integer == requires(V x, int y) { std::rotr(x, y); });
+ static_assert(unsigned_integer == requires(V x) { std::bit_width(x); });
+ static_assert(unsigned_integer == requires(V x) { std::countl_zero(x); });
+ static_assert(unsigned_integer == requires(V x) { std::countl_one(x); });
+ static_assert(unsigned_integer == requires(V x) { std::countr_zero(x); });
+ static_assert(unsigned_integer == requires(V x) { std::countr_one(x); });
+ static_assert(unsigned_integer == requires(V x) { std::popcount(x); });
+ };
+
+template <typename V>
+ requires std::integral<typename V::value_type>
+ struct Tests<V> : CheckInvocable<V>
+ {
+ using T = typename V::value_type;
+ using M = typename V::mask_type;
+
+ static constexpr T msb = T(std::make_unsigned_t<T>(1) << (sizeof(T) * CHAR_BIT - 1));
+
+ ADD_TEST(Byteswap) {
+ std::tuple {test_iota<V>, V(T(0x01'02'03'04'05'06'07'08LL))},
+ [](auto& t, const V a, const V b) {
+ if constexpr (sizeof(T) == 1)
+ t.verify_equal(std::byteswap(a), a);
+ else
+ {
+ auto x = std::byteswap(a);
+ for (int i = 0; i < V::size(); ++i)
+ t.verify_equal(x[i], std::byteswap(a[i]));
+ auto y = std::simd::byteswap(b);
+ for (int i = 0; i < V::size(); ++i)
+ t.verify_equal(y[i], std::byteswap(b[i]));
+ }
+ }
+ };
+
+ ADD_TEST(BitCeil, std::__unsigned_integer<T>) {
+ std::tuple {test_iota<V, 0, msb < test_iota_max<V> ? msb : test_iota_max<V>>,
+ T(1024), T(msb + 1)},
+ [](auto& t, const V a, const V b, const V c) {
+ t.verify_precondition_failure("bit_ceil result is not representable", [&] {
+ bit_ceil(c);
+ });
+ t.verify_equal(bit_ceil(b), select(b == T(), T(1), b));
+ t.verify_equal(std::bit_ceil(a), bit_ceil(a));
+ t.verify_equal(std::simd::bit_ceil(a), bit_ceil(a));
+ t.verify_equal(bit_ceil(a), V([&](int i) { return std::bit_ceil(a[i]); }));
+ }
+ };
+
+ ADD_TEST(BitFloor, std::__unsigned_integer<T>) {
+ std::tuple {test_iota<V>, T(1024), T(msb + 1)},
+ [](auto& t, const V a, const V b, const V c) {
+ t.verify_equal(bit_floor(c), msb);
+ t.verify_equal(bit_floor(b), b);
+ t.verify_equal(std::bit_floor(a), bit_floor(a));
+ t.verify_equal(std::simd::bit_floor(a), bit_floor(a));
+ t.verify_equal(bit_floor(a), V([&](int i) { return std::bit_floor(a[i]); }));
+ }
+ };
+
+ ADD_TEST(HasSingleBit, std::__unsigned_integer<T>) {
+ std::tuple {test_iota<V>, msb, T(msb + 1)},
+ [](auto& t, const V a, const V b, const V c) {
+ t.verify(all_of(has_single_bit(b)));
+ t.verify(none_of(has_single_bit(c)));
+ t.verify_equal(std::has_single_bit(a), has_single_bit(a));
+ t.verify_equal(std::simd::has_single_bit(a), has_single_bit(a));
+ t.verify_equal(has_single_bit(a), a != T() && a == bit_floor(a));
+ }
+ };
+
+ ADD_TEST(FullRotate, std::__unsigned_integer<T>) {
+ std::tuple {test_iota<V, 0, 0>},
+ [](auto& t, const V a) {
+ constexpr int digits = std::numeric_limits<T>::digits;
+ template for (int n : {0, digits, 5 * digits})
+ {
+ t.verify_equal(rotl(a, n), a);
+ t.verify_equal(std::rotl(a, n), a);
+ t.verify_equal(std::simd::rotl(a, n), a);
+ t.verify_equal(rotr(a, n), a);
+ t.verify_equal(std::rotr(a, n), a);
+ t.verify_equal(std::simd::rotr(a, n), a);
+ }
+ }
+ };
+
+ using I = std::make_signed_t<T>;
+ using IV = std::simd::rebind_t<I, V>;
+
+ ADD_TEST_N(RotateN, 12, std::__unsigned_integer<T>) {
+ std::tuple {test_iota<V, 0, 0>},
+ []<int N>(auto& t, const V x) {
+ constexpr int shift = 11 * N;
+ constexpr int rshift = I(sizeof(T) * CHAR_BIT) - shift;
+ const IV vshift = I(shift);
+ const IV vshiftx = vshift ^ IV(x & T(1));
+ V ref([](T i) -> T { return std::rotl(i, shift); });
+ V refx([](T i) -> T { return std::rotl(i, shift ^ (i & 1)); });
+ const V l1 = rotl(x, shift);
+ const V lv = rotl(x, vshift);
+ const V lx = rotl(x, vshiftx);
+ t.verify_equal(l1, ref);
+ t.verify_equal(lv, ref);
+ t.verify_equal(lx, refx);
+ t.verify_equal(rotr(x, rshift), ref);
+ t.verify_equal(rotr(x, I(rshift)), ref);
+ t.verify_equal(rotr(x, I(sizeof(T) * CHAR_BIT) - vshiftx), refx);
+ }
+ };
+
+ // The value-type of reference is always going to be 'int', forcing a conversion in verify_equal
+ // (unless V::value_type is 'unsigned int'). That's intentional, since we thus can find
+ // (hypothetical) cases of value-changing conversions in the implementation.
+#define REFERENCE(x, fun) simd::rebind_t<decltype(fun(x[0])), V>([&](int i) { return fun(x[i]); })
+
+ ADD_TEST(BitWidth, std::__unsigned_integer<T>) {
+ std::tuple {test_iota<V>, msb - test_iota<V>},
+ [](auto& t, const V x, const V y) {
+ t.verify_equal(std::bit_width(x), REFERENCE(x, std::bit_width));
+ t.verify_equal(simd::bit_width(x), REFERENCE(x, std::bit_width));
+ t.verify_equal(std::bit_width(y), REFERENCE(y, std::bit_width));
+ t.verify_equal(simd::bit_width(y), REFERENCE(y, std::bit_width));
+ }
+ };
+
+ ADD_TEST(CountLZero, std::__unsigned_integer<T>) {
+ std::tuple {test_iota<V>, msb - test_iota<V>},
+ [](auto& t, const V x, const V y) {
+ t.verify_equal(std::countl_zero(x), REFERENCE(x, std::countl_zero));
+ t.verify_equal(simd::countl_zero(x), REFERENCE(x, std::countl_zero));
+ t.verify_equal(std::countl_zero(y), REFERENCE(y, std::countl_zero));
+ t.verify_equal(simd::countl_zero(y), REFERENCE(y, std::countl_zero));
+ }
+ };
+
+ ADD_TEST(CountLOne, std::__unsigned_integer<T>) {
+ std::tuple {test_iota<V>, msb - test_iota<V>},
+ [](auto& t, const V x, const V y) {
+ t.verify_equal(std::countl_one(x), REFERENCE(x, std::countl_one));
+ t.verify_equal(simd::countl_one(x), REFERENCE(x, std::countl_one));
+ t.verify_equal(std::countl_one(y), REFERENCE(y, std::countl_one));
+ t.verify_equal(simd::countl_one(y), REFERENCE(y, std::countl_one));
+ }
+ };
+
+ ADD_TEST(CountRZero, std::__unsigned_integer<T>) {
+ std::tuple {test_iota<V>, msb - test_iota<V>},
+ [](auto& t, const V x, const V y) {
+ t.verify_equal(std::countr_zero(x), REFERENCE(x, std::countr_zero));
+ t.verify_equal(simd::countr_zero(x), REFERENCE(x, std::countr_zero));
+ t.verify_equal(std::countr_zero(y), REFERENCE(y, std::countr_zero));
+ t.verify_equal(simd::countr_zero(y), REFERENCE(y, std::countr_zero));
+ }
+ };
+
+ ADD_TEST(CountROne, std::__unsigned_integer<T>) {
+ std::tuple {test_iota<V>, msb - test_iota<V>},
+ [](auto& t, const V x, const V y) {
+ t.verify_equal(std::countr_one(x), REFERENCE(x, std::countr_one));
+ t.verify_equal(simd::countr_one(x), REFERENCE(x, std::countr_one));
+ t.verify_equal(std::countr_one(y), REFERENCE(y, std::countr_one));
+ t.verify_equal(simd::countr_one(y), REFERENCE(y, std::countr_one));
+ }
+ };
+
+ ADD_TEST(PopCount, std::__unsigned_integer<T>) {
+ std::tuple {test_iota<V>, msb - test_iota<V>},
+ [](auto& t, const V x, const V y) {
+ t.verify_equal(std::popcount(x), REFERENCE(x, std::popcount));
+ t.verify_equal(simd::popcount(x), REFERENCE(x, std::popcount));
+ t.verify_equal(std::popcount(y), REFERENCE(y, std::popcount));
+ t.verify_equal(simd::popcount(y), REFERENCE(y, std::popcount));
+ }
+ };
+ };
+
+template <typename V>
+ struct Tests : CheckInvocable<V>
+ {};
+
+#include "create_tests.h"
new file mode 100644
@@ -0,0 +1,7 @@
+// { dg-do run { target c++26 } }
+// { dg-require-effective-target x86 }
+// { dg-timeout-factor 2 }
+// { dg-require-effective-target run_expensive_tests }
+
+#define EXPENSIVE_TESTS 1
+#include "simd_bit.cc"
@@ -25,12 +25,10 @@ template <typename V>
t.verify_equal(mem[i], T(i + 1));
t.verify_equal(mem[V::size + i], T(i + 1));
}
-#if VIR_NEXT_PATCH
if constexpr (complex_like<T>)
- {
+ { // TODO
}
else
-#endif
{
simd::unchecked_store(v, ints, simd::flag_convert);
simd::partial_store(v, ints.begin() + V::size() + 1, ints.end(),
@@ -73,6 +73,12 @@ static std::string_view test_name = "unknown";
namespace simd = std::simd;
+template <typename T, typename... Us>
+ concept any_type_of = (std::same_as<T, Us> || ...);
+
+template <typename T>
+ concept complex_like = std::simd::__complex_like<T>;
+
template <typename T>
struct is_character_type
: std::bool_constant<false>
@@ -285,8 +291,16 @@ template <typename T>
using TT = typename T::value_type;
if constexpr (std::is_integral_v<TT>)
return all_of(a == b);
+ else if constexpr (T::abi_type::_S_nreg > 1)
+ {
+ return bit_equal(a._M_get_low(), b._M_get_low())
+ && bit_equal(a._M_get_high(), b._M_get_high());
+ }
else
{
+ // float, 4 -> unsigned, 4 (uint_size = 4)
+ // double, 4 -> ullong, 4 (uint_size = 8)
+ // complex<double>, 4 -> ullong, 8 (uint_size = 8)
constexpr size_t uint_size = std::min(size_t(8), sizeof(TT));
struct B
{
@@ -303,13 +317,26 @@ template <typename T>
}
}
}
+ else if constexpr (complex_like<T>)
+ return bit_equal(a.real(), b.real()) && bit_equal(a.imag(), b.imag());
else
static_assert(false);
}
+// true iff real or imag parts of x are +/-inf. This matches the C23 Annex G interpretation.
+template <complex_like T, typename Abi>
+ constexpr typename simd::basic_vec<T, Abi>::mask_type
+ cx_isinf(const simd::basic_vec<T, Abi>& x)
+ {
+ using M = typename simd::basic_vec<T, Abi>::mask_type;
+ return M(isinf(x.real()) || isinf(x.imag()));
+ }
+
// treat as equal if either:
// - operator== yields true
// - or for floats, a and b are NaNs
+// - or for complex, a and b are any infinity (see cx_isinf)
+// - or for complex, a and b are NaNs in real *and* imag components
template <typename V>
constexpr bool
equal_with_nan_and_inf_fixup(const V& a, const V& b)
@@ -321,9 +348,15 @@ template <typename V>
{
using M = typename V::mask_type;
using T = typename V::value_type;
- if constexpr (std::is_floating_point_v<T>)
+ if constexpr (complex_like<T>)
+ { // fix up nan == nan and (inf,nan) == (inf,?)
+ eq |= M(isnan(a.real()) && isnan(a.imag()) && isnan(b.real()) && isnan(a.imag()))
+ // a and b are "an infinity" according to C23 Annex G.3
+ || (cx_isinf(a) && cx_isinf(b));
+ }
+ else if constexpr (std::is_floating_point_v<T>)
{ // fix up nan == nan results
- eq |= a._M_isnan() && b._M_isnan();
+ eq |= isnan(a) && isnan(b);
}
else
return false;
@@ -617,6 +650,11 @@ template <std::size_t B, typename Abi>
is_const_known(const std::simd::basic_mask<B, Abi>& x)
{ return __is_const_known(x); }
+template <typename T>
+ [[gnu::always_inline]] inline bool
+ is_const_known(const std::complex<T>& x)
+ { return is_const_known(x.real()) && is_const_known(x.imag()); }
+
template <std::ranges::sized_range R>
[[gnu::always_inline]] inline bool
is_const_known(const R& arr)
@@ -675,6 +713,9 @@ template <typename V, int Init = 0, int MaxArg = int(test_iota_max<V, Init>)>
i -= Max - Init + 1;
}
using T = value_type_t<V>;
+ if constexpr (std::simd::__simd_complex<V>)
+ return std::complex<T>(T(i), T());
+ else
return static_cast<T>(i);
});
@@ -4,10 +4,13 @@
// { dg-timeout-factor 2 }
#include <simd>
+#include <complex>
#include <stdfloat>
namespace simd = std::simd;
+using std::complex;
+
// test that instantiation of the complete class is well-formed
template class simd::basic_vec<int, typename simd::vec<int, 1>::abi_type>;
template class simd::basic_vec<int, typename simd::vec<int, 5>::abi_type>;
@@ -15,6 +18,45 @@ template class simd::basic_vec<int, typename simd::vec<int, 8>::abi_type>;
template class simd::basic_vec<int, typename simd::vec<int, 13>::abi_type>;
template class simd::basic_vec<float, typename simd::vec<float, 8>::abi_type>;
template class simd::basic_vec<float, typename simd::vec<float, 13>::abi_type>;
+template class simd::basic_vec<complex<double>, typename simd::vec<complex<double>, 3>::abi_type>;
+
+// LWG4420 ///////////////////////////////////////
+
+#ifdef __STDCPP_FLOAT16_T__
+namespace LWG4420
+{
+ using std::convertible_to;
+ using std::constructible_from;
+ using std::float16_t;
+ using std::array;
+
+ static_assert( convertible_to<simd::vec<float16_t, 4>, simd::vec<float, 4>>);
+ static_assert(!convertible_to<simd::vec<float, 4>, simd::vec<float16_t, 4>>);
+
+ static_assert( convertible_to<float16_t, simd::vec<float, 4>>);
+ static_assert(!convertible_to<float, simd::vec<float16_t, 4>>);
+
+ static_assert(constructible_from<simd::vec<float16_t, 4>, simd::vec<float, 4>>);
+
+ static_assert(convertible_to<array<float16_t, 4>, simd::vec<float, 4>>);
+ static_assert(convertible_to<array<float, 4>, simd::vec<float16_t, 4>>); // Mandates flag_convert
+
+ static_assert([] {
+ array<float, 4> f4 = {};
+ array<float16_t, 4> h4 = {};
+
+ simd::vec<float, 4> vh2f = h4;
+ simd::vec<float, 4> vh2f_b = float16_t();
+
+ simd::vec<float16_t, 4> vf2h = {f4, simd::flag_convert};
+
+ vh2f = vf2h;
+ vf2h = static_cast<decltype(vf2h)>(vh2f);
+
+ return all_of(vh2f == vh2f_b);
+ }());
+}
+#endif
constexpr auto default_mask_abi_variant =
#ifdef __AVX512F__
@@ -52,14 +94,55 @@ namespace test02
static_assert(!destructible<simd::basic_mask<7>>);
+ template <int N>
+ using expected_abi = _Abi_t<N, 1, default_mask_abi_variant, _AbiVariant::_CxIleav>;
+
+ static_assert(same_as<simd::vec<complex<float>, 1>::abi_type, expected_abi<1>>);
+ static_assert(same_as<simd::vec<complex<double>, 1>::abi_type, expected_abi<1>>);
+
+#if defined __AVX512F__
+ static_assert(same_as<simd::vec<complex<float>, 2>::abi_type,
+ _Abi_t<2, 1, _AbiVariant::_CxIleav, _AbiVariant::_BitMask>>);
+ static_assert(same_as<simd::vec<complex<double>, 2>::abi_type,
+ _Abi_t<2, 1, _AbiVariant::_CxIleav, _AbiVariant::_BitMask>>);
+ static_assert(same_as<simd::vec<complex<float>, 4>::abi_type,
+ _Abi_t<4, 1, _AbiVariant::_CxIleav, _AbiVariant::_BitMask>>);
+ static_assert(same_as<simd::vec<complex<double>, 4>::abi_type,
+ _Abi_t<4, 1, _AbiVariant::_CxIleav, _AbiVariant::_BitMask>>);
+#elif defined __AVX__
+ static_assert(same_as<simd::vec<complex<float>, 2>::abi_type,
+ _Abi_t<2, 1, _AbiVariant::_CxIleav>>);
+ static_assert(same_as<simd::vec<complex<double>, 2>::abi_type,
+ _Abi_t<2, 1, _AbiVariant::_CxIleav>>);
+ static_assert(same_as<simd::vec<complex<float>, 4>::abi_type,
+ _Abi_t<4, 1, _AbiVariant::_CxIleav>>);
+ static_assert(same_as<simd::vec<complex<double>, 4>::abi_type,
+ _Abi_t<4, 2, _AbiVariant::_CxIleav>>);
+#elif defined __SSE__
+ static_assert(same_as<simd::vec<complex<float>, 2>::abi_type,
+ _Abi_t<2, 1, _AbiVariant::_CxIleav>>);
+ static_assert(same_as<simd::vec<complex<double>, 2>::abi_type,
+ _Abi_t<2, 2, _AbiVariant::_CxIleav>>);
+ static_assert(same_as<simd::vec<complex<float>, 4>::abi_type,
+ _Abi_t<4, 2, _AbiVariant::_CxIleav>>);
+ static_assert(same_as<simd::vec<complex<double>, 4>::abi_type,
+ _Abi_t<4, 4, _AbiVariant::_CxIleav>>);
+#endif
+
static_assert(same_as<simd::vec<int>::mask_type, simd::mask<int>>);
static_assert(same_as<simd::vec<float>::mask_type, simd::mask<float>>);
static_assert(same_as<simd::vec<float, 1>::mask_type, simd::mask<float, 1>>);
+ static_assert(destructible<simd::vec<complex<float>>>);
+ static_assert(same_as<simd::vec<complex<float>>::mask_type, simd::mask<complex<float>>>);
+ static_assert(same_as<simd::vec<complex<float>, 1>::mask_type, simd::mask<complex<float>, 1>>);
+
// ensure 'true ? int : vec<float>' doesn't work
template <typename T>
concept has_type_member = requires { typename T::type; };
static_assert(!has_type_member<common_type<int, simd::vec<float>>>);
+
+ constexpr simd::vec<complex<double>>::mask_type k = {};
}
#if defined __AVX__ && !defined __AVX2__
@@ -77,6 +160,8 @@ static_assert(std::same_as<decltype(+simd::mask<float, 8>()), simd::vec<int, 8>>
#if defined __SSE__ && !defined __F16C__ && defined __STDCPP_FLOAT16_T__
static_assert(simd::vec<std::float16_t>::size() == 1);
static_assert(simd::mask<std::float16_t>::size() == 1);
+static_assert(simd::vec<std::complex<std::float16_t>>::size() == 1);
+static_assert(simd::mask<std::complex<std::float16_t>>::size() == 1);
static_assert(alignof(simd::vec<std::float16_t, 8>) == alignof(std::float16_t));
static_assert(alignof(simd::rebind_t<std::float16_t, simd::vec<float>>) == alignof(std::float16_t));
static_assert(simd::rebind_t<std::float16_t, simd::mask<float>>::abi_type::_S_nreg
@@ -198,17 +283,38 @@ template <template <typename> class Tpl>
Tpl<unsigned long long> p;
#ifdef __STDCPP_FLOAT16_T__
Tpl<std::float16_t> q;
+ Tpl<std::complex<std::float16_t>> qc;
#endif
#ifdef __STDCPP_FLOAT32_T__
Tpl<std::float32_t> r;
+ Tpl<std::complex<std::float32_t>> rc;
#endif
#ifdef __STDCPP_FLOAT64_T__
Tpl<std::float64_t> s;
+ Tpl<std::complex<std::float64_t>> sc;
#endif
+ Tpl<std::complex<float>> u;
+ Tpl<std::complex<double>> v;
};
template struct instantiate_all_vectorizable<test_usable_simd>;
+// vec broadcast ctor ///////////////
+namespace test_broadcast
+{
+ using std::constructible_from;
+ using std::complex;
+ using simd::vec;
+
+ static_assert(constructible_from<simd::vec<complex<float>>, complex<float>>);
+ static_assert(constructible_from<simd::vec<complex<double>>, complex<float>>);
+
+ constexpr simd::vec<complex<double>, 2> cd2 = 1.f; // broadcast real from float
+ static_assert(all_of(cd2.real() == 1));
+ static_assert(all_of(cd2.imag() == 0));
+ static_assert(all_of(cd2 == complex{1.f, 0.f}));
+}
+
// vec generator ctor ///////////////
namespace test_generator
@@ -223,6 +329,10 @@ namespace test_generator
static_assert( std::constructible_from<simd::vec<float>, short (&)(int)>);
static_assert(!std::constructible_from<simd::vec<float>, long double (&)(int)>);
static_assert( std::constructible_from<simd::vec<float>, udt_convertible_to_float (&)(int)>);
+ static_assert( std::constructible_from<simd::vec<std::complex<double>>,
+ std::complex<double> (&)(int)>);
+ static_assert( std::constructible_from<simd::vec<std::complex<double>>,
+ std::complex<float> (&)(int)>);
}
// mask generator ctor ///////////////
@@ -358,6 +468,7 @@ static_assert([] constexpr {
// mask conversions //////////////////
namespace mask_conversion_tests
{
+ using std::complex;
using simd::mask;
struct TestResult
@@ -423,6 +534,10 @@ namespace mask_conversion_tests
check<do_test<std::float16_t>( k)>();
check<do_test<std::float16_t>(!k)>();
#endif
+ check<do_test<complex<float>>( k)>();
+ check<do_test<complex<float>>(!k)>();
+ check<do_test<complex<double>>( k)>();
+ check<do_test<complex<double>>(!k)>();
if constexpr (P <= 2)
do_test<T, N, P + 1>();
}
@@ -453,6 +568,8 @@ namespace mask_conversion_tests
#ifdef __STDCPP_FLOAT16_T__
static_assert(test<std::float16_t>());
#endif
+ static_assert(test<complex<float>>());
+ static_assert(test<complex<double>>());
}
// vec reductions ///////////////////
@@ -542,6 +659,18 @@ static_assert(all_of(simd::cat(simd::__iota<simd::vec<double, 4>>, simd::__iota<
static_assert(all_of(simd::cat(simd::__iota<simd::vec<double, 4>>, simd::__iota<simd::vec<double, 4>> + 4)
== simd::__iota<simd::vec<double, 8>>));
+static_assert(all_of(simd::cat(simd::__iota<simd::vec<complex<float>, 1>>,
+ simd::__iota<simd::vec<complex<float>, 1>> + 1.f)
+ == simd::__iota<simd::vec<complex<float>, 2>>));
+
+static_assert(all_of(simd::cat(simd::__iota<simd::vec<complex<float>, 3>>,
+ simd::__iota<simd::vec<complex<float>, 3>> + 3.f)
+ == simd::__iota<simd::vec<complex<float>, 6>>));
+
+static_assert(all_of(simd::cat(simd::__iota<simd::vec<complex<float>, 8>>,
+ simd::__iota<simd::vec<complex<float>, 8>> + 8.f)
+ == simd::__iota<simd::vec<complex<float>, 16>>));
+
// select ////////////////////////
#ifndef AVOID_BROKEN_CLANG_FAILURES
@@ -5,10 +5,12 @@
#include <bits/simd_details.h>
#include <bits/simd_flags.h>
+#include <complex>
#include <stdfloat>
namespace simd = std::simd;
+using std::complex;
#ifdef __STDCPP_FLOAT16_T__
using std::float16_t;
#endif
@@ -29,6 +31,9 @@ void test()
{
using T = decltype(t);
static_assert(__vectorizable<T>);
+ static_assert(__complex_like<complex<T>>);
+ static_assert(__complex_like<const complex<T>&>);
+ static_assert(__vectorizable<complex<T>>);
}
static_assert(!__vectorizable<const float>);
@@ -43,6 +48,16 @@ void test()
static_assert(sizeof(__integer_from<N>) == N);
static_assert(__vectorizable<__integer_from<N>>);
}
+ template for (constexpr int N : {
+#ifdef __STDCPP_BFLOAT16_T__
+ 2,
+#endif
+ 4, 8})
+ {
+ static_assert(std::floating_point<__float_from<N>>);
+ static_assert(sizeof(__float_from<N>) == N);
+ static_assert(__vectorizable<__float_from<N>>);
+ }
static_assert(__div_ceil(5, 3) == 2);
@@ -53,6 +68,9 @@ void test()
static_assert(__scalar_abi_tag<_Abi_t<2, 2>>);
static_assert(!__scalar_abi_tag<_Abi_t<2, 1>>);
+ static_assert(__abi_tag<_Abi_t<2, 1, _AbiVariant::_CxIleav>>);
+ static_assert(__abi_tag<_Abi_t<2, 1, _AbiVariant::_CxCtgus>>);
+
using AN = decltype(__native_abi<float>());
using A1 = decltype(__native_abi<float>()._S_resize<1>());
static_assert(A1::_S_size == 1);
@@ -72,8 +90,33 @@ void test()
}
using A4 = decltype(__abi_rebind<float, 4, AN>());
static_assert(A4::_S_size == 4);
+
+ // at this point we unconditionally expect _CxIleav from __abi_rebind:
+ using AC2 = decltype(__abi_rebind<complex<float>, 2, AN>());
+ static_assert(AC2::_S_size == 2);
+ static_assert(AC2::_S_nreg == A4::_S_nreg);
+ static_assert(AC2::_S_variant != A4::_S_variant);
+ static_assert(__filter_abi_variant(AC2::_S_variant, _AbiVariant::_MaskVariants)
+ == A4::_S_variant);
+ static_assert(__filter_abi_variant(AC2::_S_variant, _AbiVariant::_CxVariants)
+ == _AbiVariant::_CxIleav);
+ static_assert(AC2::_S_is_cx_ileav);
+ static_assert(!AC2::_S_is_cx_ctgus);
}
+#if __glibcxx_simd_complex
+ {
+ using ACx2 = _Abi_t<2, 2, _AbiVariant::_CxIleav>;
+ static_assert(__abi_tag<ACx2>);
+ static_assert(__scalar_abi_tag<ACx2>);
+ using AM4 = decltype(__abi_rebind<__float_from<2>, ACx2::_S_size * 2, ACx2>());
+ static_assert(__abi_tag<AM4>);
+ static_assert(__scalar_abi_tag<AM4>);
+ static_assert(AM4::_S_size == ACx2::_S_size * 2);
+ static_assert(!AM4::_S_is_cx_ileav);
+ }
+#endif
+
static_assert(__streq_to_1("1"));
static_assert(!__streq_to_1(""));
static_assert(!__streq_to_1(nullptr));
@@ -88,6 +131,10 @@ void test()
static_assert(!__value_preserving_convertible_to<int, float>);
static_assert( __value_preserving_convertible_to<float, double>);
static_assert(!__value_preserving_convertible_to<double, float>);
+ static_assert( __value_preserving_convertible_to<float, complex<float>>);
+ static_assert( __value_preserving_convertible_to<float, complex<double>>);
+ static_assert( __value_preserving_convertible_to<double, complex<double>>);
+ static_assert(!__value_preserving_convertible_to<double, complex<float>>);
#ifdef __STDCPP_FLOAT16_T__
static_assert(__explicitly_convertible_to<float, float16_t>);
@@ -108,6 +155,9 @@ void test()
static_assert(__broadcast_constructible<decltype(std::cw<0.f>), std::float16_t>);
#endif
+ static_assert( __broadcast_constructible<complex<float>, complex<float>>);
+ static_assert( __broadcast_constructible<complex<float>, complex<double>>);
+ static_assert(!__broadcast_constructible<complex<double>, complex<float>>);
static_assert(__higher_rank_than<long, int>);
static_assert(__higher_rank_than<long long, long>);