[v2,29/36] arm: [MVE intrinsics] rework vshlcq
Commit Message
Implement vshlc using the new MVE builtins framework.
2024-08-28 Christophe Lyon <christophe.lyon@linaro.org>
gcc/
* config/arm/arm-mve-builtins-base.cc (class vshlc_impl): New.
(vshlc): New.
* config/arm/arm-mve-builtins-base.def (vshlcq): New.
* config/arm/arm-mve-builtins-base.h (vshlcq): New.
* config/arm/arm-mve-builtins.cc
(function_instance::has_inactive_argument): Handle vshlc.
* config/arm/arm_mve.h (vshlcq): Delete.
(vshlcq_m): Delete.
(vshlcq_s8): Delete.
(vshlcq_u8): Delete.
(vshlcq_s16): Delete.
(vshlcq_u16): Delete.
(vshlcq_s32): Delete.
(vshlcq_u32): Delete.
(vshlcq_m_s8): Delete.
(vshlcq_m_u8): Delete.
(vshlcq_m_s16): Delete.
(vshlcq_m_u16): Delete.
(vshlcq_m_s32): Delete.
(vshlcq_m_u32): Delete.
(__arm_vshlcq_s8): Delete.
(__arm_vshlcq_u8): Delete.
(__arm_vshlcq_s16): Delete.
(__arm_vshlcq_u16): Delete.
(__arm_vshlcq_s32): Delete.
(__arm_vshlcq_u32): Delete.
(__arm_vshlcq_m_s8): Delete.
(__arm_vshlcq_m_u8): Delete.
(__arm_vshlcq_m_s16): Delete.
(__arm_vshlcq_m_u16): Delete.
(__arm_vshlcq_m_s32): Delete.
(__arm_vshlcq_m_u32): Delete.
(__arm_vshlcq): Delete.
(__arm_vshlcq_m): Delete.
* config/arm/mve.md (mve_vshlcq_<supf><mode>): Add '@' prefix.
(mve_vshlcq_m_<supf><mode>): Likewise.
---
gcc/config/arm/arm-mve-builtins-base.cc | 72 +++++++
gcc/config/arm/arm-mve-builtins-base.def | 1 +
gcc/config/arm/arm-mve-builtins-base.h | 1 +
gcc/config/arm/arm-mve-builtins.cc | 1 +
gcc/config/arm/arm_mve.h | 233 -----------------------
gcc/config/arm/mve.md | 4 +-
6 files changed, 77 insertions(+), 235 deletions(-)
Comments
On 04/09/2024 14:26, Christophe Lyon wrote:
> Implement vshlc using the new MVE builtins framework.
>
> 2024-08-28 Christophe Lyon <christophe.lyon@linaro.org>
>
> gcc/
> * config/arm/arm-mve-builtins-base.cc (class vshlc_impl): New.
> (vshlc): New.
> * config/arm/arm-mve-builtins-base.def (vshlcq): New.
> * config/arm/arm-mve-builtins-base.h (vshlcq): New.
> * config/arm/arm-mve-builtins.cc
> (function_instance::has_inactive_argument): Handle vshlc.
> * config/arm/arm_mve.h (vshlcq): Delete.
> (vshlcq_m): Delete.
> (vshlcq_s8): Delete.
> (vshlcq_u8): Delete.
> (vshlcq_s16): Delete.
> (vshlcq_u16): Delete.
> (vshlcq_s32): Delete.
> (vshlcq_u32): Delete.
> (vshlcq_m_s8): Delete.
> (vshlcq_m_u8): Delete.
> (vshlcq_m_s16): Delete.
> (vshlcq_m_u16): Delete.
> (vshlcq_m_s32): Delete.
> (vshlcq_m_u32): Delete.
> (__arm_vshlcq_s8): Delete.
> (__arm_vshlcq_u8): Delete.
> (__arm_vshlcq_s16): Delete.
> (__arm_vshlcq_u16): Delete.
> (__arm_vshlcq_s32): Delete.
> (__arm_vshlcq_u32): Delete.
> (__arm_vshlcq_m_s8): Delete.
> (__arm_vshlcq_m_u8): Delete.
> (__arm_vshlcq_m_s16): Delete.
> (__arm_vshlcq_m_u16): Delete.
> (__arm_vshlcq_m_s32): Delete.
> (__arm_vshlcq_m_u32): Delete.
> (__arm_vshlcq): Delete.
> (__arm_vshlcq_m): Delete.
> * config/arm/mve.md (mve_vshlcq_<supf><mode>): Add '@' prefix.
> (mve_vshlcq_m_<supf><mode>): Likewise.
OK.
R.
> ---
> gcc/config/arm/arm-mve-builtins-base.cc | 72 +++++++
> gcc/config/arm/arm-mve-builtins-base.def | 1 +
> gcc/config/arm/arm-mve-builtins-base.h | 1 +
> gcc/config/arm/arm-mve-builtins.cc | 1 +
> gcc/config/arm/arm_mve.h | 233 -----------------------
> gcc/config/arm/mve.md | 4 +-
> 6 files changed, 77 insertions(+), 235 deletions(-)
>
> diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
> index eaf054d9823..9f1f7e69c57 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.cc
> +++ b/gcc/config/arm/arm-mve-builtins-base.cc
> @@ -483,6 +483,77 @@ public:
> }
> };
>
> +/* Map the vshlc function directly to CODE (UNSPEC, M) where M is the vector
> + mode associated with type suffix 0. We need this special case because the
> +   intrinsics dereference the second parameter and update its contents. */
> +class vshlc_impl : public function_base
> +{
> +public:
> + unsigned int
> + call_properties (const function_instance &) const override
> + {
> + return CP_WRITE_MEMORY | CP_READ_MEMORY;
> + }
> +
> + tree
> + memory_scalar_type (const function_instance &) const override
> + {
> + return get_typenode_from_name (UINT32_TYPE);
> + }
> +
> + rtx
> + expand (function_expander &e) const override
> + {
> + machine_mode mode = e.vector_mode (0);
> + insn_code code;
> + rtx insns, carry_ptr, carry, new_carry;
> + int carry_arg_no;
> +
> + if (! e.type_suffix (0).integer_p)
> + gcc_unreachable ();
> +
> + if (e.mode_suffix_id != MODE_none)
> + gcc_unreachable ();
> +
> + carry_arg_no = 1;
> +
> + carry = gen_reg_rtx (SImode);
> + carry_ptr = e.args[carry_arg_no];
> + emit_insn (gen_rtx_SET (carry, gen_rtx_MEM (SImode, carry_ptr)));
> + e.args[carry_arg_no] = carry;
> +
> + new_carry = gen_reg_rtx (SImode);
> + e.args.quick_insert (0, new_carry);
> +
> + switch (e.pred)
> + {
> + case PRED_none:
> + /* No predicate. */
> + code = e.type_suffix (0).unsigned_p
> + ? code_for_mve_vshlcq (VSHLCQ_U, mode)
> + : code_for_mve_vshlcq (VSHLCQ_S, mode);
> + insns = e.use_exact_insn (code);
> + break;
> +
> + case PRED_m:
> + /* "m" predicate. */
> + code = e.type_suffix (0).unsigned_p
> + ? code_for_mve_vshlcq_m (VSHLCQ_M_U, mode)
> + : code_for_mve_vshlcq_m (VSHLCQ_M_S, mode);
> + insns = e.use_cond_insn (code, 0);
> + break;
> +
> + default:
> + gcc_unreachable ();
> + }
> +
> + /* Update carry. */
> + emit_insn (gen_rtx_SET (gen_rtx_MEM (Pmode, carry_ptr), new_carry));
> +
> + return insns;
> + }
> +};
> +
> } /* end anonymous namespace */
>
> namespace arm_mve {
> @@ -815,6 +886,7 @@ FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
> FUNCTION_ONLY_N_NO_F (vrshrnbq, VRSHRNBQ)
> FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ)
> FUNCTION_ONLY_N_NO_F (vrshrq, VRSHRQ)
> +FUNCTION (vshlcq, vshlc_impl,)
> FUNCTION_ONLY_N_NO_F (vshllbq, VSHLLBQ)
> FUNCTION_ONLY_N_NO_F (vshlltq, VSHLLTQ)
> FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
> index c5f1e8a197b..bd69f06d7e4 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.def
> +++ b/gcc/config/arm/arm-mve-builtins-base.def
> @@ -152,6 +152,7 @@ DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
> DEF_MVE_FUNCTION (vrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
> DEF_MVE_FUNCTION (vrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
> DEF_MVE_FUNCTION (vrshrq, binary_rshift, all_integer, mx_or_none)
> +DEF_MVE_FUNCTION (vshlcq, vshlc, all_integer, m_or_none)
> DEF_MVE_FUNCTION (vshllbq, binary_widen_n, integer_8_16, mx_or_none)
> DEF_MVE_FUNCTION (vshlltq, binary_widen_n, integer_8_16, mx_or_none)
> DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none)
> diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
> index ed8761318bb..1eff50d3c6d 100644
> --- a/gcc/config/arm/arm-mve-builtins-base.h
> +++ b/gcc/config/arm/arm-mve-builtins-base.h
> @@ -188,6 +188,7 @@ extern const function_base *const vrshlq;
> extern const function_base *const vrshrnbq;
> extern const function_base *const vrshrntq;
> extern const function_base *const vrshrq;
> +extern const function_base *const vshlcq;
> extern const function_base *const vshllbq;
> extern const function_base *const vshlltq;
> extern const function_base *const vshlq;
> diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
> index 1180421bf0a..252744596ce 100644
> --- a/gcc/config/arm/arm-mve-builtins.cc
> +++ b/gcc/config/arm/arm-mve-builtins.cc
> @@ -810,6 +810,7 @@ function_instance::has_inactive_argument () const
> || (base == functions::vrshlq && mode_suffix_id == MODE_n)
> || base == functions::vrshrnbq
> || base == functions::vrshrntq
> + || base == functions::vshlcq
> || base == functions::vshrnbq
> || base == functions::vshrntq
> || base == functions::vsliq
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index 37b0fedc4ff..c577c373e98 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -42,7 +42,6 @@
>
> #ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
> #define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
> -#define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
> #define vstrbq_scatter_offset(__base, __offset, __value) __arm_vstrbq_scatter_offset(__base, __offset, __value)
> #define vstrbq(__addr, __value) __arm_vstrbq(__addr, __value)
> #define vstrwq_scatter_base(__addr, __offset, __value) __arm_vstrwq_scatter_base(__addr, __offset, __value)
> @@ -101,7 +100,6 @@
> #define vld4q(__addr) __arm_vld4q(__addr)
> #define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx)
> #define vgetq_lane(__a, __idx) __arm_vgetq_lane(__a, __idx)
> -#define vshlcq_m(__a, __b, __imm, __p) __arm_vshlcq_m(__a, __b, __imm, __p)
>
>
> #define vst4q_s8( __addr, __value) __arm_vst4q_s8( __addr, __value)
> @@ -113,12 +111,6 @@
> #define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
> #define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
> #define vpnot(__a) __arm_vpnot(__a)
> -#define vshlcq_s8(__a, __b, __imm) __arm_vshlcq_s8(__a, __b, __imm)
> -#define vshlcq_u8(__a, __b, __imm) __arm_vshlcq_u8(__a, __b, __imm)
> -#define vshlcq_s16(__a, __b, __imm) __arm_vshlcq_s16(__a, __b, __imm)
> -#define vshlcq_u16(__a, __b, __imm) __arm_vshlcq_u16(__a, __b, __imm)
> -#define vshlcq_s32(__a, __b, __imm) __arm_vshlcq_s32(__a, __b, __imm)
> -#define vshlcq_u32(__a, __b, __imm) __arm_vshlcq_u32(__a, __b, __imm)
> #define vstrbq_s8( __addr, __value) __arm_vstrbq_s8( __addr, __value)
> #define vstrbq_u8( __addr, __value) __arm_vstrbq_u8( __addr, __value)
> #define vstrbq_u16( __addr, __value) __arm_vstrbq_u16( __addr, __value)
> @@ -421,12 +413,6 @@
> #define urshrl(__p0, __p1) __arm_urshrl(__p0, __p1)
> #define lsll(__p0, __p1) __arm_lsll(__p0, __p1)
> #define asrl(__p0, __p1) __arm_asrl(__p0, __p1)
> -#define vshlcq_m_s8(__a, __b, __imm, __p) __arm_vshlcq_m_s8(__a, __b, __imm, __p)
> -#define vshlcq_m_u8(__a, __b, __imm, __p) __arm_vshlcq_m_u8(__a, __b, __imm, __p)
> -#define vshlcq_m_s16(__a, __b, __imm, __p) __arm_vshlcq_m_s16(__a, __b, __imm, __p)
> -#define vshlcq_m_u16(__a, __b, __imm, __p) __arm_vshlcq_m_u16(__a, __b, __imm, __p)
> -#define vshlcq_m_s32(__a, __b, __imm, __p) __arm_vshlcq_m_s32(__a, __b, __imm, __p)
> -#define vshlcq_m_u32(__a, __b, __imm, __p) __arm_vshlcq_m_u32(__a, __b, __imm, __p)
> #endif
>
> /* For big-endian, GCC's vector indices are reversed within each 64 bits
> @@ -502,60 +488,6 @@ __arm_vpnot (mve_pred16_t __a)
> return __builtin_mve_vpnotv16bi (__a);
> }
>
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_s8 (int8x16_t __a, uint32_t * __b, const int __imm)
> -{
> - int8x16_t __res = __builtin_mve_vshlcq_vec_sv16qi (__a, *__b, __imm);
> - *__b = __builtin_mve_vshlcq_carry_sv16qi (__a, *__b, __imm);
> - return __res;
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_u8 (uint8x16_t __a, uint32_t * __b, const int __imm)
> -{
> - uint8x16_t __res = __builtin_mve_vshlcq_vec_uv16qi (__a, *__b, __imm);
> - *__b = __builtin_mve_vshlcq_carry_uv16qi (__a, *__b, __imm);
> - return __res;
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_s16 (int16x8_t __a, uint32_t * __b, const int __imm)
> -{
> - int16x8_t __res = __builtin_mve_vshlcq_vec_sv8hi (__a, *__b, __imm);
> - *__b = __builtin_mve_vshlcq_carry_sv8hi (__a, *__b, __imm);
> - return __res;
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_u16 (uint16x8_t __a, uint32_t * __b, const int __imm)
> -{
> - uint16x8_t __res = __builtin_mve_vshlcq_vec_uv8hi (__a, *__b, __imm);
> - *__b = __builtin_mve_vshlcq_carry_uv8hi (__a, *__b, __imm);
> - return __res;
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_s32 (int32x4_t __a, uint32_t * __b, const int __imm)
> -{
> - int32x4_t __res = __builtin_mve_vshlcq_vec_sv4si (__a, *__b, __imm);
> - *__b = __builtin_mve_vshlcq_carry_sv4si (__a, *__b, __imm);
> - return __res;
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_u32 (uint32x4_t __a, uint32_t * __b, const int __imm)
> -{
> - uint32x4_t __res = __builtin_mve_vshlcq_vec_uv4si (__a, *__b, __imm);
> - *__b = __builtin_mve_vshlcq_carry_uv4si (__a, *__b, __imm);
> - return __res;
> -}
> -
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vstrbq_scatter_offset_s8 (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
> @@ -2404,60 +2336,6 @@ __arm_srshr (int32_t value, const int shift)
> return __builtin_mve_srshr_si (value, shift);
> }
>
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m_s8 (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - int8x16_t __res = __builtin_mve_vshlcq_m_vec_sv16qi (__a, *__b, __imm, __p);
> - *__b = __builtin_mve_vshlcq_m_carry_sv16qi (__a, *__b, __imm, __p);
> - return __res;
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m_u8 (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - uint8x16_t __res = __builtin_mve_vshlcq_m_vec_uv16qi (__a, *__b, __imm, __p);
> - *__b = __builtin_mve_vshlcq_m_carry_uv16qi (__a, *__b, __imm, __p);
> - return __res;
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m_s16 (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - int16x8_t __res = __builtin_mve_vshlcq_m_vec_sv8hi (__a, *__b, __imm, __p);
> - *__b = __builtin_mve_vshlcq_m_carry_sv8hi (__a, *__b, __imm, __p);
> - return __res;
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m_u16 (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - uint16x8_t __res = __builtin_mve_vshlcq_m_vec_uv8hi (__a, *__b, __imm, __p);
> - *__b = __builtin_mve_vshlcq_m_carry_uv8hi (__a, *__b, __imm, __p);
> - return __res;
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m_s32 (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - int32x4_t __res = __builtin_mve_vshlcq_m_vec_sv4si (__a, *__b, __imm, __p);
> - *__b = __builtin_mve_vshlcq_m_carry_sv4si (__a, *__b, __imm, __p);
> - return __res;
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m_u32 (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - uint32x4_t __res = __builtin_mve_vshlcq_m_vec_uv4si (__a, *__b, __imm, __p);
> - *__b = __builtin_mve_vshlcq_m_carry_uv4si (__a, *__b, __imm, __p);
> - return __res;
> -}
> -
> #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */
>
> __extension__ extern __inline void
> @@ -2868,48 +2746,6 @@ __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value)
> __arm_vst4q_u32 (__addr, __value);
> }
>
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq (int8x16_t __a, uint32_t * __b, const int __imm)
> -{
> - return __arm_vshlcq_s8 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq (uint8x16_t __a, uint32_t * __b, const int __imm)
> -{
> - return __arm_vshlcq_u8 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq (int16x8_t __a, uint32_t * __b, const int __imm)
> -{
> - return __arm_vshlcq_s16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq (uint16x8_t __a, uint32_t * __b, const int __imm)
> -{
> - return __arm_vshlcq_u16 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq (int32x4_t __a, uint32_t * __b, const int __imm)
> -{
> - return __arm_vshlcq_s32 (__a, __b, __imm);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq (uint32x4_t __a, uint32_t * __b, const int __imm)
> -{
> - return __arm_vshlcq_u32 (__a, __b, __imm);
> -}
> -
> __extension__ extern __inline void
> __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> __arm_vstrbq_scatter_offset (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
> @@ -4240,48 +4076,6 @@ __arm_vgetq_lane (uint64x2_t __a, const int __idx)
> return __arm_vgetq_lane_u64 (__a, __idx);
> }
>
> -__extension__ extern __inline int8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlcq_m_s8 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint8x16_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlcq_m_u8 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlcq_m_s16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint16x8_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlcq_m_u16 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline int32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlcq_m_s32 (__a, __b, __imm, __p);
> -}
> -
> -__extension__ extern __inline uint32x4_t
> -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> -__arm_vshlcq_m (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
> -{
> - return __arm_vshlcq_m_u32 (__a, __b, __imm, __p);
> -}
> -
> #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */
>
> __extension__ extern __inline void
> @@ -4887,15 +4681,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce_f16_ptr(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \
> int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce_f32_ptr(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));})
>
> -#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> - int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> - int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> - int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> -
> #define __arm_vld1q_z(p0,p1) ( \
> _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
> int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \
> @@ -5234,15 +5019,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \
> int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));})
>
> -#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> - int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
> - int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
> - int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
> - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
> - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
> - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
> -
> #define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
> _Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
> int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_s32(p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
> @@ -5615,15 +5391,6 @@ extern void *__ARM_undef;
> int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
> int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
>
> -#define __arm_vshlcq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
> - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> - int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2, p3), \
> - int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2, p3), \
> - int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2, p3), \
> - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2, p3), \
> - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2, p3), \
> - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2, p3));})
> -
> #define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
> _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
> int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 0507e117f51..83a1eb48533 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -1719,7 +1719,7 @@ (define_expand "mve_vshlcq_carry_<supf><mode>"
> DONE;
> })
>
> -(define_insn "mve_vshlcq_<supf><mode>"
> +(define_insn "@mve_vshlcq_<supf><mode>"
> [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
> (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
> (match_operand:SI 3 "s_register_operand" "1")
> @@ -6279,7 +6279,7 @@ (define_expand "mve_vshlcq_m_carry_<supf><mode>"
> DONE;
> })
>
> -(define_insn "mve_vshlcq_m_<supf><mode>"
> +(define_insn "@mve_vshlcq_m_<supf><mode>"
> [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
> (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
> (match_operand:SI 3 "s_register_operand" "1")
@@ -483,6 +483,77 @@ public:
}
};
+/* Map the vshlc function directly to CODE (UNSPEC, M) where M is the vector
+ mode associated with type suffix 0. We need this special case because the
+   intrinsics dereference the second parameter and update its contents. */
+class vshlc_impl : public function_base
+{
+public:
+ unsigned int
+ call_properties (const function_instance &) const override
+ {
+ return CP_WRITE_MEMORY | CP_READ_MEMORY;
+ }
+
+ tree
+ memory_scalar_type (const function_instance &) const override
+ {
+ return get_typenode_from_name (UINT32_TYPE);
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ machine_mode mode = e.vector_mode (0);
+ insn_code code;
+ rtx insns, carry_ptr, carry, new_carry;
+ int carry_arg_no;
+
+ if (! e.type_suffix (0).integer_p)
+ gcc_unreachable ();
+
+ if (e.mode_suffix_id != MODE_none)
+ gcc_unreachable ();
+
+ carry_arg_no = 1;
+
+ carry = gen_reg_rtx (SImode);
+ carry_ptr = e.args[carry_arg_no];
+ emit_insn (gen_rtx_SET (carry, gen_rtx_MEM (SImode, carry_ptr)));
+ e.args[carry_arg_no] = carry;
+
+ new_carry = gen_reg_rtx (SImode);
+ e.args.quick_insert (0, new_carry);
+
+ switch (e.pred)
+ {
+ case PRED_none:
+ /* No predicate. */
+ code = e.type_suffix (0).unsigned_p
+ ? code_for_mve_vshlcq (VSHLCQ_U, mode)
+ : code_for_mve_vshlcq (VSHLCQ_S, mode);
+ insns = e.use_exact_insn (code);
+ break;
+
+ case PRED_m:
+ /* "m" predicate. */
+ code = e.type_suffix (0).unsigned_p
+ ? code_for_mve_vshlcq_m (VSHLCQ_M_U, mode)
+ : code_for_mve_vshlcq_m (VSHLCQ_M_S, mode);
+ insns = e.use_cond_insn (code, 0);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Update carry. */
+ emit_insn (gen_rtx_SET (gen_rtx_MEM (Pmode, carry_ptr), new_carry));
+
+ return insns;
+ }
+};
+
} /* end anonymous namespace */
namespace arm_mve {
@@ -815,6 +886,7 @@ FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
FUNCTION_ONLY_N_NO_F (vrshrnbq, VRSHRNBQ)
FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ)
FUNCTION_ONLY_N_NO_F (vrshrq, VRSHRQ)
+FUNCTION (vshlcq, vshlc_impl,)
FUNCTION_ONLY_N_NO_F (vshllbq, VSHLLBQ)
FUNCTION_ONLY_N_NO_F (vshlltq, VSHLLTQ)
FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
@@ -152,6 +152,7 @@ DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
DEF_MVE_FUNCTION (vrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
DEF_MVE_FUNCTION (vrshrq, binary_rshift, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vshlcq, vshlc, all_integer, m_or_none)
DEF_MVE_FUNCTION (vshllbq, binary_widen_n, integer_8_16, mx_or_none)
DEF_MVE_FUNCTION (vshlltq, binary_widen_n, integer_8_16, mx_or_none)
DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none)
@@ -188,6 +188,7 @@ extern const function_base *const vrshlq;
extern const function_base *const vrshrnbq;
extern const function_base *const vrshrntq;
extern const function_base *const vrshrq;
+extern const function_base *const vshlcq;
extern const function_base *const vshllbq;
extern const function_base *const vshlltq;
extern const function_base *const vshlq;
@@ -810,6 +810,7 @@ function_instance::has_inactive_argument () const
|| (base == functions::vrshlq && mode_suffix_id == MODE_n)
|| base == functions::vrshrnbq
|| base == functions::vrshrntq
+ || base == functions::vshlcq
|| base == functions::vshrnbq
|| base == functions::vshrntq
|| base == functions::vsliq
@@ -42,7 +42,6 @@
#ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
#define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
-#define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
#define vstrbq_scatter_offset(__base, __offset, __value) __arm_vstrbq_scatter_offset(__base, __offset, __value)
#define vstrbq(__addr, __value) __arm_vstrbq(__addr, __value)
#define vstrwq_scatter_base(__addr, __offset, __value) __arm_vstrwq_scatter_base(__addr, __offset, __value)
@@ -101,7 +100,6 @@
#define vld4q(__addr) __arm_vld4q(__addr)
#define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx)
#define vgetq_lane(__a, __idx) __arm_vgetq_lane(__a, __idx)
-#define vshlcq_m(__a, __b, __imm, __p) __arm_vshlcq_m(__a, __b, __imm, __p)
#define vst4q_s8( __addr, __value) __arm_vst4q_s8( __addr, __value)
@@ -113,12 +111,6 @@
#define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
#define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
#define vpnot(__a) __arm_vpnot(__a)
-#define vshlcq_s8(__a, __b, __imm) __arm_vshlcq_s8(__a, __b, __imm)
-#define vshlcq_u8(__a, __b, __imm) __arm_vshlcq_u8(__a, __b, __imm)
-#define vshlcq_s16(__a, __b, __imm) __arm_vshlcq_s16(__a, __b, __imm)
-#define vshlcq_u16(__a, __b, __imm) __arm_vshlcq_u16(__a, __b, __imm)
-#define vshlcq_s32(__a, __b, __imm) __arm_vshlcq_s32(__a, __b, __imm)
-#define vshlcq_u32(__a, __b, __imm) __arm_vshlcq_u32(__a, __b, __imm)
#define vstrbq_s8( __addr, __value) __arm_vstrbq_s8( __addr, __value)
#define vstrbq_u8( __addr, __value) __arm_vstrbq_u8( __addr, __value)
#define vstrbq_u16( __addr, __value) __arm_vstrbq_u16( __addr, __value)
@@ -421,12 +413,6 @@
#define urshrl(__p0, __p1) __arm_urshrl(__p0, __p1)
#define lsll(__p0, __p1) __arm_lsll(__p0, __p1)
#define asrl(__p0, __p1) __arm_asrl(__p0, __p1)
-#define vshlcq_m_s8(__a, __b, __imm, __p) __arm_vshlcq_m_s8(__a, __b, __imm, __p)
-#define vshlcq_m_u8(__a, __b, __imm, __p) __arm_vshlcq_m_u8(__a, __b, __imm, __p)
-#define vshlcq_m_s16(__a, __b, __imm, __p) __arm_vshlcq_m_s16(__a, __b, __imm, __p)
-#define vshlcq_m_u16(__a, __b, __imm, __p) __arm_vshlcq_m_u16(__a, __b, __imm, __p)
-#define vshlcq_m_s32(__a, __b, __imm, __p) __arm_vshlcq_m_s32(__a, __b, __imm, __p)
-#define vshlcq_m_u32(__a, __b, __imm, __p) __arm_vshlcq_m_u32(__a, __b, __imm, __p)
#endif
/* For big-endian, GCC's vector indices are reversed within each 64 bits
@@ -502,60 +488,6 @@ __arm_vpnot (mve_pred16_t __a)
return __builtin_mve_vpnotv16bi (__a);
}
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_s8 (int8x16_t __a, uint32_t * __b, const int __imm)
-{
- int8x16_t __res = __builtin_mve_vshlcq_vec_sv16qi (__a, *__b, __imm);
- *__b = __builtin_mve_vshlcq_carry_sv16qi (__a, *__b, __imm);
- return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_u8 (uint8x16_t __a, uint32_t * __b, const int __imm)
-{
- uint8x16_t __res = __builtin_mve_vshlcq_vec_uv16qi (__a, *__b, __imm);
- *__b = __builtin_mve_vshlcq_carry_uv16qi (__a, *__b, __imm);
- return __res;
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_s16 (int16x8_t __a, uint32_t * __b, const int __imm)
-{
- int16x8_t __res = __builtin_mve_vshlcq_vec_sv8hi (__a, *__b, __imm);
- *__b = __builtin_mve_vshlcq_carry_sv8hi (__a, *__b, __imm);
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_u16 (uint16x8_t __a, uint32_t * __b, const int __imm)
-{
- uint16x8_t __res = __builtin_mve_vshlcq_vec_uv8hi (__a, *__b, __imm);
- *__b = __builtin_mve_vshlcq_carry_uv8hi (__a, *__b, __imm);
- return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_s32 (int32x4_t __a, uint32_t * __b, const int __imm)
-{
- int32x4_t __res = __builtin_mve_vshlcq_vec_sv4si (__a, *__b, __imm);
- *__b = __builtin_mve_vshlcq_carry_sv4si (__a, *__b, __imm);
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_u32 (uint32x4_t __a, uint32_t * __b, const int __imm)
-{
- uint32x4_t __res = __builtin_mve_vshlcq_vec_uv4si (__a, *__b, __imm);
- *__b = __builtin_mve_vshlcq_carry_uv4si (__a, *__b, __imm);
- return __res;
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrbq_scatter_offset_s8 (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
@@ -2404,60 +2336,6 @@ __arm_srshr (int32_t value, const int shift)
return __builtin_mve_srshr_si (value, shift);
}
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_s8 (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- int8x16_t __res = __builtin_mve_vshlcq_m_vec_sv16qi (__a, *__b, __imm, __p);
- *__b = __builtin_mve_vshlcq_m_carry_sv16qi (__a, *__b, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_u8 (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- uint8x16_t __res = __builtin_mve_vshlcq_m_vec_uv16qi (__a, *__b, __imm, __p);
- *__b = __builtin_mve_vshlcq_m_carry_uv16qi (__a, *__b, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_s16 (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- int16x8_t __res = __builtin_mve_vshlcq_m_vec_sv8hi (__a, *__b, __imm, __p);
- *__b = __builtin_mve_vshlcq_m_carry_sv8hi (__a, *__b, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_u16 (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- uint16x8_t __res = __builtin_mve_vshlcq_m_vec_uv8hi (__a, *__b, __imm, __p);
- *__b = __builtin_mve_vshlcq_m_carry_uv8hi (__a, *__b, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_s32 (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- int32x4_t __res = __builtin_mve_vshlcq_m_vec_sv4si (__a, *__b, __imm, __p);
- *__b = __builtin_mve_vshlcq_m_carry_sv4si (__a, *__b, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_u32 (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- uint32x4_t __res = __builtin_mve_vshlcq_m_vec_uv4si (__a, *__b, __imm, __p);
- *__b = __builtin_mve_vshlcq_m_carry_uv4si (__a, *__b, __imm, __p);
- return __res;
-}
-
#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */
__extension__ extern __inline void
@@ -2868,48 +2746,6 @@ __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value)
__arm_vst4q_u32 (__addr, __value);
}
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (int8x16_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_s8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (uint8x16_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_u8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (int16x8_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (uint16x8_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (int32x4_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (uint32x4_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_u32 (__a, __b, __imm);
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrbq_scatter_offset (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
@@ -4240,48 +4076,6 @@ __arm_vgetq_lane (uint64x2_t __a, const int __idx)
return __arm_vgetq_lane_u64 (__a, __idx);
}
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_s8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_u8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_u16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_u32 (__a, __b, __imm, __p);
-}
-
#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */
__extension__ extern __inline void
@@ -4887,15 +4681,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce_f16_ptr(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \
int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce_f32_ptr(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));})
-#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
- int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
- int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
- int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
#define __arm_vld1q_z(p0,p1) ( \
_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \
@@ -5234,15 +5019,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \
int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));})
-#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
- int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
- int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
- int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
#define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_s32(p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
@@ -5615,15 +5391,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
-#define __arm_vshlcq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2, p3), \
- int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2, p3), \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2, p3), \
- int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2, p3), \
- int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2, p3), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2, p3));})
-
#define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \
@@ -1719,7 +1719,7 @@ (define_expand "mve_vshlcq_carry_<supf><mode>"
DONE;
})
-(define_insn "mve_vshlcq_<supf><mode>"
+(define_insn "@mve_vshlcq_<supf><mode>"
[(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
(match_operand:SI 3 "s_register_operand" "1")
@@ -6279,7 +6279,7 @@ (define_expand "mve_vshlcq_m_carry_<supf><mode>"
DONE;
})
-(define_insn "mve_vshlcq_m_<supf><mode>"
+(define_insn "@mve_vshlcq_m_<supf><mode>"
[(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
(match_operand:SI 3 "s_register_operand" "1")