@@ -268,6 +268,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_SVE_BF16,
"__ARM_FEATURE_SVE_BF16", pfile);
+ aarch64_def_or_undef (TARGET_LUT, "__ARM_FEATURE_LUT", pfile);
+
aarch64_def_or_undef (TARGET_FP8, "__ARM_FEATURE_FP8", pfile);
aarch64_def_or_undef (TARGET_FP8DOT2, "__ARM_FEATURE_FP8DOT2", pfile);
@@ -903,6 +903,50 @@ struct load_ext_gather_base : public overloaded_base<1>
}
};
+
+/* sv<v0>_t svlut[_<t0>_g](sv<t0>x<g>_t, svuint8_t, uint64_t)
+ where the final argument is a constant index, the instruction divides
+ the vector argument in BITS-bit quantities. */
+template<unsigned int BITS>
+struct luti_base : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ /* Format: return type, table vector, indices vector, immediate value. */
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,t0,vu8,su64", group, MODE_none);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ auto max_range = c.type_suffix (0).element_bits / BITS - 1;
+ return c.require_immediate_range (2, 0, max_range);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ sve_type type;
+ if (!r.check_num_arguments (3)
+ || !(type = r.infer_sve_type (0))
+ || !r.require_vector_type (1, VECTOR_TYPE_svuint8_t)
+ || !r.require_scalar_type (2, "uint64_t"))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+
+/* Specializations for 2-bit and 4-bit indices. */
+using luti2_def = luti_base<2>;
+SHAPE (luti2)
+
+using luti4_def = luti_base<4>;
+SHAPE (luti4)
+
+
/* sv<t0>x<g>_t svfoo_t0_g(uint64_t, svuint8_t, uint64_t)
where the first argument is the ZT register number (currently always 0)
@@ -164,6 +164,8 @@ namespace aarch64_sve
extern const function_shape *const load_gather64_vs_offset;
extern const function_shape *const load_replicate;
extern const function_shape *const load_za;
+ extern const function_shape *const luti2;
+ extern const function_shape *const luti4;
extern const function_shape *const luti2_lane_zt;
extern const function_shape *const luti4_lane_zt;
extern const function_shape *const mmla;
@@ -914,6 +914,21 @@ public:
unsigned int m_base;
};
+class svluti_lane_impl : public function_base
+{
+public:
+ CONSTEXPR svluti_lane_impl (unsigned int bits) : m_bits (bits)
+ {}
+
+ rtx expand (function_expander &e) const override
+ {
+ auto mode = e.tuple_mode (0);
+ return e.use_exact_insn (code_for_aarch64_sve_luti (m_bits, mode));
+ }
+
+ unsigned int m_bits;
+};
+
} /* end anonymous namespace */
namespace aarch64_sve {
@@ -1205,5 +1220,7 @@ FUNCTION (svzip, multireg_permute, (UNSPEC_ZIP))
FUNCTION (svzipq, multireg_permute, (UNSPEC_ZIPQ))
FUNCTION (svzipq1, svzipq_impl, (0))
FUNCTION (svzipq2, svzipq_impl, (1))
+FUNCTION (svluti2_lane, svluti_lane_impl, (2))
+FUNCTION (svluti4_lane, svluti_lane_impl, (4))
} /* end namespace aarch64_sve */
@@ -336,6 +336,14 @@ DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz)
DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz)
#undef REQUIRED_EXTENSIONS
+#define REQUIRED_EXTENSIONS \
+ sve_and_sme (AARCH64_FL_SVE2 | AARCH64_FL_LUT, \
+ AARCH64_FL_SME2 | AARCH64_FL_LUT)
+DEF_SVE_FUNCTION (svluti2_lane, luti2, bh_data, none)
+DEF_SVE_FUNCTION (svluti4_lane, luti4, bh_data, none)
+DEF_SVE_FUNCTION_GS (svluti4_lane, luti4, bh_data, x2, none)
+#undef REQUIRED_EXTENSIONS
+
#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F16F16)
DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_f32_f16, x2, none)
DEF_SVE_FUNCTION_GS (svcvtl, unary_convertxn, cvt_f32_f16, x2, none)
@@ -250,6 +250,8 @@ namespace aarch64_sve
extern const function_base *const svzipq;
extern const function_base *const svzipq1;
extern const function_base *const svzipq2;
+ extern const function_base *const svluti2_lane;
+ extern const function_base *const svluti4_lane;
}
}
@@ -283,7 +283,11 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
#define TYPES_bhs_integer(S, D) \
TYPES_bhs_signed (S, D), TYPES_bhs_unsigned (S, D)
-#define TYPES_bhs_data(S, D) \
+#define TYPES_bh_data(S, D) \
+ TYPES_b_data (S, D), \
+ TYPES_h_data (S, D)
+
+#define TYPES_bhs_data(S, D) \
TYPES_b_data (S, D), \
TYPES_h_data (S, D), \
TYPES_s_data (S, D)
@@ -782,6 +786,7 @@ DEF_SVE_TYPES_ARRAY (bs_unsigned);
DEF_SVE_TYPES_ARRAY (bhs_signed);
DEF_SVE_TYPES_ARRAY (bhs_unsigned);
DEF_SVE_TYPES_ARRAY (bhs_integer);
+DEF_SVE_TYPES_ARRAY (bh_data);
DEF_SVE_TYPES_ARRAY (bhs_data);
DEF_SVE_TYPES_ARRAY (bhs_widen);
DEF_SVE_TYPES_ARRAY (c);
@@ -133,6 +133,7 @@
;; ---- Optional AES extensions
;; ---- Optional SHA-3 extensions
;; ---- Optional SM4 extensions
+;; ---- Table lookup
;; =========================================================================
;; == Moves
@@ -4211,3 +4212,35 @@
"sm4ekey\t%0.s, %1.s, %2.s"
[(set_attr "type" "crypto_sm4")]
)
+
+;; -------------------------------------------------------------------------
+;; ---- Table lookup
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - LUTI2
+;; - LUTI4
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
+ [(set (match_operand:SVE_FULL_BH 0 "register_operand" "=w")
+ (unspec:SVE_FULL_BH
+ [(match_operand:SVE_FULL_BH 1 "register_operand" "w")
+ (match_operand:VNx16QI 2 "register_operand" "w")
+ (match_operand:DI 3 "const_int_operand")
+ (const_int LUTI_BITS)]
+ UNSPEC_SVE_LUTI))]
+ "TARGET_LUT && TARGET_SVE2_OR_SME2"
+ "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]"
+)
+
+(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
+ [(set (match_operand:<VSINGLE> 0 "register_operand" "=w")
+ (unspec:<VSINGLE>
+ [(match_operand:SVE_FULL_Hx2 1 "register_operand" "Uw2")
+ (match_operand:VNx16QI 2 "register_operand" "w")
+ (match_operand:DI 3 "const_int_operand")
+ (const_int LUTI_BITS)]
+ UNSPEC_SVE_LUTI))]
+ "TARGET_LUT && TARGET_SVE2_OR_SME2"
+ "luti<LUTI_BITS>\t%0.<Vetype>, %1, %2[%3]"
+)
@@ -553,6 +553,18 @@
(define_mode_iterator SVE_FULL_BHS [VNx16QI VNx8HI VNx4SI
VNx8BF VNx8HF VNx4SF])
+;; Fully-packed SVE vector byte modes that have 32-bit or smaller elements.
+(define_mode_iterator SVE_FULL_BS [VNx16QI VNx4SI VNx4SF])
+
+;; Fully-packed SVE vector byte modes that have 16-bit or smaller elements.
+(define_mode_iterator SVE_FULL_BH [VNx16QI VNx8HI VNx8HF VNx8BF])
+
+;; Fully-packed half word SVE vector modes
+(define_mode_iterator SVE_FULL_H [VNx8HI VNx8HF VNx8BF])
+
+;; Pairs of fully-packed SVE vector modes (half word only)
+(define_mode_iterator SVE_FULL_Hx2 [VNx16HI VNx16HF VNx16BF])
+
;; Fully-packed SVE vector modes that have 32-bit elements.
(define_mode_iterator SVE_FULL_S [VNx4SI VNx4SF])
@@ -1186,6 +1198,7 @@
UNSPEC_UZPQ2
UNSPEC_ZIPQ1
UNSPEC_ZIPQ2
+ UNSPEC_SVE_LUTI
;; All used in aarch64-sme.md
UNSPEC_SME_ADD
@@ -780,4 +780,20 @@
"w" (z16), "w" (z22), "w" (z29)); \
}
+#define TEST_1X2_NARROW(NAME, RTYPE, TTYPE, ZTYPE, CODE1, CODE2) \
+ PROTO(NAME, void, ()) \
+ { \
+ register RTYPE z0 __asm ("z0"); \
+ register ZTYPE z5 __asm ("z5"); \
+ register TTYPE z6 __asm ("z6"); \
+ register RTYPE z16 __asm ("z16"); \
+ register ZTYPE z22 __asm ("z22"); \
+ register TTYPE z29 __asm ("z29"); \
+ register RTYPE z0_res __asm ("z0"); \
+ __asm volatile ("" : "=w" (z0), "=w" (z5), "=w" (z6), \
+ "=w" (z16), "=w" (z22), "=w" (z29)); \
+ INVOKE (CODE1, CODE2); \
+ __asm volatile ("" :: "w" (z0_res), "w" (z5), "w" (z22)); \
+ }
+
#endif
new file mode 100644
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv9.2-a+sve2+lut")
+
+void
+test (svfloat16_t f16, svfloat32_t f32, svfloat64_t f64,
+ svfloat16x2_t f16x2, svfloat32x2_t f32x2, svfloat64x2_t f64x2,
+ svuint8_t u8, svuint16_t u16, svuint32_t u32, svuint64_t u64,
+ svuint8x2_t u8x2, svuint16x2_t u16x2,
+ svuint32x2_t u32x2, svuint64x2_t u64x2,
+ svint8_t s8, svint16_t s16, svint32_t s32, svint64_t s64,
+ svint8x2_t s8x2, svint16x2_t s16x2, svint32x2_t s32x2, svint64x2_t s64x2,
+ svbfloat16_t bf16, svbfloat16x2_t bf16x2)
+{
+ svluti2_lane (f16, u8, 0);
+ svluti2_lane (bf16, u8, 0);
+
+ svluti2_lane (f32, u8, 0); /* { dg-error {'svluti2_lane' has no form that takes 'svfloat32_t' arguments} } */
+ svluti2_lane (f64, u8, 0); /* { dg-error {'svluti2_lane' has no form that takes 'svfloat64_t' arguments} } */
+
+ svluti2_lane (u8, u8, 0);
+ svluti2_lane (u16, u8, 0);
+
+ svluti2_lane (u32, u8, 0); /* { dg-error {'svluti2_lane' has no form that takes 'svuint32_t' arguments} } */
+ svluti2_lane (u64, u8, 0); /* { dg-error {'svluti2_lane' has no form that takes 'svuint64_t' arguments} } */
+
+ svluti2_lane (s8, u8, 0);
+ svluti2_lane (s16, u8, 0);
+
+ svluti2_lane (s32, u8, 0); /* { dg-error {'svluti2_lane' has no form that takes 'svint32_t' arguments} } */
+ svluti2_lane (s64, u8, 0); /* { dg-error {'svluti2_lane' has no form that takes 'svint64_t' arguments} } */
+
+ svluti4_lane (f16, u8, 0);
+ svluti4_lane (bf16, u8, 0);
+ svluti4_lane_x2 (f16x2, u8, 0);
+ svluti4_lane_x2 (bf16x2, u8, 0);
+
+ svluti4_lane (f32, u8, 0); /* { dg-error {'svluti4_lane' has no form that takes 'svfloat32_t' arguments} } */
+ svluti4_lane (f64, u8, 0); /* { dg-error {'svluti4_lane' has no form that takes 'svfloat64_t' arguments} } */
+ svluti4_lane_x2 (f32x2, u8, 0); /* { dg-error {'svluti4_lane_x2' has no form that takes 'svfloat32x2_t' arguments} } */
+ svluti4_lane_x2 (f64x2, u8, 0); /* { dg-error {'svluti4_lane_x2' has no form that takes 'svfloat64x2_t' arguments} } */
+
+ svluti4_lane (u8, u8, 0);
+ svluti4_lane (u16, u8, 0);
+ svluti4_lane_x2 (u8x2, u8, 0);
+ svluti4_lane_x2 (u16x2, u8, 0);
+
+ svluti4_lane (u32, u8, 0); /* { dg-error {'svluti4_lane' has no form that takes 'svuint32_t' arguments} } */
+ svluti4_lane (u64, u8, 0); /* { dg-error {'svluti4_lane' has no form that takes 'svuint64_t' arguments} } */
+ svluti4_lane_x2 (u32x2, u8, 0); /* { dg-error {'svluti4_lane_x2' has no form that takes 'svuint32x2_t' arguments} } */
+ svluti4_lane_x2 (u64x2, u8, 0); /* { dg-error {'svluti4_lane_x2' has no form that takes 'svuint64x2_t' arguments} } */
+
+ svluti4_lane (s8, u8, 0);
+ svluti4_lane (s16, u8, 0);
+ svluti4_lane_x2 (s8x2, u8, 0);
+ svluti4_lane_x2 (s16x2, u8, 0);
+
+ svluti4_lane (s32, u8, 0); /* { dg-error {'svluti4_lane' has no form that takes 'svint32_t' arguments} } */
+ svluti4_lane (s64, u8, 0); /* { dg-error {'svluti4_lane' has no form that takes 'svint64_t' arguments} } */
+ svluti4_lane_x2 (s32x2, u8, 0); /* { dg-error {'svluti4_lane_x2' has no form that takes 'svint32x2_t' arguments} } */
+ svluti4_lane_x2 (s64x2, u8, 0); /* { dg-error {'svluti4_lane_x2' has no form that takes 'svint64x2_t' arguments} } */
+}
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv9.2-a+sve2")
+
+void
+test (svfloat16_t f16, svuint8_t u8)
+{
+ svluti2_lane (f16, u8, 0); /* { dg-error {ACLE function 'svluti2_lane_f16' requires ISA extension 'lut'} } */
+}
new file mode 100644
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv9.2-a+sve2+lut")
+
+void
+test (svfloat16_t f16, svfloat16x2_t f16x2,
+ svuint8_t u8, svuint16_t u16, svuint8x2_t u8x2, svuint16x2_t u16x2,
+ svint8_t s8, svint16_t s16, svint8x2_t s8x2, svint16x2_t s16x2,
+ svbfloat16_t bf16, svbfloat16x2_t bf16x2)
+{
+ svluti2_lane (f16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */
+ svluti2_lane (f16, u8, 8); /* { dg-error {passing 8 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */
+
+ svluti2_lane (bf16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */
+ svluti2_lane (bf16, u8, 8); /* { dg-error {passing 8 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */
+
+ svluti2_lane (u8, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 3\]} } */
+ svluti2_lane (u8, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 3\]} } */
+ svluti2_lane (u16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */
+ svluti2_lane (u16, u8, 8); /* { dg-error {passing 8 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */
+
+ svluti2_lane (s8, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 3\]} } */
+ svluti2_lane (s8, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 3\]} } */
+ svluti2_lane (s16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */
+ svluti2_lane (s16, u8, 8); /* { dg-error {passing 8 to argument 3 of 'svluti2_lane', which expects a value in the range \[0, 7\]} } */
+
+ svluti4_lane (f16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */
+ svluti4_lane (f16, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */
+ svluti4_lane_x2 (f16x2, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */
+ svluti4_lane_x2 (f16x2, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */
+
+ svluti4_lane (bf16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */
+ svluti4_lane (bf16, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */
+ svluti4_lane_x2 (bf16x2, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */
+ svluti4_lane_x2 (bf16x2, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */
+
+ svluti4_lane (u8, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 1\]} } */
+ svluti4_lane (u8, u8, 2); /* { dg-error {passing 2 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 1\]} } */
+ svluti4_lane (u16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */
+ svluti4_lane (u16, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */
+ svluti4_lane_x2 (u8x2, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 1\]} } */
+ svluti4_lane_x2 (u8x2, u8, 2); /* { dg-error {passing 2 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 1\]} } */
+ svluti4_lane_x2 (u16x2, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */
+ svluti4_lane_x2 (u16x2, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */
+
+ svluti4_lane (s8, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 1\]} } */
+ svluti4_lane (s8, u8, 2); /* { dg-error {passing 2 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 1\]} } */
+ svluti4_lane (s16, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */
+ svluti4_lane (s16, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane', which expects a value in the range \[0, 3\]} } */
+ svluti4_lane_x2 (s8x2, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 1\]} } */
+ svluti4_lane_x2 (s8x2, u8, 2); /* { dg-error {passing 2 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 1\]} } */
+ svluti4_lane_x2 (s16x2, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */
+ svluti4_lane_x2 (s16x2, u8, 4); /* { dg-error {passing 4 to argument 3 of 'svluti4_lane_x2', which expects a value in the range \[0, 3\]} } */
+}
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti2_min_idx_test:
+** luti2 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_min_idx_test, svbfloat16_t, svuint8_t, z1,
+ svluti2_lane_bf16 (z28, z0, 0),
+ svluti2_lane (z28, z0, 0))
+
+/*
+** luti2_max_idx_test:
+** luti2 z1\.h, \{ z28\.h \}, z0\[7\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_max_idx_test, svbfloat16_t, svuint8_t, z1,
+ svluti2_lane_bf16 (z28, z0, 7),
+ svluti2_lane (z28, z0, 7))
+
+/*
+** luti2_tied_min_idx_test:
+** luti2 z28\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_tied_min_idx_test, svbfloat16_t, svuint8_t, z28,
+ svluti2_lane_bf16 (z28, z0, 0),
+ svluti2_lane (z28, z0, 0))
+
+/*
+** luti2_tied_max_idx_test:
+** luti2 z28\.h, \{ z28\.h \}, z0\[7\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_tied_max_idx_test, svbfloat16_t, svuint8_t, z28,
+ svluti2_lane_bf16 (z28, z0, 7),
+ svluti2_lane (z28, z0, 7))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti2_min_idx_test:
+** luti2 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_min_idx_test, svfloat16_t, svuint8_t, z1,
+ svluti2_lane_f16 (z28, z0, 0),
+ svluti2_lane (z28, z0, 0))
+
+/*
+** luti2_max_idx_test:
+** luti2 z1\.h, \{ z28\.h \}, z0\[7\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_max_idx_test, svfloat16_t, svuint8_t, z1,
+ svluti2_lane_f16 (z28, z0, 7),
+ svluti2_lane (z28, z0, 7))
+
+/*
+** luti2_tied_min_idx_test:
+** luti2 z28\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_tied_min_idx_test, svfloat16_t, svuint8_t, z28,
+ svluti2_lane_f16 (z28, z0, 0),
+ svluti2_lane (z28, z0, 0))
+
+/*
+** luti2_tied_max_idx_test:
+** luti2 z28\.h, \{ z28\.h \}, z0\[7\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_tied_max_idx_test, svfloat16_t, svuint8_t, z28,
+ svluti2_lane_f16 (z28, z0, 7),
+ svluti2_lane (z28, z0, 7))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti2_min_idx_test:
+** luti2 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_min_idx_test, svint16_t, svuint8_t, z1,
+ svluti2_lane_s16 (z28, z0, 0),
+ svluti2_lane (z28, z0, 0))
+
+/*
+** luti2_max_idx_test:
+** luti2 z1\.h, \{ z28\.h \}, z0\[7\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_max_idx_test, svint16_t, svuint8_t, z1,
+ svluti2_lane_s16 (z28, z0, 7),
+ svluti2_lane (z28, z0, 7))
+
+/*
+** luti2_tied_min_idx_test:
+** luti2 z28\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_tied_min_idx_test, svint16_t, svuint8_t, z28,
+ svluti2_lane_s16 (z28, z0, 0),
+ svluti2_lane (z28, z0, 0))
+
+/*
+** luti2_tied_max_idx_test:
+** luti2 z28\.h, \{ z28\.h \}, z0\[7\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_tied_max_idx_test, svint16_t, svuint8_t, z28,
+ svluti2_lane_s16 (z28, z0, 7),
+ svluti2_lane (z28, z0, 7))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti2_min_idx_test:
+** luti2 z1\.b, \{ z28\.b \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_min_idx_test, svint8_t, svuint8_t, z1,
+ svluti2_lane_s8 (z28, z0, 0),
+ svluti2_lane (z28, z0, 0))
+
+/*
+** luti2_max_idx_test:
+** luti2 z1\.b, \{ z28\.b \}, z0\[3\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_max_idx_test, svint8_t, svuint8_t, z1,
+ svluti2_lane_s8 (z28, z0, 3),
+ svluti2_lane (z28, z0, 3))
+
+/*
+** luti2_tied_min_idx_test:
+** luti2 z28\.b, \{ z28\.b \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_tied_min_idx_test, svint8_t, svuint8_t, z28,
+ svluti2_lane_s8 (z28, z0, 0),
+ svluti2_lane (z28, z0, 0))
+
+/*
+** luti2_tied_max_idx_test:
+** luti2 z28\.b, \{ z28\.b \}, z0\[3\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_tied_max_idx_test, svint8_t, svuint8_t, z28,
+ svluti2_lane_s8 (z28, z0, 3),
+ svluti2_lane (z28, z0, 3))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti2_min_idx_test:
+** luti2 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_min_idx_test, svuint16_t, svuint8_t, z1,
+ svluti2_lane_u16 (z28, z0, 0),
+ svluti2_lane (z28, z0, 0))
+
+/*
+** luti2_max_idx_test:
+** luti2 z1\.h, \{ z28\.h \}, z0\[7\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_max_idx_test, svuint16_t, svuint8_t, z1,
+ svluti2_lane_u16 (z28, z0, 7),
+ svluti2_lane (z28, z0, 7))
+
+/*
+** luti2_tied_min_idx_test:
+** luti2 z28\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_tied_min_idx_test, svuint16_t, svuint8_t, z28,
+ svluti2_lane_u16 (z28, z0, 0),
+ svluti2_lane (z28, z0, 0))
+
+/*
+** luti2_tied_max_idx_test:
+** luti2 z28\.h, \{ z28\.h \}, z0\[7\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_tied_max_idx_test, svuint16_t, svuint8_t, z28,
+ svluti2_lane_u16 (z28, z0, 7),
+ svluti2_lane (z28, z0, 7))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti2_min_idx_test:
+** luti2 z1\.b, \{ z28\.b \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_min_idx_test, svuint8_t, svuint8_t, z1,
+ svluti2_lane_u8 (z28, z0, 0),
+ svluti2_lane (z28, z0, 0))
+
+/*
+** luti2_max_idx_test:
+** luti2 z1\.b, \{ z28\.b \}, z0\[3\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_max_idx_test, svuint8_t, svuint8_t, z1,
+ svluti2_lane_u8 (z28, z0, 3),
+ svluti2_lane (z28, z0, 3))
+
+/*
+** luti2_tied_min_idx_test:
+** luti2 z28\.b, \{ z28\.b \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_tied_min_idx_test, svuint8_t, svuint8_t, z28,
+ svluti2_lane_u8 (z28, z0, 0),
+ svluti2_lane (z28, z0, 0))
+
+/*
+** luti2_tied_max_idx_test:
+** luti2 z28\.b, \{ z28\.b \}, z0\[3\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_tied_max_idx_test, svuint8_t, svuint8_t, z28,
+ svluti2_lane_u8 (z28, z0, 3),
+ svluti2_lane (z28, z0, 3))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti4_min_idx_test:
+** luti4 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_min_idx_test, svbfloat16_t, svuint8_t, z1,
+ svluti4_lane_bf16 (z28, z0, 0),
+ svluti4_lane (z28, z0, 0))
+
+/*
+** luti4_max_idx_test:
+** luti4 z1\.h, \{ z28\.h \}, z0\[3\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_max_idx_test, svbfloat16_t, svuint8_t, z1,
+ svluti4_lane_bf16 (z28, z0, 3),
+ svluti4_lane (z28, z0, 3))
+
+/*
+** luti4_tied_min_idx_test:
+** luti4 z28\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_tied_min_idx_test, svbfloat16_t, svuint8_t, z28,
+ svluti4_lane_bf16 (z28, z0, 0),
+ svluti4_lane (z28, z0, 0))
+
+/*
+** luti4_tied_max_idx_test:
+** luti4 z28\.h, \{ z28\.h \}, z0\[3\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_tied_max_idx_test, svbfloat16_t, svuint8_t, z28,
+ svluti4_lane_bf16 (z28, z0, 3),
+ svluti4_lane (z28, z0, 3))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti4_min_idx_test:
+** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[0\]
+** ret
+*/
+
+TEST_1X2_NARROW(luti4_min_idx_test, svbfloat16_t, svbfloat16x2_t, svuint8_t,
+ z0_res = svluti4_lane_bf16_x2 (z6, z5, 0),
+ z0_res = svluti4_lane_x2 (z6, z5, 0))
+
+/*
+** luti4_max_idx_test:
+** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[3\]
+** ret
+*/
+
+TEST_1X2_NARROW(luti4_max_idx_test, svbfloat16_t, svbfloat16x2_t, svuint8_t,
+ z0_res = svluti4_lane_bf16_x2 (z6, z5, 3),
+ z0_res = svluti4_lane_x2 (z6, z5, 3))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti4_min_idx_test:
+** luti4 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_min_idx_test, svfloat16_t, svuint8_t, z1,
+ svluti4_lane_f16 (z28, z0, 0),
+ svluti4_lane (z28, z0, 0))
+
+/*
+** luti4_max_idx_test:
+** luti4 z1\.h, \{ z28\.h \}, z0\[3\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_max_idx_test, svfloat16_t, svuint8_t, z1,
+ svluti4_lane_f16 (z28, z0, 3),
+ svluti4_lane (z28, z0, 3))
+
+/*
+** luti4_tied_min_idx_test:
+** luti4 z28\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_tied_min_idx_test, svfloat16_t, svuint8_t, z28,
+ svluti4_lane_f16 (z28, z0, 0),
+ svluti4_lane (z28, z0, 0))
+
+/*
+** luti4_tied_max_idx_test:
+** luti4 z28\.h, \{ z28\.h \}, z0\[3\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_tied_max_idx_test, svfloat16_t, svuint8_t, z28,
+ svluti4_lane_f16 (z28, z0, 3),
+ svluti4_lane (z28, z0, 3))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti4_min_idx_test:
+** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[0\]
+** ret
+*/
+
+TEST_1X2_NARROW(luti4_min_idx_test, svfloat16_t, svfloat16x2_t, svuint8_t,
+ z0_res = svluti4_lane_f16_x2 (z6, z5, 0),
+ z0_res = svluti4_lane_x2 (z6, z5, 0))
+
+/*
+** luti4_max_idx_test:
+** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[3\]
+** ret
+*/
+
+TEST_1X2_NARROW(luti4_max_idx_test, svfloat16_t, svfloat16x2_t, svuint8_t,
+ z0_res = svluti4_lane_f16_x2 (z6, z5, 3),
+ z0_res = svluti4_lane_x2 (z6, z5, 3))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti4_min_idx_test:
+** luti4 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_min_idx_test, svint16_t, svuint8_t, z1,
+ svluti4_lane_s16 (z28, z0, 0),
+ svluti4_lane (z28, z0, 0))
+
+/*
+** luti4_max_idx_test:
+** luti4 z1\.h, \{ z28\.h \}, z0\[3\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_max_idx_test, svint16_t, svuint8_t, z1,
+ svluti4_lane_s16 (z28, z0, 3),
+ svluti4_lane (z28, z0, 3))
+
+/*
+** luti4_tied_min_idx_test:
+** luti4 z28\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_tied_min_idx_test, svint16_t, svuint8_t, z28,
+ svluti4_lane_s16 (z28, z0, 0),
+ svluti4_lane (z28, z0, 0))
+
+/*
+** luti4_tied_max_idx_test:
+** luti4 z28\.h, \{ z28\.h \}, z0\[3\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_tied_max_idx_test, svint16_t, svuint8_t, z28,
+ svluti4_lane_s16 (z28, z0, 3),
+ svluti4_lane (z28, z0, 3))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti4_min_idx_test:
+** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[0\]
+** ret
+*/
+
+TEST_1X2_NARROW(luti4_min_idx_test, svint16_t, svint16x2_t, svuint8_t,
+ z0_res = svluti4_lane_s16_x2 (z6, z5, 0),
+ z0_res = svluti4_lane_x2 (z6, z5, 0))
+
+/*
+** luti4_max_idx_test:
+** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[3\]
+** ret
+*/
+
+TEST_1X2_NARROW(luti4_max_idx_test, svint16_t, svint16x2_t, svuint8_t,
+ z0_res = svluti4_lane_s16_x2 (z6, z5, 3),
+ z0_res = svluti4_lane_x2 (z6, z5, 3))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti4_min_idx_test:
+** luti4 z1\.b, \{ z28\.b \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_min_idx_test, svint8_t, svuint8_t, z1,
+ svluti4_lane_s8 (z28, z0, 0),
+ svluti4_lane (z28, z0, 0))
+
+/*
+** luti4_max_idx_test:
+** luti4 z1\.b, \{ z28\.b \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_max_idx_test, svint8_t, svuint8_t, z1,
+ svluti4_lane_s8 (z28, z0, 1),
+ svluti4_lane (z28, z0, 1))
+
+/*
+** luti4_tied_min_idx_test:
+** luti4 z28\.b, \{ z28\.b \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_tied_min_idx_test, svint8_t, svuint8_t, z28,
+ svluti4_lane_s8 (z28, z0, 0),
+ svluti4_lane (z28, z0, 0))
+
+/*
+** luti4_tied_max_idx_test:
+** luti4 z28\.b, \{ z28\.b \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_tied_max_idx_test, svint8_t, svuint8_t, z28,
+ svluti4_lane_s8 (z28, z0, 1),
+ svluti4_lane (z28, z0, 1))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti4_min_idx_test:
+** luti4 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_min_idx_test, svuint16_t, svuint8_t, z1,
+ svluti4_lane_u16 (z28, z0, 0),
+ svluti4_lane (z28, z0, 0))
+
+/*
+** luti4_max_idx_test:
+** luti4 z1\.h, \{ z28\.h \}, z0\[3\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_max_idx_test, svuint16_t, svuint8_t, z1,
+ svluti4_lane_u16 (z28, z0, 3),
+ svluti4_lane (z28, z0, 3))
+
+/*
+** luti4_tied_min_idx_test:
+** luti4 z28\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_tied_min_idx_test, svuint16_t, svuint8_t, z28,
+ svluti4_lane_u16 (z28, z0, 0),
+ svluti4_lane (z28, z0, 0))
+
+/*
+** luti4_tied_max_idx_test:
+** luti4 z28\.h, \{ z28\.h \}, z0\[3\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_tied_max_idx_test, svuint16_t, svuint8_t, z28,
+ svluti4_lane_u16 (z28, z0, 3),
+ svluti4_lane (z28, z0, 3))
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti4_min_idx_test:
+** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[0\]
+** ret
+*/
+
+TEST_1X2_NARROW(luti4_min_idx_test, svuint16_t, svuint16x2_t, svuint8_t,
+ z0_res = svluti4_lane_u16_x2 (z6, z5, 0),
+ z0_res = svluti4_lane_x2 (z6, z5, 0))
+
+/*
+** luti4_max_idx_test:
+** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[3\]
+** ret
+*/
+
+TEST_1X2_NARROW(luti4_max_idx_test, svuint16_t, svuint16x2_t, svuint8_t,
+ z0_res = svluti4_lane_u16_x2 (z6, z5, 3),
+ z0_res = svluti4_lane_x2 (z6, z5, 3))
new file mode 100644
@@ -0,0 +1,50 @@
+/* { dg-do assemble { target aarch64_asm_lut_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_lut_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+#pragma GCC target "+sve2+lut"
+#if STREAMING_COMPATIBLE
+#pragma GCC target "+sme2"
+#endif
+
+/*
+** luti4_min_idx_test:
+** luti4 z1\.b, \{ z28\.b \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_min_idx_test, svuint8_t, svuint8_t, z1,
+ svluti4_lane_u8 (z28, z0, 0),
+ svluti4_lane (z28, z0, 0))
+
+/*
+** luti4_max_idx_test:
+** luti4 z1\.b, \{ z28\.b \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_max_idx_test, svuint8_t, svuint8_t, z1,
+ svluti4_lane_u8 (z28, z0, 1),
+ svluti4_lane (z28, z0, 1))
+
+/*
+** luti4_tied_min_idx_test:
+** luti4 z28\.b, \{ z28\.b \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_tied_min_idx_test, svuint8_t, svuint8_t, z28,
+ svluti4_lane_u8 (z28, z0, 0),
+ svluti4_lane (z28, z0, 0))
+
+/*
+** luti4_tied_max_idx_test:
+** luti4 z28\.b, \{ z28\.b \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_tied_max_idx_test, svuint8_t, svuint8_t, z28,
+ svluti4_lane_u8 (z28, z0, 1),
+ svluti4_lane (z28, z0, 1))
@@ -12304,7 +12304,7 @@ proc check_effective_target_aarch64_tiny { } {
foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
"i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "ls64"
- "sme" "sme-i16i64" "sme2" "sve-b16b16"
+ "lut" "sme" "sme-i16i64" "sme2" "sve-b16b16"
"sme-b16b16" "sme-f16f16" "sme2p1" "fp8" "fp8fma"
"ssve-fp8fma" "fp8dot2" "ssve-fp8dot2" "fp8dot4"
"ssve-fp8dot4"} {