@@ -267,6 +267,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile);
aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile);
aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
+ aarch64_def_or_undef (TARGET_LUT, "__ARM_FEATURE_LUT", pfile);
/* Not for ACLE, but required to keep "float.h" correct if we switch
target between implementations that do or do not support ARMv8.2-A
@@ -232,6 +232,8 @@ AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the")
AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs")
+AARCH64_OPT_EXTENSION("lut", LUT, (SVE2, SME2), (), (), "lut")
+
#undef AARCH64_OPT_FMV_EXTENSION
#undef AARCH64_OPT_EXTENSION
#undef AARCH64_FMV_FEATURE
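
With the entry above, the extension can be selected with `+lut` in an -march string, and its dependency list (SVE2, SME2) means enabling it also enables those extensions.  A minimal sketch (not part of the patch) of guarding user code on the __ARM_FEATURE_LUT macro defined in the aarch64-c.cc hunk, assuming the intrinsic names introduced later in this patch:

    #include <arm_sve.h>

    #if defined (__ARM_FEATURE_LUT)
    /* 2-bit table lookup: selects elements of TABLE using the 2-bit
       indices packed into segment 0 of INDICES.  */
    svuint8_t
    lut2_lookup (svuint8_t table, svuint8_t indices)
    {
      return svluti2_u8 (table, indices, 0);
    }
    #endif
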
@@ -856,6 +856,47 @@ struct load_ext_gather_base : public overloaded_base<1>
}
};
+
+/* sv<t0>_t svluti<BITS>_<t0>(sv<t0>_t, svuint8_t, uint64_t)
+   where the final argument is a constant index.  The instruction divides
+   the index vector into BITS-bit quantities.  */
+template<unsigned int BITS>
+struct luti_base : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ /* Format: return type, table vector, indices vector, immediate value. */
+ build_all (b, "v0,t0,vu8,su64", group, MODE_none);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ int max_range;
+ bool byte_mode = c.type_suffix (0).element_bits == 8;
+
+ if (BITS == 2)
+ max_range = byte_mode ? 3 : 7;
+ else if (BITS == 4)
+ max_range = byte_mode ? 1 : 7;
+ else
+      /* Unsupported number of index bits for LUTI.  */
+ gcc_unreachable ();
+
+ return c.require_immediate_range (2, 0, max_range);
+ }
+};
+
+/* Specializations for 2-bit and 4-bit indices. */
+using luti2_def = luti_base<2>;
+SHAPE (luti2)
+
+using luti4_def = luti_base<4>;
+SHAPE (luti4)
+
/* sv<t0>x<g>_t svfoo_t0_g(uint64_t, svuint8_t, uint64_t)
where the first argument is the ZT register number (currently always 0)
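
For the u16 type suffix, for example, the build format string "v0,t0,vu8,su64" used by luti_base yields prototypes of roughly this shape (a sketch, not generated output):

    svuint16_t svluti2_u16 (svuint16_t table, svuint8_t indices, uint64_t imm_idx);
    svuint16_t svluti4_u16 (svuint16_t table, svuint8_t indices, uint64_t imm_idx);
    /* With the x2 group suffix, the table argument becomes a two-vector tuple.  */
    svuint16_t svluti4_u16_x2 (svuint16x2_t table, svuint8_t indices, uint64_t imm_idx);
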
@@ -154,6 +154,8 @@ namespace aarch64_sve
extern const function_shape *const load_gather_vs;
extern const function_shape *const load_replicate;
extern const function_shape *const load_za;
+ extern const function_shape *const luti2;
+ extern const function_shape *const luti4;
extern const function_shape *const luti2_lane_zt;
extern const function_shape *const luti4_lane_zt;
extern const function_shape *const mmla;
@@ -518,6 +518,21 @@ public:
int m_unspec;
};
+
+class svluti_lane_impl : public function_base
+{
+public:
+ CONSTEXPR svluti_lane_impl (unsigned int bits) : m_bits (bits) {}
+
+ rtx expand (function_expander &e) const override
+ {
+ auto mode = e.tuple_mode (0);
+ return e.use_exact_insn (code_for_aarch64_sve_luti (m_bits, mode));
+ }
+
+ unsigned int m_bits;
+};
+
} /* end anonymous namespace */
namespace aarch64_sve {
@@ -746,5 +761,7 @@ FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI))
FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW))
FUNCTION (svwhilewr, svwhilerw_svwhilewr_impl, (UNSPEC_WHILEWR))
FUNCTION (svxar, CODE_FOR_MODE0 (aarch64_sve2_xar),)
+FUNCTION (svluti2, svluti_lane_impl, (2))
+FUNCTION (svluti4, svluti_lane_impl, (4))
} /* end namespace aarch64_sve */
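
The immediate ranges enforced by luti_base::check translate into the following accepted and rejected calls (an illustrative sketch; the diagnostics themselves come from the generic require_immediate_range machinery):

    #include <arm_sve.h>

    void
    imm_ranges (svuint8_t bt, svuint16_t ht, svuint8_t idx)
    {
      svluti2_u8 (bt, idx, 3);    /* OK: 2-bit indices, 8-bit elements, max 3.  */
      svluti2_u16 (ht, idx, 7);   /* OK: 2-bit indices, 16-bit elements, max 7.  */
      svluti4_u8 (bt, idx, 1);    /* OK: 4-bit indices, 8-bit elements, max 1.  */
      /* svluti2_u8 (bt, idx, 4) would be rejected as out of range [0, 3].  */
    }
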
@@ -164,6 +164,10 @@ DEF_SVE_FUNCTION (svwhilegt, compare_scalar, while, none)
DEF_SVE_FUNCTION (svwhilerw, compare_ptr, all_data, none)
DEF_SVE_FUNCTION (svwhilewr, compare_ptr, all_data, none)
DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none)
+#undef REQUIRED_EXTENSIONS
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_LUT)
+DEF_SVE_FUNCTION (svluti2, luti2, bhs_data, none)
+DEF_SVE_FUNCTION (svluti4, luti4, bhs_data, none)
+DEF_SVE_FUNCTION_GS (svluti4, luti4, bhs_data, x2, none)
+
#undef REQUIRED_EXTENSIONS
#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
@@ -205,6 +205,8 @@ namespace aarch64_sve
extern const function_base *const svwhilerw;
extern const function_base *const svwhilewr;
extern const function_base *const svxar;
+ extern const function_base *const svluti2;
+ extern const function_base *const svluti4;
}
}
@@ -114,6 +114,7 @@
;; ---- Optional AES extensions
;; ---- Optional SHA-3 extensions
;; ---- Optional SM4 extensions
+;; ---- Table lookup
;; =========================================================================
;; == Loads
@@ -3543,3 +3544,47 @@
"sm4ekey\t%0.s, %1.s, %2.s"
[(set_attr "type" "crypto_sm4")]
)
+
+;; -------------------------------------------------------------------------
+;; ---- Table lookup
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - LUTI2
+;; - LUTI4
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
+ [(set (match_operand:SVE_FULL_BS 0 "register_operand" "=w")
+ (unspec:SVE_FULL_BS
+ [(match_operand:SVE_FULL_BS 1 "register_operand" "w")
+ (match_operand:VNx16QI 2 "register_operand" "w")
+ (match_operand:DI 3 "const_int_operand")
+ (const_int LUTI_BITS)]
+ UNSPEC_SVE_LUTI))]
+ "TARGET_SVE2"
+ "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]"
+)
+
+(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
+  [(set (match_operand:SVE_FULL_H 0 "register_operand" "=w")
+        (unspec:SVE_FULL_H
+          [(match_operand:SVE_FULL_H 1 "register_operand" "w")
+           (match_operand:VNx16QI 2 "register_operand" "w")
+           (match_operand:DI 3 "const_int_operand")
+           (const_int LUTI_BITS)]
+          UNSPEC_SVE_LUTI))]
+  "TARGET_SVE2 && TARGET_LUT"
+  "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]"
+)
+
+(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
+  [(set (match_operand:<VSINGLE> 0 "register_operand" "=w")
+        (unspec:<VSINGLE>
+          [(match_operand:SVE_FULL_Hx2 1 "aligned_register_operand" "Uw2")
+           (match_operand:VNx16QI 2 "register_operand" "w")
+           (match_operand:DI 3 "const_int_operand")
+           (const_int LUTI_BITS)]
+          UNSPEC_SVE_LUTI))]
+  "TARGET_SVE2 && TARGET_LUT"
+  "luti<LUTI_BITS>\t%0.<Vetype>, %1, %2[%3]"
+)
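
Taken together, these patterns emit output along the following lines (a sketch; register allocation is of course up to the compiler):

    #include <arm_sve.h>

    svuint16_t
    luti4_single (svuint16_t table, svuint8_t idx)
    {
      return svluti4_u16 (table, idx, 5);     /* luti4  z0.h, { z0.h }, z1[5] */
    }

    svuint16_t
    luti4_pair (svuint16x2_t table, svuint8_t idx)
    {
      return svluti4_u16_x2 (table, idx, 1);  /* luti4  z0.h, {z0.h - z1.h}, z2[1] */
    }
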
@@ -285,6 +285,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
#define AARCH64_ISA_D128 (aarch64_isa_flags & AARCH64_FL_D128)
#define AARCH64_ISA_THE (aarch64_isa_flags & AARCH64_FL_THE)
#define AARCH64_ISA_GCS (aarch64_isa_flags & AARCH64_FL_GCS)
+#define AARCH64_ISA_LUT (aarch64_isa_flags & AARCH64_FL_LUT)
/* The current function is a normal non-streaming function. */
#define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF)
@@ -515,6 +516,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
&& (aarch64_tune_params.extra_tuning_flags \
& AARCH64_EXTRA_TUNE_AVOID_PRED_RMW))
+/* Armv9.2-A/9.5-A lookup table instructions with 2-bit and 4-bit
+   indices: LUTI2 and LUTI4.  */
+#define TARGET_LUT (AARCH64_ISA_LUT)
+
/* Standard register usage. */
/* 31 64-bit general purpose registers R0-R30:
@@ -508,6 +508,15 @@
(define_mode_iterator SVE_FULL_BHS [VNx16QI VNx8HI VNx4SI
VNx8BF VNx8HF VNx4SF])
+;; Fully-packed SVE vector modes that have 8-bit or 32-bit elements.
+(define_mode_iterator SVE_FULL_BS [VNx16QI VNx4SI VNx4SF])
+
+;; Fully-packed SVE vector modes that have 16-bit elements.
+(define_mode_iterator SVE_FULL_H [VNx8HI VNx8HF VNx8BF])
+
+;; Pairs of fully-packed SVE vector modes that have 16-bit elements.
+(define_mode_iterator SVE_FULL_Hx2 [VNx16HI VNx16HF VNx16BF])
+
;; Fully-packed SVE vector modes that have 32-bit elements.
(define_mode_iterator SVE_FULL_S [VNx4SI VNx4SF])
@@ -1063,6 +1072,7 @@
UNSPEC_SQCVTUN
UNSPEC_UQCVT
UNSPEC_UQCVTN
+ UNSPEC_SVE_LUTI
;; All used in aarch64-sme.md
UNSPEC_SME_ADD
@@ -755,5 +755,19 @@
__asm volatile ("" :: "w" (z0_res), "w" (z22_res), \
"w" (z25)); \
}
-
+#define TEST_1X2_NARROW(NAME, RTYPE, TTYPE, ZTYPE, CODE1, CODE2) \
+  PROTO (NAME, void, ()) \
+  { \
+    register RTYPE z0 __asm ("z0"); \
+    register ZTYPE z5 __asm ("z5"); \
+    register TTYPE z6 __asm ("z6"); \
+    register RTYPE z16 __asm ("z16"); \
+    register ZTYPE z22 __asm ("z22"); \
+    register TTYPE z29 __asm ("z29"); \
+    register RTYPE z0_res __asm ("z0"); \
+    __asm volatile ("" : "=w" (z0), "=w" (z5), "=w" (z6), \
+                    "=w" (z16), "=w" (z22), "=w" (z29)); \
+    INVOKE (CODE1, CODE2); \
+    __asm volatile ("" :: "w" (z0_res), "w" (z5), "w" (z22)); \
+  }
+
#endif
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti2_test_imm0:
+** luti2 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm0, svbfloat16_t, svuint8_t, z1,
+ svluti2_bf16 (z28, z0, 0),
+ svluti2_bf16 (z28, z0, 0))
+
+/*
+** luti2_test_imm1:
+** luti2 z1\.h, \{ z28\.h \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm1, svbfloat16_t, svuint8_t, z1,
+ svluti2_bf16 (z28, z0, 1),
+ svluti2_bf16 (z28, z0, 1))
+
+/*
+** luti2_test_tied:
+** luti2 z28\.h, \{ z28\.h \}, z0\[2\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_tied, svbfloat16_t, svuint8_t, z28,
+ svluti2_bf16 (z28, z0, 2),
+ svluti2_bf16 (z28, z0, 2))
+
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti2_test_imm0:
+** luti2 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm0, svfloat16_t, svuint8_t, z1,
+ svluti2_f16 (z28, z0, 0),
+ svluti2_f16 (z28, z0, 0))
+
+/*
+** luti2_test_imm1:
+** luti2 z1\.h, \{ z28\.h \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm1, svfloat16_t, svuint8_t, z1,
+ svluti2_f16 (z28, z0, 1),
+ svluti2_f16 (z28, z0, 1))
+
+/*
+** luti2_test_tied:
+** luti2 z28\.h, \{ z28\.h \}, z0\[2\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_tied, svfloat16_t, svuint8_t, z28,
+ svluti2_f16 (z28, z0, 2),
+ svluti2_f16 (z28, z0, 2))
+
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti2_test_imm0:
+** luti2 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm0, svint16_t, svuint8_t, z1,
+ svluti2_s16 (z28, z0, 0),
+ svluti2_s16 (z28, z0, 0))
+
+/*
+** luti2_test_imm1:
+** luti2 z1\.h, \{ z28\.h \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm1, svint16_t, svuint8_t, z1,
+ svluti2_s16 (z28, z0, 1),
+ svluti2_s16 (z28, z0, 1))
+
+/*
+** luti2_test_tied:
+** luti2 z28\.h, \{ z28\.h \}, z0\[2\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_tied, svint16_t, svuint8_t, z28,
+ svluti2_s16 (z28, z0, 2),
+ svluti2_s16 (z28, z0, 2))
+
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti2_test_imm0:
+** luti2 z1\.b, \{ z28\.b \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm0, svint8_t, svuint8_t, z1,
+ svluti2_s8 (z28, z0, 0),
+ svluti2_s8 (z28, z0, 0))
+
+/*
+** luti2_test_imm1:
+** luti2 z1\.b, \{ z28\.b \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm1, svint8_t, svuint8_t, z1,
+ svluti2_s8 (z28, z0, 1),
+ svluti2_s8 (z28, z0, 1))
+
+/*
+** luti2_test_tied:
+** luti2 z28\.b, \{ z28\.b \}, z0\[2\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_tied, svint8_t, svuint8_t, z28,
+ svluti2_s8 (z28, z0, 2),
+ svluti2_s8 (z28, z0, 2))
+
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti2_test_imm0:
+** luti2 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm0, svuint16_t, svuint8_t, z1,
+ svluti2_u16 (z28, z0, 0),
+ svluti2_u16 (z28, z0, 0))
+
+/*
+** luti2_test_imm1:
+** luti2 z1\.h, \{ z28\.h \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm1, svuint16_t, svuint8_t, z1,
+ svluti2_u16 (z28, z0, 1),
+ svluti2_u16 (z28, z0, 1))
+
+/*
+** luti2_test_tied:
+** luti2 z28\.h, \{ z28\.h \}, z0\[2\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_tied, svuint16_t, svuint8_t, z28,
+ svluti2_u16 (z28, z0, 2),
+ svluti2_u16 (z28, z0, 2))
+
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti2_test_imm0:
+** luti2 z1\.b, \{ z28\.b \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm0, svuint8_t, svuint8_t, z1,
+ svluti2_u8 (z28, z0, 0),
+ svluti2_u8 (z28, z0, 0))
+
+/*
+** luti2_test_imm1:
+** luti2 z1\.b, \{ z28\.b \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm1, svuint8_t, svuint8_t, z1,
+ svluti2_u8 (z28, z0, 1),
+ svluti2_u8 (z28, z0, 1))
+
+/*
+** luti2_test_tied:
+** luti2 z28\.b, \{ z28\.b \}, z0\[2\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti2_test_tied, svuint8_t, svuint8_t, z28,
+ svluti2_u8 (z28, z0, 2),
+ svluti2_u8 (z28, z0, 2))
+
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_imm0:
+** luti4 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm0, svbfloat16_t, svuint8_t, z1,
+ svluti4_bf16 (z28, z0, 0),
+ svluti4_bf16 (z28, z0, 0))
+
+/*
+** luti4_test_imm1:
+** luti4 z1\.h, \{ z28\.h \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm1, svbfloat16_t, svuint8_t, z1,
+ svluti4_bf16 (z28, z0, 1),
+ svluti4_bf16 (z28, z0, 1))
+
+/*
+** luti4_test_tied:
+** luti4 z28\.h, \{ z28\.h \}, z0\[2\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_tied, svbfloat16_t, svuint8_t, z28,
+ svluti4_bf16 (z28, z0, 2),
+ svluti4_bf16 (z28, z0, 2))
+
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_bf16_x2:
+** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[1\]
+** ret
+*/
+
+TEST_1X2_NARROW(luti4_test_bf16_x2, svbfloat16_t, svbfloat16x2_t, svuint8_t,
+ z0_res = svluti4_bf16_x2(z6, z5, 1),
+ z0_res = svluti4_bf16_x2(z6, z5, 1))
+
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_imm0:
+** luti4 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm0, svfloat16_t, svuint8_t, z1,
+ svluti4_f16 (z28, z0, 0),
+ svluti4_f16 (z28, z0, 0))
+
+/*
+** luti4_test_imm1:
+** luti4 z1\.h, \{ z28\.h \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm1, svfloat16_t, svuint8_t, z1,
+ svluti4_f16 (z28, z0, 1),
+ svluti4_f16 (z28, z0, 1))
+
+/*
+** luti4_test_tied:
+** luti4 z28\.h, \{ z28\.h \}, z0\[2\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_tied, svfloat16_t, svuint8_t, z28,
+ svluti4_f16 (z28, z0, 2),
+ svluti4_f16 (z28, z0, 2))
+
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_f16_x2:
+** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[1\]
+** ret
+*/
+
+TEST_1X2_NARROW(luti4_test_f16_x2, svfloat16_t, svfloat16x2_t, svuint8_t,
+ z0_res = svluti4_f16_x2(z6, z5, 1),
+ z0_res = svluti4_f16_x2(z6, z5, 1))
+
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_imm0:
+** luti4 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm0, svint16_t, svuint8_t, z1,
+ svluti4_s16 (z28, z0, 0),
+ svluti4_s16 (z28, z0, 0))
+
+/*
+** luti4_test_imm1:
+** luti4 z1\.h, \{ z28\.h \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm1, svint16_t, svuint8_t, z1,
+ svluti4_s16 (z28, z0, 1),
+ svluti4_s16 (z28, z0, 1))
+
+/*
+** luti4_test_tied:
+** luti4 z28\.h, \{ z28\.h \}, z0\[2\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_tied, svint16_t, svuint8_t, z28,
+ svluti4_s16 (z28, z0, 2),
+ svluti4_s16 (z28, z0, 2))
+
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_s16_x2:
+** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[1\]
+** ret
+*/
+
+TEST_1X2_NARROW(luti4_test_s16_x2, svint16_t, svint16x2_t, svuint8_t,
+ z0_res = svluti4_s16_x2(z6, z5, 1),
+ z0_res = svluti4_s16_x2(z6, z5, 1))
+
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_imm0:
+** luti4 z1\.b, \{ z28\.b \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm0, svint8_t, svuint8_t, z1,
+ svluti4_s8 (z28, z0, 0),
+ svluti4_s8 (z28, z0, 0))
+
+/*
+** luti4_test_imm1:
+** luti4 z1\.b, \{ z28\.b \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm1, svint8_t, svuint8_t, z1,
+ svluti4_s8 (z28, z0, 1),
+ svluti4_s8 (z28, z0, 1))
+
new file mode 100644
@@ -0,0 +1,35 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_imm0:
+** luti4 z1\.h, \{ z28\.h \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm0, svuint16_t, svuint8_t, z1,
+ svluti4_u16 (z28, z0, 0),
+ svluti4_u16 (z28, z0, 0))
+
+/*
+** luti4_test_imm1:
+** luti4 z1\.h, \{ z28\.h \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm1, svuint16_t, svuint8_t, z1,
+ svluti4_u16 (z28, z0, 1),
+ svluti4_u16 (z28, z0, 1))
+
+/*
+** luti4_test_tied:
+** luti4 z28\.h, \{ z28\.h \}, z0\[2\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_tied, svuint16_t, svuint8_t, z28,
+ svluti4_u16 (z28, z0, 2),
+ svluti4_u16 (z28, z0, 2))
+
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_u16_x2:
+** luti4 z0\.h, \{z6\.h \- z7\.h\}, z5\[1\]
+** ret
+*/
+
+TEST_1X2_NARROW(luti4_test_u16_x2, svuint16_t, svuint16x2_t, svuint8_t,
+ z0_res = svluti4_u16_x2(z6, z5, 1),
+ z0_res = svluti4_u16_x2(z6, z5, 1))
+
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_imm0:
+** luti4 z1\.b, \{ z28\.b \}, z0\[0\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm0, svuint8_t, svuint8_t, z1,
+ svluti4_u8 (z28, z0, 0),
+ svluti4_u8 (z28, z0, 0))
+
+/*
+** luti4_test_imm1:
+** luti4 z1\.b, \{ z28\.b \}, z0\[1\]
+** ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm1, svuint8_t, svuint8_t, z1,
+ svluti4_u8 (z28, z0, 1),
+ svluti4_u8 (z28, z0, 1))
+
@@ -4598,6 +4598,18 @@ proc check_effective_target_aarch64_sve2 { } {
}]
}
+# Return 1 if this is an AArch64 target supporting the LUT (lookup table)
+# extension.
+proc check_effective_target_aarch64_lut { } {
+ if { ![istarget aarch64*-*-*] || ![check_effective_target_aarch64_sve2] } {
+ return 0
+ }
+ return [check_no_compiler_messages aarch64_lut assembly {
+ #if !defined (__ARM_FEATURE_LUT)
+ #error FOO
+ #endif
+ }]
+}
+
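
A hypothetical test using the new effective-target keyword would start along these lines (a sketch; the dejagnu directives are the standard ones):

    /* { dg-do assemble { target aarch64_lut } } */
    /* { dg-options "-march=armv9.4-a+lut" } */

    #include <arm_sve.h>

    svuint8_t
    f (svuint8_t table, svuint8_t indices)
    {
      return svluti2_u8 (table, indices, 0);
    }
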
# Return 1 if this is an AArch64 target only supporting SVE (not SVE2).
proc check_effective_target_aarch64_sve1_only { } {
return [expr { [check_effective_target_aarch64_sve]