[1/1] AArch64: Add LUTI ACLE for SVE2

Message ID 20240710133414.741793-2-vladimir.miloserdov@arm.com
State New
Headers
Series AArch64: LUTI2/LUTI4 ACLE for SVE2 |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Build failed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Test passed

Commit Message

vladimir.miloserdov@arm.com July 10, 2024, 1:34 p.m. UTC
  This patch introduces support for LUTI2/LUTI4 ACLE for SVE2.

LUTI instructions are used for efficient table lookups with 2-bit
or 4-bit indices. LUTI2 reads indexed 8-bit or 16-bit elements from
the low 128 bits of the table vector using packed 2-bit indices,
while LUTI4 can read from the low 128 or 256 bits of the table
vector or from two table vectors using packed 4-bit indices.
These instructions fill the destination vector by copying elements
indexed by segments of the source vector, selected by the vector
segment index.

The changes include the addition of a new AArch64 option
extension "lut", __ARM_FEATURE_LUT preprocessor macro, definitions
for the new LUTI instruction shapes, and implementations of the
svluti2 and svluti4 builtins.

New tests are added as well
---
 gcc/config/aarch64/aarch64-c.cc               |  1 +
 .../aarch64/aarch64-option-extensions.def     |  2 +
 .../aarch64/aarch64-sve-builtins-shapes.cc    | 41 +++++++++++++++++
 .../aarch64/aarch64-sve-builtins-shapes.h     |  2 +
 .../aarch64/aarch64-sve-builtins-sve2.cc      | 17 +++++++
 .../aarch64/aarch64-sve-builtins-sve2.def     |  4 ++
 .../aarch64/aarch64-sve-builtins-sve2.h       |  2 +
 gcc/config/aarch64/aarch64-sve2.md            | 45 +++++++++++++++++++
 gcc/config/aarch64/aarch64.h                  |  5 +++
 gcc/config/aarch64/iterators.md               | 10 +++++
 .../aarch64/sve/acle/asm/test_sve_acle.h      | 16 ++++++-
 .../aarch64/sve2/acle/asm/luti2_bf16.c        | 35 +++++++++++++++
 .../aarch64/sve2/acle/asm/luti2_f16.c         | 35 +++++++++++++++
 .../aarch64/sve2/acle/asm/luti2_s16.c         | 35 +++++++++++++++
 .../aarch64/sve2/acle/asm/luti2_s8.c          | 35 +++++++++++++++
 .../aarch64/sve2/acle/asm/luti2_u16.c         | 35 +++++++++++++++
 .../aarch64/sve2/acle/asm/luti2_u8.c          | 35 +++++++++++++++
 .../aarch64/sve2/acle/asm/luti4_bf16.c        | 35 +++++++++++++++
 .../aarch64/sve2/acle/asm/luti4_bf16_x2.c     | 15 +++++++
 .../aarch64/sve2/acle/asm/luti4_f16.c         | 35 +++++++++++++++
 .../aarch64/sve2/acle/asm/luti4_f16_x2.c      | 15 +++++++
 .../aarch64/sve2/acle/asm/luti4_s16.c         | 35 +++++++++++++++
 .../aarch64/sve2/acle/asm/luti4_s16_x2.c      | 15 +++++++
 .../aarch64/sve2/acle/asm/luti4_s8.c          | 25 +++++++++++
 .../aarch64/sve2/acle/asm/luti4_u16.c         | 35 +++++++++++++++
 .../aarch64/sve2/acle/asm/luti4_u16_x2.c      | 15 +++++++
 .../aarch64/sve2/acle/asm/luti4_u8.c          | 25 +++++++++++
 gcc/testsuite/lib/target-supports.exp         | 12 +++++
 28 files changed, 616 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c
  

Comments

Kyrylo Tkachov July 11, 2024, 7:18 a.m. UTC | #1
Hi Vladimir,

> On 10 Jul 2024, at 15:34, vladimir.miloserdov@arm.com wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> This patch introduces support for LUTI2/LUTI4 ACLE for SVE2.
> 
> LUTI instructions are used for efficient table lookups with 2-bit
> or 4-bit indices. LUTI2 reads indexed 8-bit or 16-bit elements from
> the low 128 bits of the table vector using packed 2-bit indices,
> while LUTI4 can read from the low 128 or 256 bits of the table
> vector or from two table vectors using packed 4-bit indices.
> These instructions fill the destination vector by copying elements
> indexed by segments of the source vector, selected by the vector
> segment index.
> 
> The changes include the addition of a new AArch64 option
> extension "lut", __ARM_FEATURE_LUT preprocessor macro, definitions
> for the new LUTI instruction shapes, and implementations of the
> svluti2 and svluti4 builtins.
> 
> New tests are added as well
> ---
> gcc/config/aarch64/aarch64-c.cc               |  1 +
> .../aarch64/aarch64-option-extensions.def     |  2 +
> .../aarch64/aarch64-sve-builtins-shapes.cc    | 41 +++++++++++++++++
> .../aarch64/aarch64-sve-builtins-shapes.h     |  2 +
> .../aarch64/aarch64-sve-builtins-sve2.cc      | 17 +++++++
> .../aarch64/aarch64-sve-builtins-sve2.def     |  4 ++
> .../aarch64/aarch64-sve-builtins-sve2.h       |  2 +
> gcc/config/aarch64/aarch64-sve2.md            | 45 +++++++++++++++++++
> gcc/config/aarch64/aarch64.h                  |  5 +++
> gcc/config/aarch64/iterators.md               | 10 +++++
> .../aarch64/sve/acle/asm/test_sve_acle.h      | 16 ++++++-
> .../aarch64/sve2/acle/asm/luti2_bf16.c        | 35 +++++++++++++++
> .../aarch64/sve2/acle/asm/luti2_f16.c         | 35 +++++++++++++++
> .../aarch64/sve2/acle/asm/luti2_s16.c         | 35 +++++++++++++++
> .../aarch64/sve2/acle/asm/luti2_s8.c          | 35 +++++++++++++++
> .../aarch64/sve2/acle/asm/luti2_u16.c         | 35 +++++++++++++++
> .../aarch64/sve2/acle/asm/luti2_u8.c          | 35 +++++++++++++++
> .../aarch64/sve2/acle/asm/luti4_bf16.c        | 35 +++++++++++++++
> .../aarch64/sve2/acle/asm/luti4_bf16_x2.c     | 15 +++++++
> .../aarch64/sve2/acle/asm/luti4_f16.c         | 35 +++++++++++++++
> .../aarch64/sve2/acle/asm/luti4_f16_x2.c      | 15 +++++++
> .../aarch64/sve2/acle/asm/luti4_s16.c         | 35 +++++++++++++++
> .../aarch64/sve2/acle/asm/luti4_s16_x2.c      | 15 +++++++
> .../aarch64/sve2/acle/asm/luti4_s8.c          | 25 +++++++++++
> .../aarch64/sve2/acle/asm/luti4_u16.c         | 35 +++++++++++++++
> .../aarch64/sve2/acle/asm/luti4_u16_x2.c      | 15 +++++++
> .../aarch64/sve2/acle/asm/luti4_u8.c          | 25 +++++++++++
> gcc/testsuite/lib/target-supports.exp         | 12 +++++
> 28 files changed, 616 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c
> 

diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 42ec0eec31e..840f52e08ed 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -232,6 +232,8 @@ AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the")

AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs")

+AARCH64_OPT_EXTENSION("lut", LUT, (SVE2, SME2), (), (), "lut")
+

I think the LUT extension doesn’t require SME2, does it? It doesn’t seem to use any SME state. I don’t think +lut should be enabling +sme2 for the user

+;; -------------------------------------------------------------------------
+;; ---- Table lookup
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - LUTI2
+;; - LUTI4
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
+ [(set (match_operand:SVE_FULL_BS 0 "register_operand" "=w")
+ (unspec:SVE_FULL_BS
+ [(match_operand:SVE_FULL_BS 1 "register_operand" "w")
+ (match_operand:VNx16QI 2 "register_operand" "w")
+ (match_operand:DI 3 "const_int_operand")
+ (const_int LUTI_BITS)]
+ UNSPEC_SVE_LUTI))]
+ "TARGET_SVE2"
+ "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]"
+)


+
+(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
+ [(set (match_operand:<VSINGLE> 0 "register_operand")
+ (unspec:<VSINGLE>
+ [(match_operand:SVE_FULL_H 1 "aligned_register_operand" "w")
+ (match_operand:VNx16QI 2 "register_operand")
+ (match_operand:DI 3 "const_int_operand")
+ (const_int LUTI_BITS)]
+ UNSPEC_SVE_LUTI))]
+ "TARGET_SVE2"
+ "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]"
+)

Missing constraints on operands 0 and 3?

+
+(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
+ [(set (match_operand:<VSINGLE> 0 "register_operand")
+ (unspec:<VSINGLE>
+ [(match_operand:SVE_FULL_Hx2 1 "aligned_register_operand" "Uw2")
+ (match_operand:VNx16QI 2 "register_operand")
+ (match_operand:DI 3 "const_int_operand")
+ (const_int LUTI_BITS)]
+ UNSPEC_SVE_LUTI))]
+ "TARGET_SVE2"
+ "luti<LUTI_BITS>\t%0.<Vetype>, %1, %2[%3]"
+)

Likewise.

Thanks,
Kyrill
  
Kyrylo Tkachov July 11, 2024, 7:19 a.m. UTC | #2
> On 11 Jul 2024, at 09:18, Kyrylo Tkachov <ktkachov@nvidia.com> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> Hi Vladimir,
> 
>> On 10 Jul 2024, at 15:34, vladimir.miloserdov@arm.com wrote:
>> 
>> External email: Use caution opening links or attachments
>> 
>> 
>> This patch introduces support for LUTI2/LUTI4 ACLE for SVE2.
>> 
>> LUTI instructions are used for efficient table lookups with 2-bit
>> or 4-bit indices. LUTI2 reads indexed 8-bit or 16-bit elements from
>> the low 128 bits of the table vector using packed 2-bit indices,
>> while LUTI4 can read from the low 128 or 256 bits of the table
>> vector or from two table vectors using packed 4-bit indices.
>> These instructions fill the destination vector by copying elements
>> indexed by segments of the source vector, selected by the vector
>> segment index.
>> 
>> The changes include the addition of a new AArch64 option
>> extension "lut", __ARM_FEATURE_LUT preprocessor macro, definitions
>> for the new LUTI instruction shapes, and implementations of the
>> svluti2 and svluti4 builtins.
>> 
>> New tests are added as well
>> ---
>> gcc/config/aarch64/aarch64-c.cc               |  1 +
>> .../aarch64/aarch64-option-extensions.def     |  2 +
>> .../aarch64/aarch64-sve-builtins-shapes.cc    | 41 +++++++++++++++++
>> .../aarch64/aarch64-sve-builtins-shapes.h     |  2 +
>> .../aarch64/aarch64-sve-builtins-sve2.cc      | 17 +++++++
>> .../aarch64/aarch64-sve-builtins-sve2.def     |  4 ++
>> .../aarch64/aarch64-sve-builtins-sve2.h       |  2 +
>> gcc/config/aarch64/aarch64-sve2.md            | 45 +++++++++++++++++++
>> gcc/config/aarch64/aarch64.h                  |  5 +++
>> gcc/config/aarch64/iterators.md               | 10 +++++
>> .../aarch64/sve/acle/asm/test_sve_acle.h      | 16 ++++++-
>> .../aarch64/sve2/acle/asm/luti2_bf16.c        | 35 +++++++++++++++
>> .../aarch64/sve2/acle/asm/luti2_f16.c         | 35 +++++++++++++++
>> .../aarch64/sve2/acle/asm/luti2_s16.c         | 35 +++++++++++++++
>> .../aarch64/sve2/acle/asm/luti2_s8.c          | 35 +++++++++++++++
>> .../aarch64/sve2/acle/asm/luti2_u16.c         | 35 +++++++++++++++
>> .../aarch64/sve2/acle/asm/luti2_u8.c          | 35 +++++++++++++++
>> .../aarch64/sve2/acle/asm/luti4_bf16.c        | 35 +++++++++++++++
>> .../aarch64/sve2/acle/asm/luti4_bf16_x2.c     | 15 +++++++
>> .../aarch64/sve2/acle/asm/luti4_f16.c         | 35 +++++++++++++++
>> .../aarch64/sve2/acle/asm/luti4_f16_x2.c      | 15 +++++++
>> .../aarch64/sve2/acle/asm/luti4_s16.c         | 35 +++++++++++++++
>> .../aarch64/sve2/acle/asm/luti4_s16_x2.c      | 15 +++++++
>> .../aarch64/sve2/acle/asm/luti4_s8.c          | 25 +++++++++++
>> .../aarch64/sve2/acle/asm/luti4_u16.c         | 35 +++++++++++++++
>> .../aarch64/sve2/acle/asm/luti4_u16_x2.c      | 15 +++++++
>> .../aarch64/sve2/acle/asm/luti4_u8.c          | 25 +++++++++++
>> gcc/testsuite/lib/target-supports.exp         | 12 +++++
>> 28 files changed, 616 insertions(+), 1 deletion(-)
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c
>> 
> 
> diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
> index 42ec0eec31e..840f52e08ed 100644
> --- a/gcc/config/aarch64/aarch64-option-extensions.def
> +++ b/gcc/config/aarch64/aarch64-option-extensions.def
> @@ -232,6 +232,8 @@ AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the")
> 
> AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs")
> 
> +AARCH64_OPT_EXTENSION("lut", LUT, (SVE2, SME2), (), (), "lut")
> +
> 
> I think the LUT extension doesn’t require SME2, does it? It doesn’t seem to use any SME state. I don’t think +lut should be enabling +sme2 for the user
> 
> +;; -------------------------------------------------------------------------
> +;; ---- Table lookup
> +;; -------------------------------------------------------------------------
> +;; Includes:
> +;; - LUTI2
> +;; - LUTI4
> +;; -------------------------------------------------------------------------
> +
> +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
> + [(set (match_operand:SVE_FULL_BS 0 "register_operand" "=w")
> + (unspec:SVE_FULL_BS
> + [(match_operand:SVE_FULL_BS 1 "register_operand" "w")
> + (match_operand:VNx16QI 2 "register_operand" "w")
> + (match_operand:DI 3 "const_int_operand")
> + (const_int LUTI_BITS)]
> + UNSPEC_SVE_LUTI))]
> + "TARGET_SVE2"
> + "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]"
> +)
> 
> 
> +
> +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
> + [(set (match_operand:<VSINGLE> 0 "register_operand")
> + (unspec:<VSINGLE>
> + [(match_operand:SVE_FULL_H 1 "aligned_register_operand" "w")
> + (match_operand:VNx16QI 2 "register_operand")
> + (match_operand:DI 3 "const_int_operand")
> + (const_int LUTI_BITS)]
> + UNSPEC_SVE_LUTI))]
> + "TARGET_SVE2"
> + "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]"
> +)
> 
> Missing constraints on operands 0 and 3?

I meant operands 0 and 2, of course.

> 
> +
> +(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
> + [(set (match_operand:<VSINGLE> 0 "register_operand")
> + (unspec:<VSINGLE>
> + [(match_operand:SVE_FULL_Hx2 1 "aligned_register_operand" "Uw2")
> + (match_operand:VNx16QI 2 "register_operand")
> + (match_operand:DI 3 "const_int_operand")
> + (const_int LUTI_BITS)]
> + UNSPEC_SVE_LUTI))]
> + "TARGET_SVE2"
> + "luti<LUTI_BITS>\t%0.<Vetype>, %1, %2[%3]"
> +)
> 
> Likewise.
> 
> Thanks,
> Kyrill
  

Patch

diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 6f2111434b3..099d9be8080 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -267,6 +267,7 @@  aarch64_update_cpp_builtins (cpp_reader *pfile)
   aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile);
   aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile);
   aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
+  aarch64_def_or_undef (TARGET_LUT, "__ARM_FEATURE_LUT", pfile);
 
   /* Not for ACLE, but required to keep "float.h" correct if we switch
      target between implementations that do or do not support ARMv8.2-A
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 42ec0eec31e..840f52e08ed 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -232,6 +232,8 @@  AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the")
 
 AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs")
 
+AARCH64_OPT_EXTENSION("lut", LUT, (SVE2, SME2), (), (), "lut")
+
 #undef AARCH64_OPT_FMV_EXTENSION
 #undef AARCH64_OPT_EXTENSION
 #undef AARCH64_FMV_FEATURE
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index f190770250f..6e9d65e9173 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -856,6 +856,47 @@  struct load_ext_gather_base : public overloaded_base<1>
   }
 };
 
+
+/* sv<v0>_t svlut_<t0>(sv<t0>_t, svuint8_t, uint64_t)
+   where the final argument is a constant index, the instruction divides
+   the vector argument in BITS-bit quantities.  */
+template<unsigned int BITS>
+struct luti_base : public nonoverloaded_base
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    /* Format: return type, table vector, indices vector, immediate value.  */
+    build_all (b, "v0,t0,vu8,su64", group, MODE_none);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    int max_range;
+    bool byte_mode = c.type_suffix (0).element_bits == 8;
+
+    if (BITS == 2)
+      max_range = byte_mode ? 3 : 7;
+    else if (BITS == 4)
+      max_range = byte_mode ? 1 : 7;
+    else
+      /* Unsupported number of indices bits for LUTI.  */
+      gcc_unreachable ();
+
+    return c.require_immediate_range (2, 0, max_range);
+  }
+
+};
+
+/* Specializations for 2-bit and 4-bit indices.  */
+using luti2_def = luti_base<2>;
+SHAPE (luti2)
+
+using luti4_def = luti_base<4>;
+SHAPE (luti4)
+
+
 /* sv<t0>x<g>_t svfoo_t0_g(uint64_t, svuint8_t, uint64_t)
 
    where the first argument is the ZT register number (currently always 0)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
index ea87240518d..36cfb73e4ab 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
@@ -154,6 +154,8 @@  namespace aarch64_sve
     extern const function_shape *const load_gather_vs;
     extern const function_shape *const load_replicate;
     extern const function_shape *const load_za;
+    extern const function_shape *const luti2;
+    extern const function_shape *const luti4;
     extern const function_shape *const luti2_lane_zt;
     extern const function_shape *const luti4_lane_zt;
     extern const function_shape *const mmla;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index 4f25cc68028..72bb909f259 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -518,6 +518,21 @@  public:
   int m_unspec;
 };
 
+
+class svluti_lane_impl : public function_base
+{
+public:
+  CONSTEXPR svluti_lane_impl (unsigned int bits) : m_bits (bits) {}
+
+  rtx expand (function_expander &e) const override
+  {
+    auto mode = e.tuple_mode (0);
+    return e.use_exact_insn (code_for_aarch64_sve_luti (m_bits, mode));
+  }
+
+  unsigned int m_bits;
+};
+
 } /* end anonymous namespace */
 
 namespace aarch64_sve {
@@ -746,5 +761,7 @@  FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI))
 FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW))
 FUNCTION (svwhilewr, svwhilerw_svwhilewr_impl, (UNSPEC_WHILEWR))
 FUNCTION (svxar, CODE_FOR_MODE0 (aarch64_sve2_xar),)
+FUNCTION (svluti2, svluti_lane_impl, (2))
+FUNCTION (svluti4, svluti_lane_impl, (4))
 
 } /* end namespace aarch64_sve */
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 4366925a971..86aa92dad2e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -164,6 +164,10 @@  DEF_SVE_FUNCTION (svwhilegt, compare_scalar, while, none)
 DEF_SVE_FUNCTION (svwhilerw, compare_ptr, all_data, none)
 DEF_SVE_FUNCTION (svwhilewr, compare_ptr, all_data, none)
 DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none)
+DEF_SVE_FUNCTION (svluti2, luti2, bhs_data, none)
+DEF_SVE_FUNCTION (svluti4, luti4, bhs_data, none)
+DEF_SVE_FUNCTION_GS (svluti4, luti4, bhs_data, x2, none)
+
 #undef REQUIRED_EXTENSIONS
 
 #define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
index a612ace9415..897f686aca9 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
@@ -205,6 +205,8 @@  namespace aarch64_sve
     extern const function_base *const svwhilerw;
     extern const function_base *const svwhilewr;
     extern const function_base *const svxar;
+    extern const function_base *const svluti2;
+    extern const function_base *const svluti4;
   }
 }
 
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 972b03a4fef..8c54986ca3b 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -114,6 +114,7 @@ 
 ;; ---- Optional AES extensions
 ;; ---- Optional SHA-3 extensions
 ;; ---- Optional SM4 extensions
+;; ---- Table lookup
 
 ;; =========================================================================
 ;; == Loads
@@ -3543,3 +3544,47 @@ 
   "sm4ekey\t%0.s, %1.s, %2.s"
   [(set_attr "type" "crypto_sm4")]
 )
+
+;; -------------------------------------------------------------------------
+;; ---- Table lookup
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - LUTI2
+;; - LUTI4
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
+  [(set (match_operand:SVE_FULL_BS 0 "register_operand" "=w")
+	(unspec:SVE_FULL_BS
+	  [(match_operand:SVE_FULL_BS 1 "register_operand" "w")
+	   (match_operand:VNx16QI 2 "register_operand" "w")
+	   (match_operand:DI 3 "const_int_operand")
+	   (const_int LUTI_BITS)]
+	  UNSPEC_SVE_LUTI))]
+  "TARGET_SVE2"
+  "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]"
+)
+
+(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
+  [(set (match_operand:<VSINGLE> 0 "register_operand")
+	(unspec:<VSINGLE>
+		[(match_operand:SVE_FULL_H 1 "aligned_register_operand" "w")
+		(match_operand:VNx16QI 2 "register_operand")
+		(match_operand:DI 3 "const_int_operand")
+		(const_int LUTI_BITS)]
+		UNSPEC_SVE_LUTI))]
+  "TARGET_SVE2"
+  "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]"
+)
+
+(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
+  [(set (match_operand:<VSINGLE> 0 "register_operand")
+	(unspec:<VSINGLE>
+		[(match_operand:SVE_FULL_Hx2 1 "aligned_register_operand" "Uw2")
+		(match_operand:VNx16QI 2 "register_operand")
+		(match_operand:DI 3 "const_int_operand")
+		(const_int LUTI_BITS)]
+		UNSPEC_SVE_LUTI))]
+  "TARGET_SVE2"
+  "luti<LUTI_BITS>\t%0.<Vetype>, %1, %2[%3]"
+)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index c33f5da02f4..8542f01ec85 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -285,6 +285,7 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 #define AARCH64_ISA_D128	   (aarch64_isa_flags & AARCH64_FL_D128)
 #define AARCH64_ISA_THE		   (aarch64_isa_flags & AARCH64_FL_THE)
 #define AARCH64_ISA_GCS		   (aarch64_isa_flags & AARCH64_FL_GCS)
+#define AARCH64_ISA_LUT		   (aarch64_isa_flags & AARCH64_FL_LUT)
 
 /* The current function is a normal non-streaming function.  */
 #define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF)
@@ -515,6 +516,10 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 				 && (aarch64_tune_params.extra_tuning_flags \
 				     & AARCH64_EXTRA_TUNE_AVOID_PRED_RMW))
 
+/* Armv9.2-A/9.5-A Lookup table instructions support with 2-bit and
+    4-bit indices: LUTI2 and LUTI4.  */
+#define TARGET_LUT (AARCH64_ISA_LUT)
+
 /* Standard register usage.  */
 
 /* 31 64-bit general purpose registers R0-R30:
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index f527b2cfeb8..c55c1837a75 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -508,6 +508,15 @@ 
 (define_mode_iterator SVE_FULL_BHS [VNx16QI VNx8HI VNx4SI
 				    VNx8BF VNx8HF VNx4SF])
 
+;; Fully-packed SVE vector byte modes that have 32-bit or smaller elements.
+(define_mode_iterator SVE_FULL_BS [VNx16QI VNx4SI VNx4SF])
+
+;; Fully-packed half word SVE vector modes
+(define_mode_iterator SVE_FULL_H [VNx8HI VNx8HF VNx8BF])
+
+;; Pairs of fully-packed SVE vector modes (half word only)
+(define_mode_iterator SVE_FULL_Hx2 [VNx16HI VNx16HF VNx16BF])
+
 ;; Fully-packed SVE vector modes that have 32-bit elements.
 (define_mode_iterator SVE_FULL_S [VNx4SI VNx4SF])
 
@@ -1063,6 +1072,7 @@ 
     UNSPEC_SQCVTUN
     UNSPEC_UQCVT
     UNSPEC_UQCVTN
+    UNSPEC_SVE_LUTI
 
     ;; All used in aarch64-sme.md
     UNSPEC_SME_ADD
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
index 367024be863..ea9081420ed 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
@@ -755,5 +755,19 @@ 
     __asm volatile ("" :: "w" (z0_res), "w" (z22_res),		\
 		    "w" (z25));					\
   }
-
+ #define TEST_1X2_NARROW(NAME, RTYPE, TTYPE, ZTYPE, CODE1, CODE2) \
+    PROTO(NAME, void, ()) \
+    { \
+        register RTYPE z0 __asm ("z0"); \
+        register ZTYPE z5 __asm ("z5"); \
+        register TTYPE z6 __asm ("z6"); \
+        register RTYPE z16 __asm ("z16"); \
+        register ZTYPE z22 __asm ("z22"); \
+        register TTYPE z29 __asm ("z29"); \
+        register RTYPE z0_res __asm ("z0"); \
+        __asm volatile ("" : "=w" (z0), "=w" (z5), "=w" (z6), \
+                "=w" (z16), "=w" (z22), "=w" (z29)); \
+        INVOKE (CODE1, CODE2); \
+        __asm volatile ("" :: "w" (z0_res), "w" (z5), "w" (z22)); \
+    }
 #endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c
new file mode 100644
index 00000000000..1764a7abd48
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_bf16.c
@@ -0,0 +1,35 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti2_test_imm0:
+**	 luti2	z1\.h, \{ z28\.h \}, z0\[0\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm0, svbfloat16_t, svuint8_t, z1,
+                svluti2_bf16 (z28, z0, 0),
+                svluti2_bf16 (z28, z0, 0))
+
+/*
+** luti2_test_imm1:
+**	 luti2	z1\.h, \{ z28\.h \}, z0\[1\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm1, svbfloat16_t, svuint8_t, z1,
+                svluti2_bf16 (z28, z0, 1),
+                svluti2_bf16 (z28, z0, 1))
+
+/*
+** luti2_test_tied:
+**	 luti2	z28\.h, \{ z28\.h \}, z0\[2\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti2_test_tied, svbfloat16_t, svuint8_t, z28,
+                svluti2_bf16 (z28, z0, 2),
+                svluti2_bf16 (z28, z0, 2))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c
new file mode 100644
index 00000000000..c1f1d92a469
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_f16.c
@@ -0,0 +1,35 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti2_test_imm0:
+**	 luti2	z1\.h, \{ z28\.h \}, z0\[0\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm0, svfloat16_t, svuint8_t, z1,
+                svluti2_f16 (z28, z0, 0),
+                svluti2_f16 (z28, z0, 0))
+
+/*
+** luti2_test_imm1:
+**	 luti2	z1\.h, \{ z28\.h \}, z0\[1\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm1, svfloat16_t, svuint8_t, z1,
+                svluti2_f16 (z28, z0, 1),
+                svluti2_f16 (z28, z0, 1))
+
+/*
+** luti2_test_tied:
+**	 luti2	z28\.h, \{ z28\.h \}, z0\[2\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti2_test_tied, svfloat16_t, svuint8_t, z28,
+                svluti2_f16 (z28, z0, 2),
+                svluti2_f16 (z28, z0, 2))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c
new file mode 100644
index 00000000000..ffc92228fe0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s16.c
@@ -0,0 +1,35 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti2_test_imm0:
+**	luti2	z1\.h, \{ z28\.h \}, z0\[0\]
+**	ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm0, svint16_t, svuint8_t, z1,
+                svluti2_s16 (z28, z0, 0),
+                svluti2_s16 (z28, z0, 0))
+
+/*
+** luti2_test_imm1:
+**	luti2	z1\.h, \{ z28\.h \}, z0\[1\]
+**	ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm1, svint16_t, svuint8_t, z1,
+                svluti2_s16 (z28, z0, 1),
+                svluti2_s16 (z28, z0, 1))
+
+/*
+** luti2_test_tied:
+**	luti2	z28\.h, \{ z28\.h \}, z0\[2\]
+**	ret
+*/
+
+TEST_XN_SINGLE (luti2_test_tied, svint16_t, svuint8_t, z28,
+                svluti2_s16 (z28, z0, 2),
+                svluti2_s16 (z28, z0, 2))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c
new file mode 100644
index 00000000000..189b5335692
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_s8.c
@@ -0,0 +1,35 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti2_test_imm0:
+**	 luti2	z1\.b, \{ z28\.b \}, z0\[0\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm0, svint8_t, svuint8_t, z1,
+                svluti2_s8 (z28, z0, 0),
+                svluti2_s8 (z28, z0, 0))
+
+/*
+** luti2_test_imm1:
+**	 luti2	z1\.b, \{ z28\.b \}, z0\[1\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm1, svint8_t, svuint8_t, z1,
+                svluti2_s8 (z28, z0, 1),
+                svluti2_s8 (z28, z0, 1))
+
+/*
+** luti2_test_tied:
+**	 luti2	z28\.b, \{ z28\.b \}, z0\[2\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti2_test_tied, svint8_t, svuint8_t, z28,
+                svluti2_s8 (z28, z0, 2),
+                svluti2_s8 (z28, z0, 2))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c
new file mode 100644
index 00000000000..682d848e4ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u16.c
@@ -0,0 +1,35 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti2_test_imm0:
+**	luti2	z1\.h, \{ z28\.h \}, z0\[0\]
+**	ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm0, svuint16_t, svuint8_t, z1,
+                svluti2_u16 (z28, z0, 0),
+                svluti2_u16 (z28, z0, 0))
+
+/*
+** luti2_test_imm1:
+**	luti2	z1\.h, \{ z28\.h \}, z0\[1\]
+**	ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm1, svuint16_t, svuint8_t, z1,
+                svluti2_u16 (z28, z0, 1),
+                svluti2_u16 (z28, z0, 1))
+
+/*
+** luti2_test_tied:
+**	luti2	z28\.h, \{ z28\.h \}, z0\[2\]
+**	ret
+*/
+
+TEST_XN_SINGLE (luti2_test_tied, svuint16_t, svuint8_t, z28,
+                svluti2_u16 (z28, z0, 2),
+                svluti2_u16 (z28, z0, 2))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c
new file mode 100644
index 00000000000..65de112012c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti2_u8.c
@@ -0,0 +1,35 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti2_test_imm0:
+**	 luti2	z1\.b, \{ z28\.b \}, z0\[0\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm0, svuint8_t, svuint8_t, z1,
+                svluti2_u8 (z28, z0, 0),
+                svluti2_u8 (z28, z0, 0))
+
+/*
+** luti2_test_imm1:
+**	 luti2	z1\.b, \{ z28\.b \}, z0\[1\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti2_test_imm1, svuint8_t, svuint8_t, z1,
+                svluti2_u8 (z28, z0, 1),
+                svluti2_u8 (z28, z0, 1))
+
+/*
+** luti2_test_tied:
+**	 luti2	z28\.b, \{ z28\.b \}, z0\[2\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti2_test_tied, svuint8_t, svuint8_t, z28,
+                svluti2_u8 (z28, z0, 2),
+                svluti2_u8 (z28, z0, 2))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c
new file mode 100644
index 00000000000..108fb3b7667
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16.c
@@ -0,0 +1,35 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_imm0:
+**	 luti4	z1\.h, \{ z28\.h \}, z0\[0\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm0, svbfloat16_t, svuint8_t, z1,
+                svluti4_bf16 (z28, z0, 0),
+                svluti4_bf16 (z28, z0, 0))
+
+/*
+** luti4_test_imm1:
+**	 luti4	z1\.h, \{ z28\.h \}, z0\[1\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm1, svbfloat16_t, svuint8_t, z1,
+                svluti4_bf16 (z28, z0, 1),
+                svluti4_bf16 (z28, z0, 1))
+
+/*
+** luti4_test_tied:
+**	 luti4	z28\.h, \{ z28\.h \}, z0\[2\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti4_test_tied, svbfloat16_t, svuint8_t, z28,
+                svluti4_bf16 (z28, z0, 2),
+                svluti4_bf16 (z28, z0, 2))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c
new file mode 100644
index 00000000000..4d72e8aa21b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_bf16_x2.c
@@ -0,0 +1,15 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_bf16_x2:
+**	luti4	z0\.h, \{z6\.h \- z7\.h\}, z5\[1\]
+**	ret
+*/
+
+TEST_1X2_NARROW(luti4_test_bf16_x2, svbfloat16_t, svbfloat16x2_t, svuint8_t,
+                 z0_res = svluti4_bf16_x2(z6, z5, 1),
+                 z0_res = svluti4_bf16_x2(z6, z5, 1))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c
new file mode 100644
index 00000000000..1af3836b28b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16.c
@@ -0,0 +1,35 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_imm0:
+**	 luti4	z1\.h, \{ z28\.h \}, z0\[0\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm0, svfloat16_t, svuint8_t, z1,
+                svluti4_f16 (z28, z0, 0),
+                svluti4_f16 (z28, z0, 0))
+
+/*
+** luti4_test_imm1:
+**	 luti4	z1\.h, \{ z28\.h \}, z0\[1\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm1, svfloat16_t, svuint8_t, z1,
+                svluti4_f16 (z28, z0, 1),
+                svluti4_f16 (z28, z0, 1))
+
+/*
+** luti4_test_tied:
+**	 luti4	z28\.h, \{ z28\.h \}, z0\[2\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti4_test_tied, svfloat16_t, svuint8_t, z28,
+                svluti4_f16 (z28, z0, 2),
+                svluti4_f16 (z28, z0, 2))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c
new file mode 100644
index 00000000000..7e322ebaad8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_f16_x2.c
@@ -0,0 +1,15 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_f16_x2:
+**	luti4	z0\.h, \{z6\.h \- z7\.h\}, z5\[1\]
+**	ret
+*/
+
+TEST_1X2_NARROW(luti4_test_f16_x2, svfloat16_t, svfloat16x2_t, svuint8_t,
+                 z0_res = svluti4_f16_x2(z6, z5, 1),
+                 z0_res = svluti4_f16_x2(z6, z5, 1))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c
new file mode 100644
index 00000000000..ef5ab5ce1a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16.c
@@ -0,0 +1,35 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_imm0:
+**	luti4	z1\.h, \{ z28\.h \}, z0\[0\]
+**	ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm0, svint16_t, svuint8_t, z1,
+                svluti4_s16 (z28, z0, 0),
+                svluti4_s16 (z28, z0, 0))
+
+/*
+** luti4_test_imm1:
+**	luti4	z1\.h, \{ z28\.h \}, z0\[1\]
+**	ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm1, svint16_t, svuint8_t, z1,
+                svluti4_s16 (z28, z0, 1),
+                svluti4_s16 (z28, z0, 1))
+
+/*
+** luti4_test_tied:
+**	luti4	z28\.h, \{ z28\.h \}, z0\[2\]
+**	ret
+*/
+
+TEST_XN_SINGLE (luti4_test_tied, svint16_t, svuint8_t, z28,
+                svluti4_s16 (z28, z0, 2),
+                svluti4_s16 (z28, z0, 2))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c
new file mode 100644
index 00000000000..453db37194b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s16_x2.c
@@ -0,0 +1,15 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_s16_x2:
+**	luti4	z0\.h, \{z6\.h \- z7\.h\}, z5\[1\]
+**	ret
+*/
+
+TEST_1X2_NARROW(luti4_test_s16_x2, svint16_t, svint16x2_t, svuint8_t,
+                 z0_res = svluti4_s16_x2(z6, z5, 1),
+                 z0_res = svluti4_s16_x2(z6, z5, 1))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c
new file mode 100644
index 00000000000..3603c3f0a43
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_s8.c
@@ -0,0 +1,25 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_imm0:
+**	 luti4	z1\.b, \{ z28\.b \}, z0\[0\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm0, svint8_t, svuint8_t, z1,
+                svluti4_s8 (z28, z0, 0),
+                svluti4_s8 (z28, z0, 0))
+
+/*
+** luti4_test_imm1:
+**	 luti4	z1\.b, \{ z28\.b \}, z0\[1\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm1, svint8_t, svuint8_t, z1,
+                svluti4_s8 (z28, z0, 1),
+                svluti4_s8 (z28, z0, 1))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c
new file mode 100644
index 00000000000..2b97290c2b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16.c
@@ -0,0 +1,35 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_imm0:
+**	luti4	z1\.h, \{ z28\.h \}, z0\[0\]
+**	ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm0, svuint16_t, svuint8_t, z1,
+                svluti4_u16 (z28, z0, 0),
+                svluti4_u16 (z28, z0, 0))
+
+/*
+** luti4_test_imm1:
+**	luti4	z1\.h, \{ z28\.h \}, z0\[1\]
+**	ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm1, svuint16_t, svuint8_t, z1,
+                svluti4_u16 (z28, z0, 1),
+                svluti4_u16 (z28, z0, 1))
+
+/*
+** luti4_test_tied:
+**	luti4	z28\.h, \{ z28\.h \}, z0\[2\]
+**	ret
+*/
+
+TEST_XN_SINGLE (luti4_test_tied, svuint16_t, svuint8_t, z28,
+                svluti4_u16 (z28, z0, 2),
+                svluti4_u16 (z28, z0, 2))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c
new file mode 100644
index 00000000000..4444aa91856
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u16_x2.c
@@ -0,0 +1,15 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_u16_x2:
+**	luti4	z0\.h, \{z6\.h \- z7\.h\}, z5\[1\]
+**	ret
+*/
+
+TEST_1X2_NARROW(luti4_test_u16_x2, svuint16_t, svuint16x2_t, svuint8_t,
+                 z0_res = svluti4_u16_x2(z6, z5, 1),
+                 z0_res = svluti4_u16_x2(z6, z5, 1))
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c
new file mode 100644
index 00000000000..012d0bb84b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/luti4_u8.c
@@ -0,0 +1,25 @@ 
+/* { dg-options "-march=armv9.4-a+sve2+lut" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** luti4_test_imm0:
+**	 luti4	z1\.b, \{ z28\.b \}, z0\[0\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm0, svuint8_t, svuint8_t, z1,
+                svluti4_u8 (z28, z0, 0),
+                svluti4_u8 (z28, z0, 0))
+
+/*
+** luti4_test_imm1:
+**	 luti4	z1\.b, \{ z28\.b \}, z0\[1\]
+**	 ret
+*/
+
+TEST_XN_SINGLE (luti4_test_imm1, svuint8_t, svuint8_t, z1,
+                svluti4_u8 (z28, z0, 1),
+                svluti4_u8 (z28, z0, 1))
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index b7df6150bcb..bd532d56ff5 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4598,6 +4598,18 @@  proc check_effective_target_aarch64_sve2 { } {
     }]
 }
 
+# Return 1 if this is an AArch64 target supporting LUT (Lookup table)
+proc check_effective_target_aarch64_lut { } {
+    if { ![istarget aarch64*-*-*] || ![check_effective_target_aarch64_sve2] } {
+        return 0
+    }
+    return [check_no_compiler_messages aarch64_lut assembly {
+        #if !defined (__ARM_FEATURE_LUT)
+        #error FOO
+        #endif
+    }]
+}
+
 # Return 1 if this is an AArch64 target only supporting SVE (not SVE2).
 proc check_effective_target_aarch64_sve1_only { } {
     return [expr { [check_effective_target_aarch64_sve]