[v1,1/6] aarch64: Add support for sme2.1 luti2 and luti4 instructions.

Message ID 20240708153645.1134208-2-srinath.parvathaneni@arm.com
State New
Headers
Series Binutils] aarch64: Add support for sme2p1 instructions. |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 fail Test failed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Srinath Parvathaneni July 8, 2024, 3:36 p.m. UTC
  This patch adds support for the following SME2.1 luti2 and luti4 instructions (the
spec is available here [1]):

1. LUTI2 (two registers) strided.
2. LUTI2 (four registers) strided.
3. LUTI4 (two registers) strided.
4. LUTI4 (four registers) strided.

[1]: https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions?lang=en
---
 gas/testsuite/gas/aarch64/sme2p1-2-bad.d |  4 ++
 gas/testsuite/gas/aarch64/sme2p1-2-bad.l | 62 +++++++++++++++++
 gas/testsuite/gas/aarch64/sme2p1-2-bad.s | 48 +++++++++++++
 gas/testsuite/gas/aarch64/sme2p1-2.d     | 87 ++++++++++++++++++++++++
 gas/testsuite/gas/aarch64/sme2p1-2.s     | 87 ++++++++++++++++++++++++
 include/opcode/aarch64.h                 |  1 +
 opcodes/aarch64-asm.c                    |  4 ++
 opcodes/aarch64-dis.c                    |  6 ++
 opcodes/aarch64-tbl.h                    | 10 +++
 9 files changed, 309 insertions(+)
 create mode 100644 gas/testsuite/gas/aarch64/sme2p1-2-bad.d
 create mode 100644 gas/testsuite/gas/aarch64/sme2p1-2-bad.l
 create mode 100644 gas/testsuite/gas/aarch64/sme2p1-2-bad.s
 create mode 100644 gas/testsuite/gas/aarch64/sme2p1-2.d
 create mode 100644 gas/testsuite/gas/aarch64/sme2p1-2.s
  

Patch

diff --git a/gas/testsuite/gas/aarch64/sme2p1-2-bad.d b/gas/testsuite/gas/aarch64/sme2p1-2-bad.d
new file mode 100644
index 00000000000..f165f1f960a
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/sme2p1-2-bad.d
@@ -0,0 +1,4 @@ 
+#name: Negative test of SME2.1 luti2 and luti4 instructions.
+#as: -march=armv9.4-a+sme2p1
+#source: sme2p1-2-bad.s
+#error_output: sme2p1-2-bad.l
diff --git a/gas/testsuite/gas/aarch64/sme2p1-2-bad.l b/gas/testsuite/gas/aarch64/sme2p1-2-bad.l
new file mode 100644
index 00000000000..8fd4039f37e
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/sme2p1-2-bad.l
@@ -0,0 +1,62 @@ 
+.*: Assembler messages:
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti2 {z0.b,z7.b},zt0,z0\[0\]'
+.*: Error: the register list must have a stride of 1 at operand 1 -- `luti2 {Z0.s,Z8.s},ZT0,Z0\[0\]'
+.*: Error: operand mismatch -- `luti2 {z7.d,z15.d},zt0,z0\[0\]'
+.*: Info:    did you mean this\?
+.*: Info:    	luti2 {z7.b, z15.b}, zt0, z0\[0\]
+.*: Info:    other valid variant\(s\):
+.*: Info:    	luti2 {z7.h, z15.h}, zt0, z0\[0\]
+.*: Info:    	luti2 {z7.s, z15.s}, zt0, z0\[0\]
+.*: Error: operand 3 must be an indexed SVE vector register -- `luti2 {z16.b,z24.b},zt0,z0'
+.*: Error: register element index out of range 0 to 7 at operand 3 -- `luti2 {z23.b,z31.b},zt0,z0\[8\]'
+.*: Error: register element index out of range 0 to 7 at operand 3 -- `luti2 {z0.b,z8.b},zt0,z31\[15\]'
+.*: Error: unexpected register type at operand 2 -- `luti2 {z0.b,z8.b},z0\[7\]'
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti2 {z8.b,z24.b},zt0,z31\[0\]'
+.*: Error: invalid register list at operand 1 -- `luti2 {z24.b,z24.b},zt0,z0\[7\]'
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti2 {z4.h,z16.h},zt0,z20\[4\]'
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti2 {z20.h,z22.h},zt0,z12\[2\]'
+.*: Error: invalid register list at operand 1 -- `luti2 {z0.b,z3.b,z18.b,z1.b},zt0,z0\[0\]'
+.*: Error: the register list must have a stride of 1 at operand 1 -- `luti2 {Z0.s,Z4.s,Z8.s,Z12.s},ZT0,Z0\[0\]'
+.*: Error: operand mismatch -- `luti2 {z3.d,z7.d,z11.d,z15.d},zt0,z0\[0\]'
+.*: Info:    did you mean this\?
+.*: Info:    	luti2 {z3.b, z7.b, z11.b, z15.b}, zt0, z0\[0\]
+.*: Info:    other valid variant\(s\):
+.*: Info:    	luti2 {z3.h, z7.h, z11.h, z15.h}, zt0, z0\[0\]
+.*: Info:    	luti2 {z3.s, z7.s, z11.s, z15.s}, zt0, z0\[0\]
+.*: Error: operand 3 must be an indexed SVE vector register -- `luti2 {z16.h,z20.h,z24.h,z28.h},zt0,z0'
+.*: Error: register element index out of range 0 to 3 at operand 3 -- `luti2 {z19.h,z23.h,z27.h,z31.h},zt0,z0\[5\]'
+.*: Error: start register out of range at operand 1 -- `luti2 {z10.b,z14.b,z18.b,z22.b},zt0,z31\[0\]'
+.*: Error: invalid register list at operand 1 -- `luti2 {z20.b,z24.b,z28.b,z30.b},z0\[3\]'
+.*: Error: invalid register list at operand 1 -- `luti2 {z4.b,z7.b,z11.b,z18.b},zt0,z31\[0\]'
+.*: Error: type mismatch in vector register list at operand 1 -- `luti2 {z6.b,z0.s,z2.d,z28.h},zt0,z0\[7\]'
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti4 {z0.b,z7.b},zt0,z0\[0\]'
+.*: Error: the register list must have a stride of 1 at operand 1 -- `luti2 {Z0.s,Z8.s},ZT0,Z0\[0\]'
+.*: Error: operand mismatch -- `luti4 {z7.d,z15.d},zt0,z0\[0\]'
+.*: Info:    did you mean this\?
+.*: Info:    	luti4 {z7.b, z15.b}, zt0, z0\[0\]
+.*: Info:    other valid variant\(s\):
+.*: Info:    	luti4 {z7.h, z15.h}, zt0, z0\[0\]
+.*: Info:    	luti4 {z7.s, z15.s}, zt0, z0\[0\]
+.*: Error: missing braces at operand 3 -- `luti4 {z16.b,z24.b},zt0,z0'
+.*: Error: register element index out of range 0 to 3 at operand 3 -- `luti4 {z23.b,z31.b},zt0,z0\[8\]'
+.*: Error: register element index out of range 0 to 3 at operand 3 -- `luti4 {z0.b,z8.b},zt0,z31\[15\]'
+.*: Error: unexpected register type at operand 2 -- `luti4 {z0.b,z8.b},z0\[7\]'
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti4 {z8.b,z24.b},zt0,z31\[0\]'
+.*: Error: invalid register list at operand 1 -- `luti4 {z24.b,z24.b},zt0,z0\[7\]'
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti4 {z4.h,z16.h},zt0,z20\[4\]'
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti4 {z20.h,z22.h},zt0,z12\[2\]'
+.*: Error: the register list must have a stride of 1 at operand 1 -- `luti4 {z0.s,z4.s,z8.s,z12.s},zt0,z0\[0\]'
+.*: Error: invalid register list at operand 1 -- `luti4 {z0.b,z3.b,z18.b,z1.b},zt0,z0\[0\]'
+.*: Error: the register list must have a stride of 1 at operand 1 -- `luti2 {Z0.s,Z4.s,Z8.s,Z12.s},ZT0,Z0\[0\]'
+.*: Error: operand mismatch -- `luti4 {z3.d,z7.d,z11.d,z15.d},zt0,z0\[0\]'
+.*: Info:    did you mean this\?
+.*: Info:    	luti4 {z3.b, z7.b, z11.b, z15.b}, zt0, z0\[0\]
+.*: Info:    other valid variant\(s\):
+.*: Info:    	luti4 {z3.h, z7.h, z11.h, z15.h}, zt0, z0\[0\]
+.*: Info:    	luti4 {z3.s, z7.s, z11.s, z15.s}, zt0, z0\[0\]
+.*: Error: missing braces at operand 3 -- `luti4 {z16.h,z20.h,z24.h,z28.h},zt0,z0'
+.*: Error: register element index out of range 0 to 1 at operand 3 -- `luti4 {z19.h,z23.h,z27.h,z31.h},zt0,z0\[5\]'
+.*: Error: expected a list of 2 registers at operand 1 -- `luti4 {z10.b,z14.b,z18.b,z22.b},zt0,z31\[0\]'
+.*: Error: invalid register list at operand 1 -- `luti4 {z20.b,z24.b,z28.b,z30.b},z0\[3\]'
+.*: Error: invalid register list at operand 1 -- `luti4 {z4.b,z7.b,z11.b,z18.b},zt0,z31\[0\]'
+.*: Error: type mismatch in vector register list at operand 1 -- `luti4 {z6.b,z0.s,z2.d,z28.h},zt0,z0\[7\]'
diff --git a/gas/testsuite/gas/aarch64/sme2p1-2-bad.s b/gas/testsuite/gas/aarch64/sme2p1-2-bad.s
new file mode 100644
index 00000000000..52af11f4ead
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/sme2p1-2-bad.s
@@ -0,0 +1,48 @@ 
+/* LUTI2 (two registers) strided.  */
+luti2	{ z0.b , z7.b }, zt0, z0[0]
+LUTI2	{ Z0.s , Z8.s }, ZT0, Z0[0]
+luti2	{ z7.d , z15.d }, zt0, z0[0]
+luti2	{ z16.b , z24.b }, zt0, z0
+luti2	{ z23.b , z31.b }, zt0, z0[8]
+luti2	{ z0.b , z8.b }, zt0, z31[15]
+luti2	{ z0.b , z8.b }, z0[7]
+luti2	{ z8.b , z24.b }, zt0, z31[0]
+luti2	{ z24.b , z24.b }, zt0, z0[7]
+luti2	{ z4.h , z16.h }, zt0, z20[4]
+luti2	{ z20.h , z22.h }, zt0, z12[2]
+
+/* LUTI2 (four registers) strided.  */
+luti2	{ z0.b , z3.b , z18.b , z1.b }, zt0, z0[0]
+LUTI2	{ Z0.s , Z4.s, Z8.s , Z12.s }, ZT0, Z0[0]
+luti2	{ z3.d , z7.d, z11.d, z15.d }, zt0, z0[0]
+luti2	{ z16.h , z20.h , z24.h , z28.h }, zt0, z0
+luti2	{ z19.h , z23.h , z27.h , z31.h }, zt0, z0[5]
+luti2	{ z10.b , z14.b , z18.b , z22.b }, zt0, z31[0]
+luti2	{ z20.b , z24.b , z28.b , z30.b }, z0[3]
+luti2	{ z4.b , z7.b , z11.b , z18.b }, zt0, z31[0]
+luti2	{ z6.b , z0.s , z2.d , z28.h }, zt0, z0[7]
+
+/* LUTI4 (two registers) strided.  */
+luti4	{ z0.b , z7.b }, zt0, z0[0]
+LUTI2	{ Z0.s , Z8.s }, ZT0, Z0[0]
+luti4	{ z7.d , z15.d }, zt0, z0[0]
+luti4	{ z16.b , z24.b }, zt0, z0
+luti4	{ z23.b , z31.b }, zt0, z0[8]
+luti4	{ z0.b , z8.b }, zt0, z31[15]
+luti4	{ z0.b , z8.b }, z0[7]
+luti4	{ z8.b , z24.b }, zt0, z31[0]
+luti4	{ z24.b , z24.b }, zt0, z0[7]
+luti4	{ z4.h , z16.h }, zt0, z20[4]
+luti4	{ z20.h , z22.h }, zt0, z12[2]
+
+/* LUTI4 (four registers) strided.  */
+luti4	{ z0.s , z4.s , z8.s , z12.s }, zt0, z0[0]
+luti4	{ z0.b , z3.b , z18.b , z1.b }, zt0, z0[0]
+LUTI2	{ Z0.s , Z4.s, Z8.s , Z12.s }, ZT0, Z0[0]
+luti4	{ z3.d , z7.d, z11.d, z15.d }, zt0, z0[0]
+luti4	{ z16.h , z20.h , z24.h , z28.h }, zt0, z0
+luti4	{ z19.h , z23.h , z27.h , z31.h }, zt0, z0[5]
+luti4	{ z10.b , z14.b , z18.b , z22.b }, zt0, z31[0]
+luti4	{ z20.b , z24.b , z28.b , z30.b }, z0[3]
+luti4	{ z4.b , z7.b , z11.b , z18.b }, zt0, z31[0]
+luti4	{ z6.b , z0.s , z2.d , z28.h }, zt0, z0[7]
diff --git a/gas/testsuite/gas/aarch64/sme2p1-2.d b/gas/testsuite/gas/aarch64/sme2p1-2.d
new file mode 100644
index 00000000000..8be6db0d34f
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/sme2p1-2.d
@@ -0,0 +1,87 @@ 
+#name: Test of SME2.1 luti2 and luti4 instructions.
+#as: -march=armv9.4-a+sme2p1
+#objdump: -dr
+
+[^:]+:     file format .*
+
+
+[^:]+:
+
+[^:]+:
+.*:	c09c4000 	luti2	{z0.b, z8.b}, zt0, z0\[0\]
+.*:	c09c4000 	luti2	{z0.b, z8.b}, zt0, z0\[0\]
+.*:	c09c4007 	luti2	{z7.b, z15.b}, zt0, z0\[0\]
+.*:	c09c4010 	luti2	{z16.b, z24.b}, zt0, z0\[0\]
+.*:	c09c4017 	luti2	{z23.b, z31.b}, zt0, z0\[0\]
+.*:	c09c43e0 	luti2	{z0.b, z8.b}, zt0, z31\[0\]
+.*:	c09fc000 	luti2	{z0.b, z8.b}, zt0, z0\[7\]
+.*:	c09c43f0 	luti2	{z16.b, z24.b}, zt0, z31\[0\]
+.*:	c09fc010 	luti2	{z16.b, z24.b}, zt0, z0\[7\]
+.*:	c09e4284 	luti2	{z4.b, z12.b}, zt0, z20\[4\]
+.*:	c09d4194 	luti2	{z20.b, z28.b}, zt0, z12\[2\]
+.*:	c09c5000 	luti2	{z0.h, z8.h}, zt0, z0\[0\]
+.*:	c09c5000 	luti2	{z0.h, z8.h}, zt0, z0\[0\]
+.*:	c09c5007 	luti2	{z7.h, z15.h}, zt0, z0\[0\]
+.*:	c09c5010 	luti2	{z16.h, z24.h}, zt0, z0\[0\]
+.*:	c09c5017 	luti2	{z23.h, z31.h}, zt0, z0\[0\]
+.*:	c09c53e0 	luti2	{z0.h, z8.h}, zt0, z31\[0\]
+.*:	c09fd000 	luti2	{z0.h, z8.h}, zt0, z0\[7\]
+.*:	c09c53f0 	luti2	{z16.h, z24.h}, zt0, z31\[0\]
+.*:	c09fd010 	luti2	{z16.h, z24.h}, zt0, z0\[7\]
+.*:	c09e5284 	luti2	{z4.h, z12.h}, zt0, z20\[4\]
+.*:	c09d5194 	luti2	{z20.h, z28.h}, zt0, z12\[2\]
+.*:	c09c8000 	luti2	{z0.b, z4.b, z8.b, z12.b}, zt0, z0\[0\]
+.*:	c09c8000 	luti2	{z0.b, z4.b, z8.b, z12.b}, zt0, z0\[0\]
+.*:	c09c8003 	luti2	{z3.b, z7.b, z11.b, z15.b}, zt0, z0\[0\]
+.*:	c09c8010 	luti2	{z16.b, z20.b, z24.b, z28.b}, zt0, z0\[0\]
+.*:	c09c8013 	luti2	{z19.b, z23.b, z27.b, z31.b}, zt0, z0\[0\]
+.*:	c09c83e0 	luti2	{z0.b, z4.b, z8.b, z12.b}, zt0, z31\[0\]
+.*:	c09f8000 	luti2	{z0.b, z4.b, z8.b, z12.b}, zt0, z0\[3\]
+.*:	c09c83f0 	luti2	{z16.b, z20.b, z24.b, z28.b}, zt0, z31\[0\]
+.*:	c09f8010 	luti2	{z16.b, z20.b, z24.b, z28.b}, zt0, z0\[3\]
+.*:	c09d8282 	luti2	{z2.b, z6.b, z10.b, z14.b}, zt0, z20\[1\]
+.*:	c09e8151 	luti2	{z17.b, z21.b, z25.b, z29.b}, zt0, z10\[2\]
+.*:	c09c9000 	luti2	{z0.h, z4.h, z8.h, z12.h}, zt0, z0\[0\]
+.*:	c09c9000 	luti2	{z0.h, z4.h, z8.h, z12.h}, zt0, z0\[0\]
+.*:	c09c9003 	luti2	{z3.h, z7.h, z11.h, z15.h}, zt0, z0\[0\]
+.*:	c09c9010 	luti2	{z16.h, z20.h, z24.h, z28.h}, zt0, z0\[0\]
+.*:	c09c9013 	luti2	{z19.h, z23.h, z27.h, z31.h}, zt0, z0\[0\]
+.*:	c09c93e0 	luti2	{z0.h, z4.h, z8.h, z12.h}, zt0, z31\[0\]
+.*:	c09f9000 	luti2	{z0.h, z4.h, z8.h, z12.h}, zt0, z0\[3\]
+.*:	c09c93f0 	luti2	{z16.h, z20.h, z24.h, z28.h}, zt0, z31\[0\]
+.*:	c09f9010 	luti2	{z16.h, z20.h, z24.h, z28.h}, zt0, z0\[3\]
+.*:	c09d9282 	luti2	{z2.h, z6.h, z10.h, z14.h}, zt0, z20\[1\]
+.*:	c09e9151 	luti2	{z17.h, z21.h, z25.h, z29.h}, zt0, z10\[2\]
+.*:	c09a4000 	luti4	{z0.b, z8.b}, zt0, z0\[0\]
+.*:	c09a4000 	luti4	{z0.b, z8.b}, zt0, z0\[0\]
+.*:	c09a4007 	luti4	{z7.b, z15.b}, zt0, z0\[0\]
+.*:	c09a4010 	luti4	{z16.b, z24.b}, zt0, z0\[0\]
+.*:	c09a4017 	luti4	{z23.b, z31.b}, zt0, z0\[0\]
+.*:	c09a43e0 	luti4	{z0.b, z8.b}, zt0, z31\[0\]
+.*:	c09bc000 	luti4	{z0.b, z8.b}, zt0, z0\[3\]
+.*:	c09a43f0 	luti4	{z16.b, z24.b}, zt0, z31\[0\]
+.*:	c09bc010 	luti4	{z16.b, z24.b}, zt0, z0\[3\]
+.*:	c09ac284 	luti4	{z4.b, z12.b}, zt0, z20\[1\]
+.*:	c09b4194 	luti4	{z20.b, z28.b}, zt0, z12\[2\]
+.*:	c09a5000 	luti4	{z0.h, z8.h}, zt0, z0\[0\]
+.*:	c09a5000 	luti4	{z0.h, z8.h}, zt0, z0\[0\]
+.*:	c09a5007 	luti4	{z7.h, z15.h}, zt0, z0\[0\]
+.*:	c09a5010 	luti4	{z16.h, z24.h}, zt0, z0\[0\]
+.*:	c09a5017 	luti4	{z23.h, z31.h}, zt0, z0\[0\]
+.*:	c09a53e0 	luti4	{z0.h, z8.h}, zt0, z31\[0\]
+.*:	c09bd000 	luti4	{z0.h, z8.h}, zt0, z0\[3\]
+.*:	c09a53f0 	luti4	{z16.h, z24.h}, zt0, z31\[0\]
+.*:	c09bd010 	luti4	{z16.h, z24.h}, zt0, z0\[3\]
+.*:	c09ad284 	luti4	{z4.h, z12.h}, zt0, z20\[1\]
+.*:	c09b5194 	luti4	{z20.h, z28.h}, zt0, z12\[2\]
+.*:	c09a9000 	luti4	{z0.h, z4.h, z8.h, z12.h}, zt0, z0\[0\]
+.*:	c09a9000 	luti4	{z0.h, z4.h, z8.h, z12.h}, zt0, z0\[0\]
+.*:	c09a9003 	luti4	{z3.h, z7.h, z11.h, z15.h}, zt0, z0\[0\]
+.*:	c09a9010 	luti4	{z16.h, z20.h, z24.h, z28.h}, zt0, z0\[0\]
+.*:	c09a9013 	luti4	{z19.h, z23.h, z27.h, z31.h}, zt0, z0\[0\]
+.*:	c09a93e0 	luti4	{z0.h, z4.h, z8.h, z12.h}, zt0, z31\[0\]
+.*:	c09b9000 	luti4	{z0.h, z4.h, z8.h, z12.h}, zt0, z0\[1\]
+.*:	c09a93f0 	luti4	{z16.h, z20.h, z24.h, z28.h}, zt0, z31\[0\]
+.*:	c09b9010 	luti4	{z16.h, z20.h, z24.h, z28.h}, zt0, z0\[1\]
+.*:	c09b9282 	luti4	{z2.h, z6.h, z10.h, z14.h}, zt0, z20\[1\]
+.*:	c09a9151 	luti4	{z17.h, z21.h, z25.h, z29.h}, zt0, z10\[0\]
diff --git a/gas/testsuite/gas/aarch64/sme2p1-2.s b/gas/testsuite/gas/aarch64/sme2p1-2.s
new file mode 100644
index 00000000000..a57baadf738
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/sme2p1-2.s
@@ -0,0 +1,87 @@ 
+/* LUTI2 (two registers) strided.  */
+luti2	{ z0.b , z8.b }, zt0, z0[0]
+LUTI2	{ Z0.B , Z8.B }, ZT0, Z0[0]
+luti2	{ z7.b , z15.b }, zt0, z0[0]
+luti2	{ z16.b , z24.b }, zt0, z0[0]
+luti2	{ z23.b , z31.b }, zt0, z0[0]
+luti2	{ z0.b , z8.b }, zt0, z31[0]
+luti2	{ z0.b , z8.b }, zt0, z0[7]
+luti2	{ z16.b , z24.b }, zt0, z31[0]
+luti2	{ z16.b , z24.b }, zt0, z0[7]
+luti2	{ z4.b , z12.b }, zt0, z20[4]
+luti2	{ z20.b , z28.b }, zt0, z12[2]
+
+luti2	{ z0.h , z8.h }, zt0, z0[0]
+LUTI2	{ Z0.H , Z8.H }, ZT0, Z0[0]
+luti2	{ z7.h , z15.h }, zt0, z0[0]
+luti2	{ z16.h , z24.h }, zt0, z0[0]
+luti2	{ z23.h , z31.h }, zt0, z0[0]
+luti2	{ z0.h , z8.h }, zt0, z31[0]
+luti2	{ z0.h , z8.h }, zt0, z0[7]
+luti2	{ z16.h , z24.h }, zt0, z31[0]
+luti2	{ z16.h , z24.h }, zt0, z0[7]
+luti2	{ z4.h , z12.h }, zt0, z20[4]
+luti2	{ z20.h , z28.h }, zt0, z12[2]
+
+/* LUTI2 (four registers) strided.  */
+luti2	{ z0.b , z4.b , z8.b , z12.b }, zt0, z0[0]
+LUTI2	{ Z0.B , Z4.B, Z8.B , Z12.B }, ZT0, Z0[0]
+luti2	{ z3.b , z7.b, z11.b, z15.b }, zt0, z0[0]
+luti2	{ z16.b , z20.b , z24.b , z28.b }, zt0, z0[0]
+luti2	{ z19.b , z23.b , z27.b , z31.b }, zt0, z0[0]
+luti2	{ z0.b , z4.b , z8.b , z12.b }, zt0, z31[0]
+luti2	{ z0.b , z4.b , z8.b , z12.b }, zt0, z0[3]
+luti2	{ z16.b , z20.b , z24.b , z28.b }, zt0, z31[0]
+luti2	{ z16.b , z20.b , z24.b , z28.b }, zt0, z0[3]
+luti2	{ z2.b , z6.b, z10.b , z14.b }, zt0, z20[1]
+luti2	{ z17.b , z21.b, z25.b , z29.b }, zt0, z10[2]
+
+luti2	{ z0.h , z4.h , z8.h , z12.h }, zt0, z0[0]
+LUTI2	{ Z0.H , Z4.H, Z8.H , Z12.H }, ZT0, Z0[0]
+luti2	{ z3.h , z7.h, z11.h, z15.h }, zt0, z0[0]
+luti2	{ z16.h , z20.h , z24.h , z28.h }, zt0, z0[0]
+luti2	{ z19.h , z23.h , z27.h , z31.h }, zt0, z0[0]
+luti2	{ z0.h , z4.h , z8.h , z12.h }, zt0, z31[0]
+luti2	{ z0.h , z4.h , z8.h , z12.h }, zt0, z0[3]
+luti2	{ z16.h , z20.h , z24.h , z28.h }, zt0, z31[0]
+luti2	{ z16.h , z20.h , z24.h , z28.h }, zt0, z0[3]
+luti2	{ z2.h , z6.h, z10.h , z14.h }, zt0, z20[1]
+luti2	{ z17.h , z21.h, z25.h , z29.h }, zt0, z10[2]
+
+/* LUTI4 (two registers) strided.  */
+luti4	{ z0.b , z8.b }, zt0, z0[0]
+LUTI4	{ Z0.B , Z8.B }, ZT0, Z0[0]
+luti4	{ z7.b , z15.b }, zt0, z0[0]
+luti4	{ z16.b , z24.b }, zt0, z0[0]
+luti4	{ z23.b , z31.b }, zt0, z0[0]
+luti4	{ z0.b , z8.b }, zt0, z31[0]
+luti4	{ z0.b , z8.b }, zt0, z0[3]
+luti4	{ z16.b , z24.b }, zt0, z31[0]
+luti4	{ z16.b , z24.b }, zt0, z0[3]
+luti4	{ z4.b , z12.b }, zt0, z20[1]
+luti4	{ z20.b , z28.b }, zt0, z12[2]
+
+luti4	{ z0.h , z8.h }, zt0, z0[0]
+LUTI4	{ Z0.H , Z8.H }, ZT0, Z0[0]
+luti4	{ z7.h , z15.h }, zt0, z0[0]
+luti4	{ z16.h , z24.h }, zt0, z0[0]
+luti4	{ z23.h , z31.h }, zt0, z0[0]
+luti4	{ z0.h , z8.h }, zt0, z31[0]
+luti4	{ z0.h , z8.h }, zt0, z0[3]
+luti4	{ z16.h , z24.h }, zt0, z31[0]
+luti4	{ z16.h , z24.h }, zt0, z0[3]
+luti4	{ z4.h , z12.h }, zt0, z20[1]
+luti4	{ z20.h , z28.h }, zt0, z12[2]
+
+/* LUTI4 (four registers) strided.  */
+luti4	{ z0.h , z4.h , z8.h , z12.h }, zt0, z0[0]
+LUTI4	{ Z0.H , Z4.H, Z8.H , Z12.H }, ZT0, Z0[0]
+luti4	{ z3.h , z7.h, z11.h, z15.h }, zt0, z0[0]
+luti4	{ z16.h , z20.h , z24.h , z28.h }, zt0, z0[0]
+luti4	{ z19.h , z23.h , z27.h , z31.h }, zt0, z0[0]
+luti4	{ z0.h , z4.h , z8.h , z12.h }, zt0, z31[0]
+luti4	{ z0.h , z4.h , z8.h , z12.h }, zt0, z0[1]
+luti4	{ z16.h , z20.h , z24.h , z28.h }, zt0, z31[0]
+luti4	{ z16.h , z20.h , z24.h , z28.h }, zt0, z0[1]
+luti4	{ z2.h , z6.h, z10.h , z14.h }, zt0, z20[1]
+luti4	{ z17.h , z21.h, z25.h , z29.h }, zt0, z10[0]
diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h
index fc749fa280d..64959a8c50e 100644
--- a/include/opcode/aarch64.h
+++ b/include/opcode/aarch64.h
@@ -1067,6 +1067,7 @@  enum aarch64_insn_class
   sme_ldr,
   sme_psel,
   sme_shift,
+  sme_size_12_bh,
   sme_size_12_bhs,
   sme_size_12_hs,
   sme_size_12_b,
diff --git a/opcodes/aarch64-asm.c b/opcodes/aarch64-asm.c
index 0867c08940c..e621bd86e87 100644
--- a/opcodes/aarch64-asm.c
+++ b/opcodes/aarch64-asm.c
@@ -2139,6 +2139,10 @@  aarch64_encode_variant_using_iclass (struct aarch64_inst *inst)
       /* The variant is encoded as part of the immediate.  */
       break;
 
+    case sme_size_12_bh:
+      insert_field (FLD_S, &inst->value, aarch64_get_variant (inst), 0);
+      break;
+
     case sme_size_12_bhs:
     case sme_size_12_b:
       insert_field (FLD_SME_size_12, &inst->value,
diff --git a/opcodes/aarch64-dis.c b/opcodes/aarch64-dis.c
index d3f38c3cda5..81ebbe0bd55 100644
--- a/opcodes/aarch64-dis.c
+++ b/opcodes/aarch64-dis.c
@@ -3397,6 +3397,12 @@  aarch64_decode_variant_using_iclass (aarch64_inst *inst)
       i = extract_field (FLD_SVE_tszh, inst->value, 0);
       goto sve_shift;
 
+    case sme_size_12_bh:
+      variant = extract_field (FLD_S, inst->value, 0);
+      if (variant > 1)
+	return false;
+      break;
+
     case sme_size_12_bhs:
       variant = extract_field (FLD_SME_size_12, inst->value, 0);
       if (variant >= 3)
diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
index ad0d8ae7be6..eccfac53ad1 100644
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -2084,6 +2084,11 @@ 
 {                                                       \
   QLF3(S_B,NIL,NIL),                                    \
 }
+#define OP_SVE_VUU_BH					\
+{                                                       \
+  QLF3(S_B,NIL,NIL),                                    \
+  QLF3(S_H,NIL,NIL),                                    \
+}
 #define OP_SVE_VUU_BHS                                  \
 {                                                       \
   QLF3(S_B,NIL,NIL),                                    \
@@ -6641,6 +6646,11 @@  const struct aarch64_opcode aarch64_opcode_table[] =
   SME2p1_INSN ("movaz", 0xc0860200, 0xffff1f01, sme2_movaz, 0, OP2 (SME_Zdnx2, SME_ZA_array_vrss_1), OP_SVE_SS, 0, 0),
   SME2p1_INSN ("movaz", 0xc0c60200, 0xffff1f01, sme2_movaz, 0, OP2 (SME_Zdnx2, SME_ZA_array_vrsd_1), OP_SVE_DD, 0, 0),
 
+  SME2p1_INSN ("luti2", 0xc09c4000, 0xfffc4c08, sme_size_12_bh, 0, OP3 (SME_Ztx2_STRIDED, SME_ZT0, SME_Zn_INDEX3_15), OP_SVE_VUU_BH, 0, 0),
+  SME2p1_INSN ("luti2", 0xc09c8000, 0xfffccc0c, sme_size_12_bh, 0, OP3 (SME_Ztx4_STRIDED, SME_ZT0, SME_Zn_INDEX2_16), OP_SVE_VUU_BH, 0, 0),
+  SME2p1_INSN ("luti4", 0xc09a4000, 0xfffe4c08, sme_size_12_bh, 0, OP3 (SME_Ztx2_STRIDED, SME_ZT0, SME_Zn_INDEX2_15), OP_SVE_VUU_BH, 0, 0),
+  SME2p1_INSN ("luti4", 0xc09a9000, 0xfffefc0c, sme_misc, 0, OP3 (SME_Ztx4_STRIDED, SME_ZT0, SME_Zn_INDEX1_16), OP_SVE_HUU, 0, 0),
+
 /* SVE2p1 Instructions.  */
   SVE2p1_INSN("addqv",0x04052000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, 0),
   SVE2p1_INSN("andqv",0x041e2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, 0),