new file mode 100644
@@ -0,0 +1,4 @@
+#name: Negative test of SME2.1 luti2 and luti4 instructions.
+#as: -march=armv9.4-a+sme2p1
+#source: sme2p1-2-bad.s
+#error_output: sme2p1-2-bad.l
new file mode 100644
@@ -0,0 +1,62 @@
+.*: Assembler messages:
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti2 {z0.b,z7.b},zt0,z0\[0\]`
+.*: Error: the register list must have a stride of 1 at operand 1 -- `luti2 {Z0.s,Z8.s},ZT0,Z0\[0\]'
+.*: Error: operand mismatch -- `luti2 {z7.d,z15.d},zt0,z0\[0\]'
+.*: Info: did you mean this\?
+.*: Info: luti2 {z7.b, z15.b}, zt0, z0\[0\]
+.*: Info: other valid variant\(s\):
+.*: Info: luti2 {z7.h, z15.h}, zt0, z0\[0\]
+.*: Info: luti2 {z7.s, z15.s}, zt0, z0\[0\]
+.*: Error: operand 3 must be an indexed SVE vector register -- `luti2 {z16.b,z24.b},zt0,z0'
+.*: Error: register element index out of range 0 to 7 at operand 3 -- `luti2 {z23.b,z31.b},zt0,z0\[8\]'
+.*: Error: register element index out of range 0 to 7 at operand 3 -- `luti2 {z0.b,z8.b},zt0,z31\[15\]'
+.*: Error: unexpected register type at operand 2 -- `luti2 {z0.b,z8.b},z0\[7\]'
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti2 {z8.b,z24.b},zt0,z31\[0\]`
+.*: Error: invalid register list at operand 1 -- `luti2 {z24.b,z24.b},zt0,z0\[7\]'
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti2 {z4.h,z16.h},zt0,z20\[4\]`
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti2 {z20.h,z22.h},zt0,z12\[2\]`
+.*: Error: invalid register list at operand 1 -- `luti2 {z0.b,z3.b,z18.b,z1.b},zt0,z0\[0\]'
+.*: Error: the register list must have a stride of 1 at operand 1 -- `luti2 {Z0.s,Z4.s,Z8.s,Z12.s},ZT0,Z0\[0\]'
+.*: Error: operand mismatch -- `luti2 {z3.d,z7.d,z11.d,z15.d},zt0,z0\[0\]'
+.*: Info: did you mean this\?
+.*: Info: luti2 {z3.b, z7.b, z11.b, z15.b}, zt0, z0\[0\]
+.*: Info: other valid variant\(s\):
+.*: Info: luti2 {z3.h, z7.h, z11.h, z15.h}, zt0, z0\[0\]
+.*: Info: luti2 {z3.s, z7.s, z11.s, z15.s}, zt0, z0\[0\]
+.*: Error: operand 3 must be an indexed SVE vector register -- `luti2 {z16.h,z20.h,z24.h,z28.h},zt0,z0'
+.*: Error: register element index out of range 0 to 3 at operand 3 -- `luti2 {z19.h,z23.h,z27.h,z31.h},zt0,z0\[5\]'
+.*: Error: start register out of range at operand 1 -- `luti2 {z10.b,z14.b,z18.b,z22.b},zt0,z31\[0\]'
+.*: Error: invalid register list at operand 1 -- `luti2 {z20.b,z24.b,z28.b,z30.b},z0\[3\]'
+.*: Error: invalid register list at operand 1 -- `luti2 {z4.b,z7.b,z11.b,z18.b},zt0,z31\[0\]'
+.*: Error: type mismatch in vector register list at operand 1 -- `luti2 {z6.b,z0.s,z2.d,z28.h},zt0,z0\[7\]'
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti4 {z0.b,z7.b},zt0,z0\[0\]`
+.*: Error: the register list must have a stride of 1 at operand 1 -- `luti4 {Z0.s,Z8.s},ZT0,Z0\[0\]'
+.*: Error: operand mismatch -- `luti4 {z7.d,z15.d},zt0,z0\[0\]'
+.*: Info: did you mean this\?
+.*: Info: luti4 {z7.b, z15.b}, zt0, z0\[0\]
+.*: Info: other valid variant\(s\):
+.*: Info: luti4 {z7.h, z15.h}, zt0, z0\[0\]
+.*: Info: luti4 {z7.s, z15.s}, zt0, z0\[0\]
+.*: Error: missing braces at operand 3 -- `luti4 {z16.b,z24.b},zt0,z0'
+.*: Error: register element index out of range 0 to 3 at operand 3 -- `luti4 {z23.b,z31.b},zt0,z0\[8\]'
+.*: Error: register element index out of range 0 to 3 at operand 3 -- `luti4 {z0.b,z8.b},zt0,z31\[15\]'
+.*: Error: unexpected register type at operand 2 -- `luti4 {z0.b,z8.b},z0\[7\]'
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti4 {z8.b,z24.b},zt0,z31\[0\]`
+.*: Error: invalid register list at operand 1 -- `luti4 {z24.b,z24.b},zt0,z0\[7\]'
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti4 {z4.h,z16.h},zt0,z20\[4\]`
+.*: Error: the register list must have a stride of 1 or 8 at operand 1 -- `luti4 {z20.h,z22.h},zt0,z12\[2\]`
+.*: Error: the register list must have a stride of 1 at operand 1 -- `luti4 {z0.s,z4.s,z8.s,z12.s},zt0,z0\[0\]'
+.*: Error: invalid register list at operand 1 -- `luti4 {z0.b,z3.b,z18.b,z1.b},zt0,z0\[0\]'
+.*: Error: the register list must have a stride of 1 at operand 1 -- `luti4 {Z0.s,Z4.s,Z8.s,Z12.s},ZT0,Z0\[0\]'
+.*: Error: operand mismatch -- `luti4 {z3.d,z7.d,z11.d,z15.d},zt0,z0\[0\]'
+.*: Info: did you mean this\?
+.*: Info: luti4 {z3.b, z7.b, z11.b, z15.b}, zt0, z0\[0\]
+.*: Info: other valid variant\(s\):
+.*: Info: luti4 {z3.h, z7.h, z11.h, z15.h}, zt0, z0\[0\]
+.*: Info: luti4 {z3.s, z7.s, z11.s, z15.s}, zt0, z0\[0\]
+.*: Error: missing braces at operand 3 -- `luti4 {z16.h,z20.h,z24.h,z28.h},zt0,z0'
+.*: Error: register element index out of range 0 to 1 at operand 3 -- `luti4 {z19.h,z23.h,z27.h,z31.h},zt0,z0\[5\]'
+.*: Error: expected a list of 2 registers at operand 1 -- `luti4 {z10.b,z14.b,z18.b,z22.b},zt0,z31\[0\]'
+.*: Error: invalid register list at operand 1 -- `luti4 {z20.b,z24.b,z28.b,z30.b},z0\[3\]'
+.*: Error: invalid register list at operand 1 -- `luti4 {z4.b,z7.b,z11.b,z18.b},zt0,z31\[0\]'
+.*: Error: type mismatch in vector register list at operand 1 -- `luti4 {z6.b,z0.s,z2.d,z28.h},zt0,z0\[7\]'
new file mode 100644
@@ -0,0 +1,48 @@
+/* LUTI2 (two registers) strided: every line must be rejected. */
+luti2 { z0.b , z7.b }, zt0, z0[0]
+LUTI2 { Z0.s , Z8.s }, ZT0, Z0[0]
+luti2 { z7.d , z15.d }, zt0, z0[0]
+luti2 { z16.b , z24.b }, zt0, z0
+luti2 { z23.b , z31.b }, zt0, z0[8]
+luti2 { z0.b , z8.b }, zt0, z31[15]
+luti2 { z0.b , z8.b }, z0[7]
+luti2 { z8.b , z24.b }, zt0, z31[0]
+luti2 { z24.b , z24.b }, zt0, z0[7]
+luti2 { z4.h , z16.h }, zt0, z20[4]
+luti2 { z20.h , z22.h }, zt0, z12[2]
+
+/* LUTI2 (four registers) strided: every line must be rejected. */
+luti2 { z0.b , z3.b , z18.b , z1.b }, zt0, z0[0]
+LUTI2 { Z0.s , Z4.s, Z8.s , Z12.s }, ZT0, Z0[0]
+luti2 { z3.d , z7.d, z11.d, z15.d }, zt0, z0[0]
+luti2 { z16.h , z20.h , z24.h , z28.h }, zt0, z0
+luti2 { z19.h , z23.h , z27.h , z31.h }, zt0, z0[5]
+luti2 { z10.b , z14.b , z18.b , z22.b }, zt0, z31[0]
+luti2 { z20.b , z24.b , z28.b , z30.b }, z0[3]
+luti2 { z4.b , z7.b , z11.b , z18.b }, zt0, z31[0]
+luti2 { z6.b , z0.s , z2.d , z28.h }, zt0, z0[7]
+
+/* LUTI4 (two registers) strided: every line must be rejected. */
+luti4 { z0.b , z7.b }, zt0, z0[0]
+LUTI4 { Z0.s , Z8.s }, ZT0, Z0[0]
+luti4 { z7.d , z15.d }, zt0, z0[0]
+luti4 { z16.b , z24.b }, zt0, z0
+luti4 { z23.b , z31.b }, zt0, z0[8]
+luti4 { z0.b , z8.b }, zt0, z31[15]
+luti4 { z0.b , z8.b }, z0[7]
+luti4 { z8.b , z24.b }, zt0, z31[0]
+luti4 { z24.b , z24.b }, zt0, z0[7]
+luti4 { z4.h , z16.h }, zt0, z20[4]
+luti4 { z20.h , z22.h }, zt0, z12[2]
+
+/* LUTI4 (four registers) strided: every line must be rejected. */
+luti4 { z0.s , z4.s , z8.s , z12.s }, zt0, z0[0]
+luti4 { z0.b , z3.b , z18.b , z1.b }, zt0, z0[0]
+LUTI4 { Z0.s , Z4.s, Z8.s , Z12.s }, ZT0, Z0[0]
+luti4 { z3.d , z7.d, z11.d, z15.d }, zt0, z0[0]
+luti4 { z16.h , z20.h , z24.h , z28.h }, zt0, z0
+luti4 { z19.h , z23.h , z27.h , z31.h }, zt0, z0[5]
+luti4 { z10.b , z14.b , z18.b , z22.b }, zt0, z31[0]
+luti4 { z20.b , z24.b , z28.b , z30.b }, z0[3]
+luti4 { z4.b , z7.b , z11.b , z18.b }, zt0, z31[0]
+luti4 { z6.b , z0.s , z2.d , z28.h }, zt0, z0[7]
new file mode 100644
@@ -0,0 +1,87 @@
+#name: Test of SME2.1 luti2 and luti4 instructions.
+#as: -march=armv9.4-a+sme2p1
+#objdump: -dr
+
+[^:]+: file format .*
+
+
+[^:]+:
+
+[^:]+:
+.*: c09c4000 luti2 {z0.b, z8.b}, zt0, z0\[0\]
+.*: c09c4000 luti2 {z0.b, z8.b}, zt0, z0\[0\]
+.*: c09c4007 luti2 {z7.b, z15.b}, zt0, z0\[0\]
+.*: c09c4010 luti2 {z16.b, z24.b}, zt0, z0\[0\]
+.*: c09c4017 luti2 {z23.b, z31.b}, zt0, z0\[0\]
+.*: c09c43e0 luti2 {z0.b, z8.b}, zt0, z31\[0\]
+.*: c09fc000 luti2 {z0.b, z8.b}, zt0, z0\[7\]
+.*: c09c43f0 luti2 {z16.b, z24.b}, zt0, z31\[0\]
+.*: c09fc010 luti2 {z16.b, z24.b}, zt0, z0\[7\]
+.*: c09e4284 luti2 {z4.b, z12.b}, zt0, z20\[4\]
+.*: c09d4194 luti2 {z20.b, z28.b}, zt0, z12\[2\]
+.*: c09c5000 luti2 {z0.h, z8.h}, zt0, z0\[0\]
+.*: c09c5000 luti2 {z0.h, z8.h}, zt0, z0\[0\]
+.*: c09c5007 luti2 {z7.h, z15.h}, zt0, z0\[0\]
+.*: c09c5010 luti2 {z16.h, z24.h}, zt0, z0\[0\]
+.*: c09c5017 luti2 {z23.h, z31.h}, zt0, z0\[0\]
+.*: c09c53e0 luti2 {z0.h, z8.h}, zt0, z31\[0\]
+.*: c09fd000 luti2 {z0.h, z8.h}, zt0, z0\[7\]
+.*: c09c53f0 luti2 {z16.h, z24.h}, zt0, z31\[0\]
+.*: c09fd010 luti2 {z16.h, z24.h}, zt0, z0\[7\]
+.*: c09e5284 luti2 {z4.h, z12.h}, zt0, z20\[4\]
+.*: c09d5194 luti2 {z20.h, z28.h}, zt0, z12\[2\]
+.*: c09c8000 luti2 {z0.b, z4.b, z8.b, z12.b}, zt0, z0\[0\]
+.*: c09c8000 luti2 {z0.b, z4.b, z8.b, z12.b}, zt0, z0\[0\]
+.*: c09c8003 luti2 {z3.b, z7.b, z11.b, z15.b}, zt0, z0\[0\]
+.*: c09c8010 luti2 {z16.b, z20.b, z24.b, z28.b}, zt0, z0\[0\]
+.*: c09c8013 luti2 {z19.b, z23.b, z27.b, z31.b}, zt0, z0\[0\]
+.*: c09c83e0 luti2 {z0.b, z4.b, z8.b, z12.b}, zt0, z31\[0\]
+.*: c09f8000 luti2 {z0.b, z4.b, z8.b, z12.b}, zt0, z0\[3\]
+.*: c09c83f0 luti2 {z16.b, z20.b, z24.b, z28.b}, zt0, z31\[0\]
+.*: c09f8010 luti2 {z16.b, z20.b, z24.b, z28.b}, zt0, z0\[3\]
+.*: c09d8282 luti2 {z2.b, z6.b, z10.b, z14.b}, zt0, z20\[1\]
+.*: c09e8151 luti2 {z17.b, z21.b, z25.b, z29.b}, zt0, z10\[2\]
+.*: c09c9000 luti2 {z0.h, z4.h, z8.h, z12.h}, zt0, z0\[0\]
+.*: c09c9000 luti2 {z0.h, z4.h, z8.h, z12.h}, zt0, z0\[0\]
+.*: c09c9003 luti2 {z3.h, z7.h, z11.h, z15.h}, zt0, z0\[0\]
+.*: c09c9010 luti2 {z16.h, z20.h, z24.h, z28.h}, zt0, z0\[0\]
+.*: c09c9013 luti2 {z19.h, z23.h, z27.h, z31.h}, zt0, z0\[0\]
+.*: c09c93e0 luti2 {z0.h, z4.h, z8.h, z12.h}, zt0, z31\[0\]
+.*: c09f9000 luti2 {z0.h, z4.h, z8.h, z12.h}, zt0, z0\[3\]
+.*: c09c93f0 luti2 {z16.h, z20.h, z24.h, z28.h}, zt0, z31\[0\]
+.*: c09f9010 luti2 {z16.h, z20.h, z24.h, z28.h}, zt0, z0\[3\]
+.*: c09d9282 luti2 {z2.h, z6.h, z10.h, z14.h}, zt0, z20\[1\]
+.*: c09e9151 luti2 {z17.h, z21.h, z25.h, z29.h}, zt0, z10\[2\]
+.*: c09a4000 luti4 {z0.b, z8.b}, zt0, z0\[0\]
+.*: c09a4000 luti4 {z0.b, z8.b}, zt0, z0\[0\]
+.*: c09a4007 luti4 {z7.b, z15.b}, zt0, z0\[0\]
+.*: c09a4010 luti4 {z16.b, z24.b}, zt0, z0\[0\]
+.*: c09a4017 luti4 {z23.b, z31.b}, zt0, z0\[0\]
+.*: c09a43e0 luti4 {z0.b, z8.b}, zt0, z31\[0\]
+.*: c09bc000 luti4 {z0.b, z8.b}, zt0, z0\[3\]
+.*: c09a43f0 luti4 {z16.b, z24.b}, zt0, z31\[0\]
+.*: c09bc010 luti4 {z16.b, z24.b}, zt0, z0\[3\]
+.*: c09ac284 luti4 {z4.b, z12.b}, zt0, z20\[1\]
+.*: c09b4194 luti4 {z20.b, z28.b}, zt0, z12\[2\]
+.*: c09a5000 luti4 {z0.h, z8.h}, zt0, z0\[0\]
+.*: c09a5000 luti4 {z0.h, z8.h}, zt0, z0\[0\]
+.*: c09a5007 luti4 {z7.h, z15.h}, zt0, z0\[0\]
+.*: c09a5010 luti4 {z16.h, z24.h}, zt0, z0\[0\]
+.*: c09a5017 luti4 {z23.h, z31.h}, zt0, z0\[0\]
+.*: c09a53e0 luti4 {z0.h, z8.h}, zt0, z31\[0\]
+.*: c09bd000 luti4 {z0.h, z8.h}, zt0, z0\[3\]
+.*: c09a53f0 luti4 {z16.h, z24.h}, zt0, z31\[0\]
+.*: c09bd010 luti4 {z16.h, z24.h}, zt0, z0\[3\]
+.*: c09ad284 luti4 {z4.h, z12.h}, zt0, z20\[1\]
+.*: c09b5194 luti4 {z20.h, z28.h}, zt0, z12\[2\]
+.*: c09a9000 luti4 {z0.h, z4.h, z8.h, z12.h}, zt0, z0\[0\]
+.*: c09a9000 luti4 {z0.h, z4.h, z8.h, z12.h}, zt0, z0\[0\]
+.*: c09a9003 luti4 {z3.h, z7.h, z11.h, z15.h}, zt0, z0\[0\]
+.*: c09a9010 luti4 {z16.h, z20.h, z24.h, z28.h}, zt0, z0\[0\]
+.*: c09a9013 luti4 {z19.h, z23.h, z27.h, z31.h}, zt0, z0\[0\]
+.*: c09a93e0 luti4 {z0.h, z4.h, z8.h, z12.h}, zt0, z31\[0\]
+.*: c09b9000 luti4 {z0.h, z4.h, z8.h, z12.h}, zt0, z0\[1\]
+.*: c09a93f0 luti4 {z16.h, z20.h, z24.h, z28.h}, zt0, z31\[0\]
+.*: c09b9010 luti4 {z16.h, z20.h, z24.h, z28.h}, zt0, z0\[1\]
+.*: c09b9282 luti4 {z2.h, z6.h, z10.h, z14.h}, zt0, z20\[1\]
+.*: c09a9151 luti4 {z17.h, z21.h, z25.h, z29.h}, zt0, z10\[0\]
new file mode 100644
@@ -0,0 +1,87 @@
+/* LUTI2 (two registers) strided. */
+luti2 { z0.b , z8.b }, zt0, z0[0]
+LUTI2 { Z0.B , Z8.B }, ZT0, Z0[0]
+luti2 { z7.b , z15.b }, zt0, z0[0]
+luti2 { z16.b , z24.b }, zt0, z0[0]
+luti2 { z23.b , z31.b }, zt0, z0[0]
+luti2 { z0.b , z8.b }, zt0, z31[0]
+luti2 { z0.b , z8.b }, zt0, z0[7]
+luti2 { z16.b , z24.b }, zt0, z31[0]
+luti2 { z16.b , z24.b }, zt0, z0[7]
+luti2 { z4.b , z12.b }, zt0, z20[4]
+luti2 { z20.b , z28.b }, zt0, z12[2]
+
+luti2 { z0.h , z8.h }, zt0, z0[0]
+LUTI2 { Z0.H , Z8.H }, ZT0, Z0[0]
+luti2 { z7.h , z15.h }, zt0, z0[0]
+luti2 { z16.h , z24.h }, zt0, z0[0]
+luti2 { z23.h , z31.h }, zt0, z0[0]
+luti2 { z0.h , z8.h }, zt0, z31[0]
+luti2 { z0.h , z8.h }, zt0, z0[7]
+luti2 { z16.h , z24.h }, zt0, z31[0]
+luti2 { z16.h , z24.h }, zt0, z0[7]
+luti2 { z4.h , z12.h }, zt0, z20[4]
+luti2 { z20.h , z28.h }, zt0, z12[2]
+
+/* LUTI2 (four registers) strided. */
+luti2 { z0.b , z4.b , z8.b , z12.b }, zt0, z0[0]
+LUTI2 { Z0.B , Z4.B, Z8.B , Z12.B }, ZT0, Z0[0]
+luti2 { z3.b , z7.b, z11.b, z15.b }, zt0, z0[0]
+luti2 { z16.b , z20.b , z24.b , z28.b }, zt0, z0[0]
+luti2 { z19.b , z23.b , z27.b , z31.b }, zt0, z0[0]
+luti2 { z0.b , z4.b , z8.b , z12.b }, zt0, z31[0]
+luti2 { z0.b , z4.b , z8.b , z12.b }, zt0, z0[3]
+luti2 { z16.b , z20.b , z24.b , z28.b }, zt0, z31[0]
+luti2 { z16.b , z20.b , z24.b , z28.b }, zt0, z0[3]
+luti2 { z2.b , z6.b, z10.b , z14.b }, zt0, z20[1]
+luti2 { z17.b , z21.b, z25.b , z29.b }, zt0, z10[2]
+
+luti2 { z0.h , z4.h , z8.h , z12.h }, zt0, z0[0]
+LUTI2 { Z0.H , Z4.H, Z8.H , Z12.H }, ZT0, Z0[0]
+luti2 { z3.h , z7.h, z11.h, z15.h }, zt0, z0[0]
+luti2 { z16.h , z20.h , z24.h , z28.h }, zt0, z0[0]
+luti2 { z19.h , z23.h , z27.h , z31.h }, zt0, z0[0]
+luti2 { z0.h , z4.h , z8.h , z12.h }, zt0, z31[0]
+luti2 { z0.h , z4.h , z8.h , z12.h }, zt0, z0[3]
+luti2 { z16.h , z20.h , z24.h , z28.h }, zt0, z31[0]
+luti2 { z16.h , z20.h , z24.h , z28.h }, zt0, z0[3]
+luti2 { z2.h , z6.h, z10.h , z14.h }, zt0, z20[1]
+luti2 { z17.h , z21.h, z25.h , z29.h }, zt0, z10[2]
+
+/* LUTI4 (two registers) strided. */
+luti4 { z0.b , z8.b }, zt0, z0[0]
+LUTI4 { Z0.B , Z8.B }, ZT0, Z0[0]
+luti4 { z7.b , z15.b }, zt0, z0[0]
+luti4 { z16.b , z24.b }, zt0, z0[0]
+luti4 { z23.b , z31.b }, zt0, z0[0]
+luti4 { z0.b , z8.b }, zt0, z31[0]
+luti4 { z0.b , z8.b }, zt0, z0[3]
+luti4 { z16.b , z24.b }, zt0, z31[0]
+luti4 { z16.b , z24.b }, zt0, z0[3]
+luti4 { z4.b , z12.b }, zt0, z20[1]
+luti4 { z20.b , z28.b }, zt0, z12[2]
+
+luti4 { z0.h , z8.h }, zt0, z0[0]
+LUTI4 { Z0.H , Z8.H }, ZT0, Z0[0]
+luti4 { z7.h , z15.h }, zt0, z0[0]
+luti4 { z16.h , z24.h }, zt0, z0[0]
+luti4 { z23.h , z31.h }, zt0, z0[0]
+luti4 { z0.h , z8.h }, zt0, z31[0]
+luti4 { z0.h , z8.h }, zt0, z0[3]
+luti4 { z16.h , z24.h }, zt0, z31[0]
+luti4 { z16.h , z24.h }, zt0, z0[3]
+luti4 { z4.h , z12.h }, zt0, z20[1]
+luti4 { z20.h , z28.h }, zt0, z12[2]
+
+/* LUTI4 (four registers) strided. */
+luti4 { z0.h , z4.h , z8.h , z12.h }, zt0, z0[0]
+LUTI4 { Z0.H , Z4.H, Z8.H , Z12.H }, ZT0, Z0[0]
+luti4 { z3.h , z7.h, z11.h, z15.h }, zt0, z0[0]
+luti4 { z16.h , z20.h , z24.h , z28.h }, zt0, z0[0]
+luti4 { z19.h , z23.h , z27.h , z31.h }, zt0, z0[0]
+luti4 { z0.h , z4.h , z8.h , z12.h }, zt0, z31[0]
+luti4 { z0.h , z4.h , z8.h , z12.h }, zt0, z0[1]
+luti4 { z16.h , z20.h , z24.h , z28.h }, zt0, z31[0]
+luti4 { z16.h , z20.h , z24.h , z28.h }, zt0, z0[1]
+luti4 { z2.h , z6.h, z10.h , z14.h }, zt0, z20[1]
+luti4 { z17.h , z21.h, z25.h , z29.h }, zt0, z10[0]
@@ -1067,6 +1067,7 @@ enum aarch64_insn_class
sme_ldr,
sme_psel,
sme_shift,
+ sme_size_12_bh,
sme_size_12_bhs,
sme_size_12_hs,
sme_size_12_b,
@@ -2139,6 +2139,10 @@ aarch64_encode_variant_using_iclass (struct aarch64_inst *inst)
/* The variant is encoded as part of the immediate. */
break;
+ case sme_size_12_bh:
+ insert_field (FLD_S, &inst->value, aarch64_get_variant (inst), 0);
+ break;
+
case sme_size_12_bhs:
case sme_size_12_b:
insert_field (FLD_SME_size_12, &inst->value,
@@ -3397,6 +3397,12 @@ aarch64_decode_variant_using_iclass (aarch64_inst *inst)
i = extract_field (FLD_SVE_tszh, inst->value, 0);
goto sve_shift;
+ case sme_size_12_bh:
+ variant = extract_field (FLD_S, inst->value, 0);
+ if (variant > 1)
+ return false;
+ break;
+
case sme_size_12_bhs:
variant = extract_field (FLD_SME_size_12, inst->value, 0);
if (variant >= 3)
@@ -2084,6 +2084,11 @@
{ \
QLF3(S_B,NIL,NIL), \
}
+#define OP_SVE_VUU_BH /* Two QLF3 rows: first slot S_B or S_H, other two slots NIL.  */ \
+{ \
+ QLF3(S_B,NIL,NIL), \
+ QLF3(S_H,NIL,NIL), \
+}
#define OP_SVE_VUU_BHS \
{ \
QLF3(S_B,NIL,NIL), \
@@ -6641,6 +6646,11 @@ const struct aarch64_opcode aarch64_opcode_table[] =
SME2p1_INSN ("movaz", 0xc0860200, 0xffff1f01, sme2_movaz, 0, OP2 (SME_Zdnx2, SME_ZA_array_vrss_1), OP_SVE_SS, 0, 0),
SME2p1_INSN ("movaz", 0xc0c60200, 0xffff1f01, sme2_movaz, 0, OP2 (SME_Zdnx2, SME_ZA_array_vrsd_1), OP_SVE_DD, 0, 0),
+ SME2p1_INSN ("luti2", 0xc09c4000, 0xfffc4c08, sme_size_12_bh, 0, OP3 (SME_Ztx2_STRIDED, SME_ZT0, SME_Zn_INDEX3_15), OP_SVE_VUU_BH, 0, 0),
+ SME2p1_INSN ("luti2", 0xc09c8000, 0xfffccc0c, sme_size_12_bh, 0, OP3 (SME_Ztx4_STRIDED, SME_ZT0, SME_Zn_INDEX2_16), OP_SVE_VUU_BH, 0, 0),
+ SME2p1_INSN ("luti4", 0xc09a4000, 0xfffe4c08, sme_size_12_bh, 0, OP3 (SME_Ztx2_STRIDED, SME_ZT0, SME_Zn_INDEX2_15), OP_SVE_VUU_BH, 0, 0),
+ SME2p1_INSN ("luti4", 0xc09a9000, 0xfffefc0c, sme_misc, 0, OP3 (SME_Ztx4_STRIDED, SME_ZT0, SME_Zn_INDEX1_16), OP_SVE_HUU, 0, 0),
+
/* SVE2p1 Instructions. */
SVE2p1_INSN("addqv",0x04052000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, 0),
SVE2p1_INSN("andqv",0x041e2000, 0xff3fe000, sve2_urqvs, 0, OP3 (Vd, SVE_Pg3, SVE_Zn), OP_SVE_vUS_BHSD_BHSD, F_OPD_SIZE, 0),