[v2,4/5] aarch64: Add support for FEAT_SME_B16B16 feature.

Message ID 20240719121438.2929286-5-srinath.parvathaneni@arm.com
State Superseded
Headers
Series aarch64: Add support for sme2 and sve2 BFloat16 feature. |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 fail Test failed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Srinath Parvathaneni July 19, 2024, 12:14 p.m. UTC
  This patch adds support for SME ZA-targeting non-widening BFloat16 instructions,
under the feature FEAT_SME_B16B16 and the command line flag "+sme-b16b16".

FEAT_SME_B16B16 implies FEAT_SME2 and FEAT_SVE_B16B16; accordingly,
"+sme-b16b16" also enables "+sme2" and "+sve-b16b16".

The spec for this feature and its instructions is available here [1]:
[1]: https://developer.arm.com/documentation/ddi0602/2024-06/SME-Instructions?lang=en
---
 gas/NEWS                                      |   2 +
 gas/config/tc-aarch64.c                       |   2 +
 gas/doc/c-aarch64.texi                        |   8 +-
 .../gas/aarch64/bfloat16-2-invalid.l          |  16 +-
 .../gas/aarch64/bfloat16-sme2-1-bad.d         |   4 +
 .../gas/aarch64/bfloat16-sme2-1-bad.l         | 193 ++++++++++++++++++
 .../gas/aarch64/bfloat16-sme2-1-bad.s         | 173 ++++++++++++++++
 gas/testsuite/gas/aarch64/bfloat16-sme2-1.d   | 122 +++++++++++
 gas/testsuite/gas/aarch64/bfloat16-sme2-1.s   | 139 +++++++++++++
 include/opcode/aarch64.h                      |   2 +
 opcodes/aarch64-tbl.h                         |  30 +++
 11 files changed, 681 insertions(+), 10 deletions(-)
 create mode 100644 gas/testsuite/gas/aarch64/bfloat16-sme2-1-bad.d
 create mode 100644 gas/testsuite/gas/aarch64/bfloat16-sme2-1-bad.l
 create mode 100644 gas/testsuite/gas/aarch64/bfloat16-sme2-1-bad.s
 create mode 100644 gas/testsuite/gas/aarch64/bfloat16-sme2-1.d
 create mode 100644 gas/testsuite/gas/aarch64/bfloat16-sme2-1.s
  

Patch

diff --git a/gas/NEWS b/gas/NEWS
index a677cc67947..aa73f277714 100644
--- a/gas/NEWS
+++ b/gas/NEWS
@@ -18,6 +18,8 @@ 
   
 * Add support for 'armv9.5-a' for -march in AArch64 GAS.
 
+* Support for 'BFloat16' feature in AArch64 GAS.
+
 * In x86 Intel syntax undue mnemonic suffixes are now warned about.  This is
   a first step towards rejecting their use where unjustified.
 
diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
index 0699bd0eaed..30f664b72c9 100644
--- a/gas/config/tc-aarch64.c
+++ b/gas/config/tc-aarch64.c
@@ -10744,6 +10744,8 @@  static const struct aarch64_option_cpu_value_table aarch64_features[] = {
 			AARCH64_FEATURES (2, FP8, SME2)},
   {"sme-f8f16",		AARCH64_FEATURE (SME_F8F16),
 			AARCH64_FEATURE (SME_F8F32)},
+  {"sme-b16b16",	AARCH64_FEATURES (3, SME_B16B16, SVE_B16B16, SME2),
+			AARCH64_NO_FEATURES},
   {NULL,		AARCH64_NO_FEATURES, AARCH64_NO_FEATURES},
 };
 
diff --git a/gas/doc/c-aarch64.texi b/gas/doc/c-aarch64.texi
index 8e46038a787..5682a53f442 100644
--- a/gas/doc/c-aarch64.texi
+++ b/gas/doc/c-aarch64.texi
@@ -161,8 +161,6 @@  automatically cause those extensions to be disabled.
 @headitem Extension @tab Depends upon @tab Description
 @item @code{aes} @tab @code{simd}
  @tab Enable the AES and PMULL cryptographic extensions.
-@c @item @code{b16b16} @tab @code{sve2}
-@c  @tab Enable BFloat16 to BFloat16 arithmetic for SVE2 and SME2.
 @item @code{bf16} @tab @code{fp}
  @tab Enable BFloat16 extension.
 @item @code{brbe} @tab
@@ -271,6 +269,8 @@  automatically cause those extensions to be disabled.
  @tab Enable the SM3 and SM4 cryptographic extensions.
 @item @code{sme} @tab @code{sve2}, @code{bf16}
  @tab Enable the Scalable Matrix Extension.
+@item @code{sme-b16b16} @tab @code{sme2}, @code{sve-b16b16}
+ @tab Enable SME ZA-targeting non-widening BFloat16 instructions.
 @item @code{sme-f8f16} @tab @code{sme-f8f32}
  @tab Enable the SME F8F16 Extension.
 @item @code{sme-f8f32} @tab @code{sme2}, @code{fp8}
@@ -301,6 +301,10 @@  automatically cause those extensions to be disabled.
  @tab Enable the SVE2 AES and PMULL Extensions.
 @item @code{sve2-bitperm} @tab @code{sve2}
  @tab Enable the SVE2 BITPERM Extension.
+@item @code{sve-b16b16} @tab @code{sve2}
+ @tab Enable SVE Z-targeting non-widening BFloat16 instructions in Non-streaming SVE mode.
+@item @code{sve-b16b16} @tab @code{sme2}
+ @tab Enable SME Z-targeting multi-vector non-widening BFloat16 instructions.
 @item @code{sve2-sha3} @tab @code{sve2}, @code{sha3}
  @tab Enable the SVE2 SHA3 Extension.
 @item @code{sve2-sm4} @tab @code{sve2}, @code{sm4}
diff --git a/gas/testsuite/gas/aarch64/bfloat16-2-invalid.l b/gas/testsuite/gas/aarch64/bfloat16-2-invalid.l
index 7742e9d4865..cd8981017ab 100644
--- a/gas/testsuite/gas/aarch64/bfloat16-2-invalid.l
+++ b/gas/testsuite/gas/aarch64/bfloat16-2-invalid.l
@@ -165,8 +165,8 @@ 
 .*: Error: operand mismatch -- `bfadd z31.b,z31.s,z31.d'
 .*: Info:    did you mean this\?
 .*: Info:    	bfadd z31.h, z31.h, z31.h
-.*: Error: expected an SVE vector register at operand 1 -- `bfadd {z0.h},z0.h,z0.h'
-.*: Error: expected an SVE vector register at operand 1 -- `bfadd {z0.h-z0.h},z0.h'
+.*: Error: expected a register at operand 1 -- `bfadd {z0.h},z0.h,z0.h'
+.*: Error: expected a register at operand 1 -- `bfadd {z0.h-z0.h},z0.h'
 .*: Error: comma expected between operands at operand 3 -- `bfadd z0.h,z0.h'
 .*: Error: operand mismatch -- `bfclamp z0.b,z0.h,z0.h'
 .*: Info:    did you mean this\?
@@ -201,8 +201,8 @@ 
 .*: Error: operand mismatch -- `bfmla z31.b,z31.s,z31.d\[8\]'
 .*: Info:    did you mean this\?
 .*: Info:    	bfmla z31.h, z31.h, z31.h\[8\]
-.*: Error: expected an SVE vector register at operand 1 -- `bfmla {z0.h},z0.h,z0.h\[1\]'
-.*: Error: expected an SVE vector register at operand 1 -- `bfmla {z0.h-z0.h},z0.h\[2\]'
+.*: Error: expected a register at operand 1 -- `bfmla {z0.h},z0.h,z0.h\[1\]'
+.*: Error: expected a register at operand 1 -- `bfmla {z0.h-z0.h},z0.h\[2\]'
 .*: Error: expected an SVE predicate register at operand 2 -- `bfmla z0.h,z0.h\[3\]'
 .*: Error: operand mismatch -- `bfmls z0.b,z0.h,z0.h\[0\]'
 .*: Info:    did you mean this\?
@@ -219,8 +219,8 @@ 
 .*: Error: operand mismatch -- `bfmls z31.b,z31.s,z31.d\[8\]'
 .*: Info:    did you mean this\?
 .*: Info:    	bfmls z31.h, z31.h, z31.h\[8\]
-.*: Error: expected an SVE vector register at operand 1 -- `bfmls {z0.h},z0.h,z0.h\[1\]'
-.*: Error: expected an SVE vector register at operand 1 -- `bfmls {z0.h-z0.h},z0.h\[2\]'
+.*: Error: expected a register at operand 1 -- `bfmls {z0.h},z0.h,z0.h\[1\]'
+.*: Error: expected a register at operand 1 -- `bfmls {z0.h-z0.h},z0.h\[2\]'
 .*: Error: expected an SVE predicate register at operand 2 -- `bfmls z0.h,z0.h\[3\]'
 .*: Error: operand mismatch -- `bfmul z0.b,z0.h,z0.h\[0\]'
 .*: Info:    did you mean this\?
@@ -255,8 +255,8 @@ 
 .*: Error: operand mismatch -- `bfsub z31.b,z31.s,z31.d'
 .*: Info:    did you mean this\?
 .*: Info:    	bfsub z31.h, z31.h, z31.h
-.*: Error: expected an SVE vector register at operand 1 -- `bfsub {z0.h},z0.h,z0.h'
-.*: Error: expected an SVE vector register at operand 1 -- `bfsub {z0.h-z0.h},z0.h'
+.*: Error: expected a register at operand 1 -- `bfsub {z0.h},z0.h,z0.h'
+.*: Error: expected a register at operand 1 -- `bfsub {z0.h-z0.h},z0.h'
 .*: Error: comma expected between operands at operand 3 -- `bfsub z0.h,z0.h'
 .*: Warning: output register of preceding `movprfx' expected as output at operand 1 -- `bfclamp z1.h,z3.h,z16.h'
 .*: Warning: output register of preceding `movprfx' not used in current instruction at operand 1 -- `bfmla z10.h,z16.h,z3.h\[7\]'
diff --git a/gas/testsuite/gas/aarch64/bfloat16-sme2-1-bad.d b/gas/testsuite/gas/aarch64/bfloat16-sme2-1-bad.d
new file mode 100644
index 00000000000..e03774e1733
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-sme2-1-bad.d
@@ -0,0 +1,4 @@ 
+#name: Test of invalid SME2 non-widening BFloat16 instructions.
+#as: -march=armv9.4-a+sme-b16b16
+#source: bfloat16-sme2-1-bad.s
+#error_output: bfloat16-sme2-1-bad.l
diff --git a/gas/testsuite/gas/aarch64/bfloat16-sme2-1-bad.l b/gas/testsuite/gas/aarch64/bfloat16-sme2-1-bad.l
new file mode 100644
index 00000000000..c77c9457e0e
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-sme2-1-bad.l
@@ -0,0 +1,193 @@ 
+.*: Assembler messages:
+.*: Error: operand mismatch -- `bfadd za.s\[w8,0,vgx2\],{z0.h-z1.h}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfadd za.h\[w8, 0, vgx2\], {z0.h-z1.h}
+.*: Error: too many registers in vector register list at operand 2 -- `bfadd za.h\[w13,0,vgx2\],{z1.h-z0.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfadd za.h\[w8,11,vgx3\],{z0.h-z1.h}'
+.*: Error: too many registers in vector register list at operand 2 -- `bfadd za.h\[w8,0,vgx2\],{z0.h-z4.h}'
+.*: Error: operand mismatch -- `bfadd za.s\[w8,0,vgx4\],{z0.h-z3.h}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfadd za.h\[w8, 0, vgx4\], {z0.h-z3.h}
+.*: Error: too many registers in vector register list at operand 2 -- `bfadd za.h\[w14,0,vgx4\],{z10.h-z3.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfadd za.h\[w8,15,vgx1\],{z3.h-z2.h}'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfadd za.h\[w8,0,vgx4\],{z30.h-z31.h}'
+.*: Error: operand mismatch -- `bfsub za.s\[w8,0,vgx2\],{z0.h-z1.h}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfsub za.h\[w8, 0, vgx2\], {z0.h-z1.h}
+.*: Error: too many registers in vector register list at operand 2 -- `bfsub za.h\[w13,0,vgx2\],{z1.h-z0.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfsub za.h\[w8,11,vgx3\],{z0.h-z1.h}'
+.*: Error: too many registers in vector register list at operand 2 -- `bfsub za.h\[w8,0,vgx2\],{z0.h-z4.h}'
+.*: Error: operand mismatch -- `bfsub za.s\[w8,0,vgx4\],{z0.h-z3.h}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfsub za.h\[w8, 0, vgx4\], {z0.h-z3.h}
+.*: Error: too many registers in vector register list at operand 2 -- `bfsub za.h\[w14,0,vgx4\],{z10.h-z3.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfsub za.h\[w8,15,vgx1\],{z3.h-z2.h}'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfsub za.h\[w8,0,vgx4\],{z30.h-z31.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.s\[w8,0,vgx3\],{z0.h-z1.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w14,0,vgx3\],{z10.h-z1.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,15,vgx3\],{z0.h-z1.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.d\[w8,0,vgx3\],{z30.h-z31.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,0,vgx3\],{z0.h-z1.h},z15.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,0,vgx3\],{z0.h-z1.h},z0.h\[7\]'
+.*: Error: expected a list of 2 registers at operand 2 -- `bfmla za.h\[w8,0,vgx2\],{z0.h},z0.h\[7\]'
+.*: Error: expected a list of 2 registers at operand 2 -- `bfmla za.h\[w8,0,vgx2\],{z0.h},z0.h'
+.*: Error: missing type suffix at operand 2 -- `bfmla za.h\[w8,0,vgx2\],{z0-z1},z0.h\[7\]'
+.*: Error: missing type suffix at operand 2 -- `bfmla za.h\[w8,0,vgx2\],{z0-z1}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.b\[w8,0,vgx1\],{z0.h-z1.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w14,0,vgx1\],{z10.h-z1.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,15,vgx1\],{z0.h-z1.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,0,vgx1\],{z30.h-z31.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.s\[w8,0,vgx1\],{z0.h-z1.h},z15.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,0,vgx1\],{z0.h-z1.h},z0.h\[7\]'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmla za.h\[w8,0,vgx4\],{z0.h},z0.h\[7\]'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmla za.h\[w8,0,vgx4\],{z0.h},z0.h'
+.*: Error: missing type suffix at operand 2 -- `bfmla za.h\[w8,0,vgx4\],{z0-z1},z0.h\[7\]'
+.*: Error: missing type suffix at operand 2 -- `bfmla za.h\[w8,0,vgx4\],{z0-z1}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.s\[w8,0,vgx3\],{z0.h-z1.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w14,0,vgx3\],{z10.h-z1.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,15,vgx3\],{z0.h-z1.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.d\[w8,0,vgx3\],{z31.h-z0.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,0,vgx3\],{z0.h-z1.h},z15.h'
+.*: Error: expected a list of 2 registers at operand 2 -- `bfmla za.h\[w8,0,vgx2\],{z0.h},z15.h'
+.*: Error: operand mismatch -- `bfmla za.h\[w8,0,vgx2\],{z0.h-z1.h},z15'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmla za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z15.h
+.*: Error: operand mismatch -- `bfmla za.h\[w8,0,vgx2\],{z0.h-z1.h},z20'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmla za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z20.h
+.*: Error: comma expected between operands at operand 3 -- `bfmla za.h\[w8,0,vgx2\],{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,0,vgx1\],{z0.h-z1.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.s\[w14,0,vgx1\],{z10.h-z1.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,15,vgx1\],{z0.h-z1.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,0,vgx1\],{z31.h-z2.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.b\[w8,0,vgx1\],{z0.h-z1.h},z15.h'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmla za.h\[w8,0,vgx4\],{z0.h},z15.h'
+.*: Error: operand mismatch -- `bfmla za.h\[w8,0,vgx4\],{z0.h-z1.h},z15'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmla za.h\[w8, 0, vgx4\], {z0.h-z1.h}, z15.h
+.*: Error: operand mismatch -- `bfmla za.h\[w8,0,vgx4\],{z0.h-z1.h},z20'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmla za.h\[w8, 0, vgx4\], {z0.h-z1.h}, z20.h
+.*: Error: comma expected between operands at operand 3 -- `bfmla za.h\[w8,0,vgx4\],{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.s\[w8,0,vgx3\],{z0.h-z1.h},{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w14,0,vgx3\],{z10.h-z1.h},{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.d\[w8,15,vgx3\],{z0.h-z1.h},{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,0,vgx3\],{z30.h-z31.h},{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.b\[w8,0,vgx3\],{z0.h-z1.h},{z30.h-z31.h}'
+.*: Error: expected a list of 2 registers at operand 2 -- `bfmla za.h\[w8,0,vgx2\],{z0.h},{z30.h-z31.h}'
+.*: Error: expected a list of 2 registers at operand 3 -- `bfmla za.h\[w8,0,vgx2\],{z0.h-z1.h},{z30.h}'
+.*: Error: expected a list of 2 registers at operand 2 -- `bfmla za.h\[w8,0,vgx2\],{z0.h},{z30.h}'
+.*: Error: expected a list of 2 registers at operand 2 -- `bfmla za.h\[w8,0,vgx2\],{z0.h},{z30.h}'
+.*: Error: operand mismatch -- `bfmla za.b\[w8,20,vgx2\],{z0.h},{z30.h}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmla za.h\[w8, 20, vgx2\], {z0.h}, {z30.h}
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.s\[w8,0,vgx1\],{z0.h-z1.h},{z0.h-z3.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w14,0,vgx1\],{z10.h-z1.h},{z0.h-z3.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.s\[w8,15,vgx1\],{z0.h-z1.h},{z0.h-z3.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.h\[w8,0,vgx1\],{z30.h-z31.h},{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmla za.b\[w8,0,vgx1\],{z0.h-z1.h},{z30.h-z31.h}'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmla za.h\[w8,0,vgx4\],{z0.h},{z30.h-z31.h}'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmla za.h\[w8,0,vgx4\],{z0.h-z1.h},{z30.h}'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmla za.h\[w8,0,vgx4\],{z0.h},{z30.h}'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmla za.h\[w8,0,vgx4\],{z0.h},{z30.h}'
+.*: Error: operand mismatch -- `bfmla za.b\[w8,20,vgx4\],{z0.h},{z30.h}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmla za.h\[w8, 20, vgx4\], {z0.h}, {z30.h}
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.s\[w8,0,vgx3\],{z0.h-z1.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w14,0,vgx3\],{z10.h-z1.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,15,vgx3\],{z0.h-z1.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.d\[w8,0,vgx3\],{z30.h-z31.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,0,vgx3\],{z0.h-z1.h},z15.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,0,vgx3\],{z0.h-z1.h},z0.h\[7\]'
+.*: Error: expected a list of 2 registers at operand 2 -- `bfmls za.h\[w8,0,vgx2\],{z0.h},z0.h\[7\]'
+.*: Error: expected a list of 2 registers at operand 2 -- `bfmls za.h\[w8,0,vgx2\],{z0.h},z0.h'
+.*: Error: missing type suffix at operand 2 -- `bfmls za.h\[w8,0,vgx2\],{z0-z1},z0.h\[7\]'
+.*: Error: missing type suffix at operand 2 -- `bfmls za.h\[w8,0,vgx2\],{z0-z1}'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.b\[w8,0,vgx1\],{z0.h-z1.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w14,0,vgx1\],{z10.h-z1.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,15,vgx1\],{z0.h-z1.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,0,vgx1\],{z30.h-z31.h},z0.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.s\[w8,0,vgx1\],{z0.h-z1.h},z15.h\[0\]'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,0,vgx1\],{z0.h-z1.h},z0.h\[7\]'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmls za.h\[w8,0,vgx4\],{z0.h},z0.h\[7\]'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmls za.h\[w8,0,vgx4\],{z0.h},z0.h'
+.*: Error: missing type suffix at operand 2 -- `bfmls za.h\[w8,0,vgx4\],{z0-z1},z0.h\[7\]'
+.*: Error: missing type suffix at operand 2 -- `bfmls za.h\[w8,0,vgx4\],{z0-z1}'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.s\[w8,0,vgx3\],{z0.h-z1.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w14,0,vgx3\],{z10.h-z1.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,15,vgx3\],{z0.h-z1.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.d\[w8,0,vgx3\],{z31.h-z0.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,0,vgx3\],{z0.h-z1.h},z15.h'
+.*: Error: expected a list of 2 registers at operand 2 -- `bfmls za.h\[w8,0,vgx2\],{z0.h},z15.h'
+.*: Error: operand mismatch -- `bfmls za.h\[w8,0,vgx2\],{z0.h-z1.h},z15'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmls za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z15.h
+.*: Error: operand mismatch -- `bfmls za.h\[w8,0,vgx2\],{z0.h-z1.h},z20'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmls za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z20.h
+.*: Error: comma expected between operands at operand 3 -- `bfmls za.h\[w8,0,vgx2\],{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,0,vgx1\],{z0.h-z1.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.s\[w14,0,vgx1\],{z10.h-z1.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,15,vgx1\],{z0.h-z1.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,0,vgx1\],{z31.h-z2.h},z0.h'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.b\[w8,0,vgx1\],{z0.h-z1.h},z15.h'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmls za.h\[w8,0,vgx4\],{z0.h},z15.h'
+.*: Error: operand mismatch -- `bfmls za.h\[w8,0,vgx4\],{z0.h-z1.h},z15'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmls za.h\[w8, 0, vgx4\], {z0.h-z1.h}, z15.h
+.*: Error: operand mismatch -- `bfmls za.h\[w8,0,vgx4\],{z0.h-z1.h},z20'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmls za.h\[w8, 0, vgx4\], {z0.h-z1.h}, z20.h
+.*: Error: comma expected between operands at operand 3 -- `bfmls za.h\[w8,0,vgx4\],{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.s\[w8,0,vgx3\],{z0.h-z1.h},{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w14,0,vgx3\],{z10.h-z1.h},{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.d\[w8,15,vgx3\],{z0.h-z1.h},{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,0,vgx3\],{z30.h-z31.h},{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.b\[w8,0,vgx3\],{z0.h-z1.h},{z30.h-z31.h}'
+.*: Error: expected a list of 2 registers at operand 2 -- `bfmls za.h\[w8,0,vgx2\],{z0.h},{z30.h-z31.h}'
+.*: Error: expected a list of 2 registers at operand 3 -- `bfmls za.h\[w8,0,vgx2\],{z0.h-z1.h},{z30.h}'
+.*: Error: expected a list of 2 registers at operand 2 -- `bfmls za.h\[w8,0,vgx2\],{z0.h},{z30.h}'
+.*: Error: expected a list of 2 registers at operand 2 -- `bfmls za.h\[w8,0,vgx2\],{z0.h},{z30.h}'
+.*: Error: operand mismatch -- `bfmls za.b\[w8,20,vgx2\],{z0.h},{z30.h}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmls za.h\[w8, 20, vgx2\], {z0.h}, {z30.h}
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.s\[w8,0,vgx1\],{z0.h-z1.h},{z0.h-z3.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w14,0,vgx1\],{z10.h-z1.h},{z0.h-z3.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.s\[w8,15,vgx1\],{z0.h-z1.h},{z0.h-z3.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.h\[w8,0,vgx1\],{z30.h-z31.h},{z0.h-z1.h}'
+.*: Error: invalid vector group size at operand 1 -- `bfmls za.b\[w8,0,vgx1\],{z0.h-z1.h},{z30.h-z31.h}'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmls za.h\[w8,0,vgx4\],{z0.h},{z30.h-z31.h}'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmls za.h\[w8,0,vgx4\],{z0.h-z1.h},{z30.h}'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmls za.h\[w8,0,vgx4\],{z0.h},{z30.h}'
+.*: Error: expected a list of 4 registers at operand 2 -- `bfmls za.h\[w8,0,vgx4\],{z0.h},{z30.h}'
+.*: Error: operand mismatch -- `bfmls za.b\[w8,20,vgx4\],{z0.h},{z30.h}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmls za.h\[w8, 20, vgx4\], {z0.h}, {z30.h}
+.*: Error: operand mismatch -- `bfmopa ZA1.h,p0,p0/m,z0.h,z0.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmopa za1.h, p0/m, p0/m, z0.h, z0.h
+.*: Error: operand mismatch -- `bfmopa ZA0.h,p7/m,p0,z0.h,z0.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmopa za0.h, p7/m, p0/m, z0.h, z0.h
+.*: Error: operand mismatch -- `bfmopa ZA0.h,p0/m,p7/m,z0.s,z0.s'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmopa za0.h, p0/m, p7/m, z0.h, z0.h
+.*: Error: operand mismatch -- `bfmopa ZA0.h,p0/m,p0/m,z31.d,z0.d'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmopa za0.h, p0/m, p0/m, z31.h, z0.h
+.*: Error: ZA tile number out of range at operand 1 -- `bfmopa ZA2.h,p0/m,p8/m,z0.s,z31.b'
+.*: Error: ZA tile number out of range at operand 1 -- `bfmopa ZA4.h,p15/m,p11/m,z0.s,z31.b'
+.*: Error: operand mismatch -- `bfmops ZA1.h,p0,p0/m,z0.h,z0.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmops za1.h, p0/m, p0/m, z0.h, z0.h
+.*: Error: operand mismatch -- `bfmops ZA0.h,p7/m,p0,z0.h,z0.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmops za0.h, p7/m, p0/m, z0.h, z0.h
+.*: Error: operand mismatch -- `bfmops ZA0.h,p0/m,p7/m,z0.s,z0.s'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmops za0.h, p0/m, p7/m, z0.h, z0.h
+.*: Error: operand mismatch -- `bfmops ZA0.h,p0/m,p0/m,z31.d,z0.d'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmops za0.h, p0/m, p0/m, z31.h, z0.h
+.*: Error: ZA tile number out of range at operand 1 -- `bfmops ZA2.h,p0/m,p8/m,z0.s,z31.b'
+.*: Error: ZA tile number out of range at operand 1 -- `bfmops ZA4.h,p15/m,p11/m,z0.s,z31.b'
diff --git a/gas/testsuite/gas/aarch64/bfloat16-sme2-1-bad.s b/gas/testsuite/gas/aarch64/bfloat16-sme2-1-bad.s
new file mode 100644
index 00000000000..1ba22e18e3c
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-sme2-1-bad.s
@@ -0,0 +1,173 @@ 
+/* BFADD.  */
+bfadd    za.s[w8, 0, vgx2], {z0.h - z1.h}
+bfadd    za.h[w13, 0, vgx2], {z1.h - z0.h}
+bfadd    za.h[w8, 11, vgx3], {z0.h - z1.h}
+bfadd    za.h[w8, 0, vgx2], {z0.h - z4.h}
+
+bfadd    za.s[w8, 0, vgx4], {z0.h - z3.h}
+bfadd    za.h[w14, 0, vgx4], {z10.h - z3.h}
+bfadd    za.h[w8, 15, vgx1], {z3.h - z2.h}
+bfadd    za.h[w8, 0, vgx4], {z30.h - z31.h}
+
+/* BFSUB.  */
+bfsub    za.s[w8, 0, vgx2], {z0.h - z1.h}
+bfsub    za.h[w13, 0, vgx2], {z1.h - z0.h}
+bfsub    za.h[w8, 11, vgx3], {z0.h - z1.h}
+bfsub    za.h[w8, 0, vgx2], {z0.h - z4.h}
+
+bfsub    za.s[w8, 0, vgx4], {z0.h - z3.h}
+bfsub    za.h[w14, 0, vgx4], {z10.h - z3.h}
+bfsub    za.h[w8, 15, vgx1], {z3.h - z2.h}
+bfsub    za.h[w8, 0, vgx4], {z30.h - z31.h}
+
+/* BFMLA (multiple and indexed vector).  */
+bfmla    za.s[w8, 0, vgx3], {z0.h - z1.h}, z0.h[0]
+bfmla    za.h[w14, 0, vgx3], {z10.h - z1.h}, z0.h[0]
+bfmla    za.h[w8, 15, vgx3], {z0.h - z1.h}, z0.h[0]
+bfmla    za.d[w8, 0, vgx3], {z30.h - z31.h}, z0.h[0]
+bfmla    za.h[w8, 0, vgx3], {z0.h - z1.h}, z15.h[0]
+bfmla    za.h[w8, 0, vgx3], {z0.h - z1.h}, z0.h[7]
+bfmla    za.h[w8, 0, vgx2], {z0.h}, z0.h[7]
+bfmla    za.h[w8, 0, vgx2], {z0.h}, z0.h
+bfmla    za.h[w8, 0, vgx2], {z0 - z1}, z0.h[7]
+bfmla    za.h[w8, 0, vgx2], {z0 - z1}
+
+bfmla    za.b[w8, 0, vgx1], {z0.h - z1.h}, z0.h[0]
+bfmla    za.h[w14, 0, vgx1], {z10.h - z1.h}, z0.h[0]
+bfmla    za.h[w8, 15, vgx1], {z0.h - z1.h}, z0.h[0]
+bfmla    za.h[w8, 0, vgx1], {z30.h - z31.h}, z0.h[0]
+bfmla    za.s[w8, 0, vgx1], {z0.h - z1.h}, z15.h[0]
+bfmla    za.h[w8, 0, vgx1], {z0.h - z1.h}, z0.h[7]
+bfmla    za.h[w8, 0, vgx4], {z0.h}, z0.h[7]
+bfmla    za.h[w8, 0, vgx4], {z0.h}, z0.h
+bfmla    za.h[w8, 0, vgx4], {z0 - z1}, z0.h[7]
+bfmla    za.h[w8, 0, vgx4], {z0 - z1}
+
+/* BFMLA (multiple and single vector).  */
+bfmla    za.s[w8, 0, vgx3], {z0.h - z1.h}, z0.h
+bfmla    za.h[w14, 0, vgx3], {z10.h - z1.h}, z0.h
+bfmla    za.h[w8, 15, vgx3], {z0.h - z1.h}, z0.h
+bfmla    za.d[w8, 0, vgx3], {z31.h - z0.h}, z0.h
+bfmla    za.h[w8, 0, vgx3], {z0.h - z1.h}, z15.h
+bfmla    za.h[w8, 0, vgx2], {z0.h}, z15.h
+bfmla    za.h[w8, 0, vgx2], {z0.h -z1.h}, z15
+bfmla    za.h[w8, 0, vgx2], {z0.h -z1.h}, z20
+bfmla    za.h[w8, 0, vgx2], {z0.h -z1.h}
+
+bfmla    za.h[w8, 0, vgx1], {z0.h - z1.h}, z0.h
+bfmla    za.s[w14, 0, vgx1], {z10.h - z1.h}, z0.h
+bfmla    za.h[w8, 15, vgx1], {z0.h - z1.h}, z0.h
+bfmla    za.h[w8, 0, vgx1], {z31.h - z2.h}, z0.h
+bfmla    za.b[w8, 0, vgx1], {z0.h - z1.h}, z15.h
+bfmla    za.h[w8, 0, vgx4], {z0.h}, z15.h
+bfmla    za.h[w8, 0, vgx4], {z0.h -z1.h}, z15
+bfmla    za.h[w8, 0, vgx4], {z0.h -z1.h}, z20
+bfmla    za.h[w8, 0, vgx4], {z0.h -z1.h}
+
+/* BFMLA (multiple vectors).  */
+bfmla    za.s[w8, 0, vgx3], {z0.h - z1.h}, {z0.h - z1.h}
+bfmla    za.h[w14, 0, vgx3], {z10.h - z1.h}, {z0.h - z1.h}
+bfmla    za.d[w8, 15, vgx3], {z0.h - z1.h}, {z0.h - z1.h}
+bfmla    za.h[w8, 0, vgx3], {z30.h - z31.h}, {z0.h - z1.h}
+bfmla    za.b[w8, 0, vgx3], {z0.h - z1.h}, {z30.h - z31.h}
+bfmla    za.h[w8, 0, vgx2], {z0.h}, {z30.h - z31.h}
+bfmla    za.h[w8, 0, vgx2], {z0.h - z1.h}, {z30.h}
+bfmla    za.h[w8, 0, vgx2], {z0.h}, {z30.h}
+bfmla    za.h[w8, 0, vgx2], {z0.h}, {z30.h}
+bfmla    za.b[w8, 20, vgx2], {z0.h}, {z30.h}
+
+bfmla    za.s[w8, 0, vgx1], {z0.h - z1.h}, {z0.h - z3.h}
+bfmla    za.h[w14, 0, vgx1], {z10.h - z1.h}, {z0.h - z3.h}
+bfmla    za.s[w8, 15, vgx1], {z0.h - z1.h}, {z0.h - z3.h}
+bfmla    za.h[w8, 0, vgx1], {z30.h - z31.h}, {z0.h - z1.h}
+bfmla    za.b[w8, 0, vgx1], {z0.h - z1.h}, {z30.h - z31.h}
+bfmla    za.h[w8, 0, vgx4], {z0.h}, {z30.h - z31.h}
+bfmla    za.h[w8, 0, vgx4], {z0.h - z1.h}, {z30.h}
+bfmla    za.h[w8, 0, vgx4], {z0.h}, {z30.h}
+bfmla    za.h[w8, 0, vgx4], {z0.h}, {z30.h}
+bfmla    za.b[w8, 20, vgx4], {z0.h}, {z30.h}
+
+/* BFMLS (multiple and indexed vector).  */
+bfmls    za.s[w8, 0, vgx3], {z0.h - z1.h}, z0.h[0]
+bfmls    za.h[w14, 0, vgx3], {z10.h - z1.h}, z0.h[0]
+bfmls    za.h[w8, 15, vgx3], {z0.h - z1.h}, z0.h[0]
+bfmls    za.d[w8, 0, vgx3], {z30.h - z31.h}, z0.h[0]
+bfmls    za.h[w8, 0, vgx3], {z0.h - z1.h}, z15.h[0]
+bfmls    za.h[w8, 0, vgx3], {z0.h - z1.h}, z0.h[7]
+bfmls    za.h[w8, 0, vgx2], {z0.h}, z0.h[7]
+bfmls    za.h[w8, 0, vgx2], {z0.h}, z0.h
+bfmls    za.h[w8, 0, vgx2], {z0 - z1}, z0.h[7]
+bfmls    za.h[w8, 0, vgx2], {z0 - z1}
+
+bfmls    za.b[w8, 0, vgx1], {z0.h - z1.h}, z0.h[0]
+bfmls    za.h[w14, 0, vgx1], {z10.h - z1.h}, z0.h[0]
+bfmls    za.h[w8, 15, vgx1], {z0.h - z1.h}, z0.h[0]
+bfmls    za.h[w8, 0, vgx1], {z30.h - z31.h}, z0.h[0]
+bfmls    za.s[w8, 0, vgx1], {z0.h - z1.h}, z15.h[0]
+bfmls    za.h[w8, 0, vgx1], {z0.h - z1.h}, z0.h[7]
+bfmls    za.h[w8, 0, vgx4], {z0.h}, z0.h[7]
+bfmls    za.h[w8, 0, vgx4], {z0.h}, z0.h
+bfmls    za.h[w8, 0, vgx4], {z0 - z1}, z0.h[7]
+bfmls    za.h[w8, 0, vgx4], {z0 - z1}
+
+/* BFMLS (multiple and single vector).  */
+bfmls    za.s[w8, 0, vgx3], {z0.h - z1.h}, z0.h
+bfmls    za.h[w14, 0, vgx3], {z10.h - z1.h}, z0.h
+bfmls    za.h[w8, 15, vgx3], {z0.h - z1.h}, z0.h
+bfmls    za.d[w8, 0, vgx3], {z31.h - z0.h}, z0.h
+bfmls    za.h[w8, 0, vgx3], {z0.h - z1.h}, z15.h
+bfmls    za.h[w8, 0, vgx2], {z0.h}, z15.h
+bfmls    za.h[w8, 0, vgx2], {z0.h -z1.h}, z15
+bfmls    za.h[w8, 0, vgx2], {z0.h -z1.h}, z20
+bfmls    za.h[w8, 0, vgx2], {z0.h -z1.h}
+
+bfmls    za.h[w8, 0, vgx1], {z0.h - z1.h}, z0.h
+bfmls    za.s[w14, 0, vgx1], {z10.h - z1.h}, z0.h
+bfmls    za.h[w8, 15, vgx1], {z0.h - z1.h}, z0.h
+bfmls    za.h[w8, 0, vgx1], {z31.h - z2.h}, z0.h
+bfmls    za.b[w8, 0, vgx1], {z0.h - z1.h}, z15.h
+bfmls    za.h[w8, 0, vgx4], {z0.h}, z15.h
+bfmls    za.h[w8, 0, vgx4], {z0.h -z1.h}, z15
+bfmls    za.h[w8, 0, vgx4], {z0.h -z1.h}, z20
+bfmls    za.h[w8, 0, vgx4], {z0.h -z1.h}
+
+/* BFMLS (multiple vectors).  */
+bfmls    za.s[w8, 0, vgx3], {z0.h - z1.h}, {z0.h - z1.h}
+bfmls    za.h[w14, 0, vgx3], {z10.h - z1.h}, {z0.h - z1.h}
+bfmls    za.d[w8, 15, vgx3], {z0.h - z1.h}, {z0.h - z1.h}
+bfmls    za.h[w8, 0, vgx3], {z30.h - z31.h}, {z0.h - z1.h}
+bfmls    za.b[w8, 0, vgx3], {z0.h - z1.h}, {z30.h - z31.h}
+bfmls    za.h[w8, 0, vgx2], {z0.h}, {z30.h - z31.h}
+bfmls    za.h[w8, 0, vgx2], {z0.h - z1.h}, {z30.h}
+bfmls    za.h[w8, 0, vgx2], {z0.h}, {z30.h}
+bfmls    za.h[w8, 0, vgx2], {z0.h}, {z30.h}
+bfmls    za.b[w8, 20, vgx2], {z0.h}, {z30.h}
+
+bfmls    za.s[w8, 0, vgx1], {z0.h - z1.h}, {z0.h - z3.h}
+bfmls    za.h[w14, 0, vgx1], {z10.h - z1.h}, {z0.h - z3.h}
+bfmls    za.s[w8, 15, vgx1], {z0.h - z1.h}, {z0.h - z3.h}
+bfmls    za.h[w8, 0, vgx1], {z30.h - z31.h}, {z0.h - z1.h}
+bfmls    za.b[w8, 0, vgx1], {z0.h - z1.h}, {z30.h - z31.h}
+bfmls    za.h[w8, 0, vgx4], {z0.h}, {z30.h - z31.h}
+bfmls    za.h[w8, 0, vgx4], {z0.h - z1.h}, {z30.h}
+bfmls    za.h[w8, 0, vgx4], {z0.h}, {z30.h}
+bfmls    za.h[w8, 0, vgx4], {z0.h}, {z30.h}
+bfmls    za.b[w8, 20, vgx4], {z0.h}, {z30.h}
+
+/* BFMOPA.  */
+bfmopa ZA0.s, p0/m, p0/m, z0.h, z0.h
+bfmopa ZA1.h, p0, p0/m, z0.h, z0.h
+bfmopa ZA0.h, p7/m, p0, z0.h, z0.h
+bfmopa ZA0.h, p0/m, p7/m, z0.s, z0.s
+bfmopa ZA0.h, p0/m, p0/m, z31.d, z0.d
+bfmopa ZA2.h, p0/m, p8/m, z0.s, z31.b
+bfmopa ZA4.h, p15/m, p11/m, z0.s, z31.b
+
+/* BFMOPS.  */
+bfmops ZA0.s, p0/m, p0/m, z0.h, z0.h
+bfmops ZA1.h, p0, p0/m, z0.h, z0.h
+bfmops ZA0.h, p7/m, p0, z0.h, z0.h
+bfmops ZA0.h, p0/m, p7/m, z0.s, z0.s
+bfmops ZA0.h, p0/m, p0/m, z31.d, z0.d
+bfmops ZA2.h, p0/m, p8/m, z0.s, z31.b
+bfmops ZA4.h, p15/m, p11/m, z0.s, z31.b
diff --git a/gas/testsuite/gas/aarch64/bfloat16-sme2-1.d b/gas/testsuite/gas/aarch64/bfloat16-sme2-1.d
new file mode 100644
index 00000000000..8cbe4806842
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-sme2-1.d
@@ -0,0 +1,122 @@ 
+#name: Test of SME2 non-widening BFloat16 instructions.
+#as: -march=armv9.4-a+sme-b16b16
+#objdump: -dr
+
+[^:]+:     file format .*
+
+
+[^:]+:
+
+[^:]+:
+.*:	c1e41c00 	bfadd	za.h\[w8, 0, vgx2\], {z0.h-z1.h}
+.*:	c1e47c00 	bfadd	za.h\[w11, 0, vgx2\], {z0.h-z1.h}
+.*:	c1e41c07 	bfadd	za.h\[w8, 7, vgx2\], {z0.h-z1.h}
+.*:	c1e41fc0 	bfadd	za.h\[w8, 0, vgx2\], {z30.h-z31.h}
+.*:	c1e41fc3 	bfadd	za.h\[w8, 3, vgx2\], {z30.h-z31.h}
+.*:	c1e51c00 	bfadd	za.h\[w8, 0, vgx4\], {z0.h-z3.h}
+.*:	c1e57c00 	bfadd	za.h\[w11, 0, vgx4\], {z0.h-z3.h}
+.*:	c1e51c07 	bfadd	za.h\[w8, 7, vgx4\], {z0.h-z3.h}
+.*:	c1e51f80 	bfadd	za.h\[w8, 0, vgx4\], {z28.h-z31.h}
+.*:	c1e51f83 	bfadd	za.h\[w8, 3, vgx4\], {z28.h-z31.h}
+.*:	c1e41c08 	bfsub	za.h\[w8, 0, vgx2\], {z0.h-z1.h}
+.*:	c1e47c08 	bfsub	za.h\[w11, 0, vgx2\], {z0.h-z1.h}
+.*:	c1e41c0f 	bfsub	za.h\[w8, 7, vgx2\], {z0.h-z1.h}
+.*:	c1e41fc8 	bfsub	za.h\[w8, 0, vgx2\], {z30.h-z31.h}
+.*:	c1e41fcb 	bfsub	za.h\[w8, 3, vgx2\], {z30.h-z31.h}
+.*:	c1e51c08 	bfsub	za.h\[w8, 0, vgx4\], {z0.h-z3.h}
+.*:	c1e57c08 	bfsub	za.h\[w11, 0, vgx4\], {z0.h-z3.h}
+.*:	c1e51c0f 	bfsub	za.h\[w8, 7, vgx4\], {z0.h-z3.h}
+.*:	c1e51f88 	bfsub	za.h\[w8, 0, vgx4\], {z28.h-z31.h}
+.*:	c1e51f8b 	bfsub	za.h\[w8, 3, vgx4\], {z28.h-z31.h}
+.*:	c1101020 	bfmla	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z0.h\[0\]
+.*:	c1107020 	bfmla	za.h\[w11, 0, vgx2\], {z0.h-z1.h}, z0.h\[0\]
+.*:	c1101027 	bfmla	za.h\[w8, 7, vgx2\], {z0.h-z1.h}, z0.h\[0\]
+.*:	c11013e0 	bfmla	za.h\[w8, 0, vgx2\], {z30.h-z31.h}, z0.h\[0\]
+.*:	c11f1020 	bfmla	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z15.h\[0\]
+.*:	c1101c28 	bfmla	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z0.h\[7\]
+.*:	c1101c2b 	bfmla	za.h\[w8, 3, vgx2\], {z0.h-z1.h}, z0.h\[7\]
+.*:	c1109020 	bfmla	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, z0.h\[0\]
+.*:	c110f020 	bfmla	za.h\[w11, 0, vgx4\], {z0.h-z3.h}, z0.h\[0\]
+.*:	c1109027 	bfmla	za.h\[w8, 7, vgx4\], {z0.h-z3.h}, z0.h\[0\]
+.*:	c11093a0 	bfmla	za.h\[w8, 0, vgx4\], {z28.h-z31.h}, z0.h\[0\]
+.*:	c11f9020 	bfmla	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, z15.h\[0\]
+.*:	c1109c28 	bfmla	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, z0.h\[7\]
+.*:	c1109c2b 	bfmla	za.h\[w8, 3, vgx4\], {z0.h-z3.h}, z0.h\[7\]
+.*:	c1601c00 	bfmla	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z0.h
+.*:	c1607c00 	bfmla	za.h\[w11, 0, vgx2\], {z0.h-z1.h}, z0.h
+.*:	c1601c07 	bfmla	za.h\[w8, 7, vgx2\], {z0.h-z1.h}, z0.h
+.*:	c1601fe0 	bfmla	za.h\[w8, 0, vgx2\], {z31.h-z0.h}, z0.h
+.*:	c16f1c00 	bfmla	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z15.h
+.*:	c16f1c03 	bfmla	za.h\[w8, 3, vgx2\], {z0.h-z1.h}, z15.h
+.*:	c1701c00 	bfmla	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, z0.h
+.*:	c1707c00 	bfmla	za.h\[w11, 0, vgx4\], {z0.h-z3.h}, z0.h
+.*:	c1701c07 	bfmla	za.h\[w8, 7, vgx4\], {z0.h-z3.h}, z0.h
+.*:	c1701fe0 	bfmla	za.h\[w8, 0, vgx4\], {z31.h-z2.h}, z0.h
+.*:	c17f1c00 	bfmla	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, z15.h
+.*:	c17f1c03 	bfmla	za.h\[w8, 3, vgx4\], {z0.h-z3.h}, z15.h
+.*:	c1e01008 	bfmla	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, {z0.h-z1.h}
+.*:	c1e07008 	bfmla	za.h\[w11, 0, vgx2\], {z0.h-z1.h}, {z0.h-z1.h}
+.*:	c1e0100f 	bfmla	za.h\[w8, 7, vgx2\], {z0.h-z1.h}, {z0.h-z1.h}
+.*:	c1e013c8 	bfmla	za.h\[w8, 0, vgx2\], {z30.h-z31.h}, {z0.h-z1.h}
+.*:	c1fe1008 	bfmla	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, {z30.h-z31.h}
+.*:	c1fe100b 	bfmla	za.h\[w8, 3, vgx2\], {z0.h-z1.h}, {z30.h-z31.h}
+.*:	c1e11008 	bfmla	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, {z0.h-z3.h}
+.*:	c1e17008 	bfmla	za.h\[w11, 0, vgx4\], {z0.h-z3.h}, {z0.h-z3.h}
+.*:	c1e1100f 	bfmla	za.h\[w8, 7, vgx4\], {z0.h-z3.h}, {z0.h-z3.h}
+.*:	c1e11388 	bfmla	za.h\[w8, 0, vgx4\], {z28.h-z31.h}, {z0.h-z3.h}
+.*:	c1fd1008 	bfmla	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, {z28.h-z31.h}
+.*:	c1fd100b 	bfmla	za.h\[w8, 3, vgx4\], {z0.h-z3.h}, {z28.h-z31.h}
+.*:	c1101030 	bfmls	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z0.h\[0\]
+.*:	c1107030 	bfmls	za.h\[w11, 0, vgx2\], {z0.h-z1.h}, z0.h\[0\]
+.*:	c1101037 	bfmls	za.h\[w8, 7, vgx2\], {z0.h-z1.h}, z0.h\[0\]
+.*:	c11013f0 	bfmls	za.h\[w8, 0, vgx2\], {z30.h-z31.h}, z0.h\[0\]
+.*:	c11f1030 	bfmls	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z15.h\[0\]
+.*:	c1101c38 	bfmls	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z0.h\[7\]
+.*:	c1101c3b 	bfmls	za.h\[w8, 3, vgx2\], {z0.h-z1.h}, z0.h\[7\]
+.*:	c1109030 	bfmls	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, z0.h\[0\]
+.*:	c110f030 	bfmls	za.h\[w11, 0, vgx4\], {z0.h-z3.h}, z0.h\[0\]
+.*:	c1109037 	bfmls	za.h\[w8, 7, vgx4\], {z0.h-z3.h}, z0.h\[0\]
+.*:	c11093b0 	bfmls	za.h\[w8, 0, vgx4\], {z28.h-z31.h}, z0.h\[0\]
+.*:	c11f9030 	bfmls	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, z15.h\[0\]
+.*:	c1109c38 	bfmls	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, z0.h\[7\]
+.*:	c1109c3b 	bfmls	za.h\[w8, 3, vgx4\], {z0.h-z3.h}, z0.h\[7\]
+.*:	c1601c08 	bfmls	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z0.h
+.*:	c1607c08 	bfmls	za.h\[w11, 0, vgx2\], {z0.h-z1.h}, z0.h
+.*:	c1601c0f 	bfmls	za.h\[w8, 7, vgx2\], {z0.h-z1.h}, z0.h
+.*:	c1601fe8 	bfmls	za.h\[w8, 0, vgx2\], {z31.h-z0.h}, z0.h
+.*:	c16f1c08 	bfmls	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, z15.h
+.*:	c16f1c0b 	bfmls	za.h\[w8, 3, vgx2\], {z0.h-z1.h}, z15.h
+.*:	c1701c08 	bfmls	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, z0.h
+.*:	c1707c08 	bfmls	za.h\[w11, 0, vgx4\], {z0.h-z3.h}, z0.h
+.*:	c1701c0f 	bfmls	za.h\[w8, 7, vgx4\], {z0.h-z3.h}, z0.h
+.*:	c1701fe8 	bfmls	za.h\[w8, 0, vgx4\], {z31.h-z2.h}, z0.h
+.*:	c17f1c08 	bfmls	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, z15.h
+.*:	c17f1c0b 	bfmls	za.h\[w8, 3, vgx4\], {z0.h-z3.h}, z15.h
+.*:	c1e01018 	bfmls	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, {z0.h-z1.h}
+.*:	c1e07018 	bfmls	za.h\[w11, 0, vgx2\], {z0.h-z1.h}, {z0.h-z1.h}
+.*:	c1e0101f 	bfmls	za.h\[w8, 7, vgx2\], {z0.h-z1.h}, {z0.h-z1.h}
+.*:	c1e013d8 	bfmls	za.h\[w8, 0, vgx2\], {z30.h-z31.h}, {z0.h-z1.h}
+.*:	c1fe1018 	bfmls	za.h\[w8, 0, vgx2\], {z0.h-z1.h}, {z30.h-z31.h}
+.*:	c1fe101b 	bfmls	za.h\[w8, 3, vgx2\], {z0.h-z1.h}, {z30.h-z31.h}
+.*:	c1e11018 	bfmls	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, {z0.h-z3.h}
+.*:	c1e17018 	bfmls	za.h\[w11, 0, vgx4\], {z0.h-z3.h}, {z0.h-z3.h}
+.*:	c1e1101f 	bfmls	za.h\[w8, 7, vgx4\], {z0.h-z3.h}, {z0.h-z3.h}
+.*:	c1e11398 	bfmls	za.h\[w8, 0, vgx4\], {z28.h-z31.h}, {z0.h-z3.h}
+.*:	c1fd1018 	bfmls	za.h\[w8, 0, vgx4\], {z0.h-z3.h}, {z28.h-z31.h}
+.*:	c1fd101b 	bfmls	za.h\[w8, 3, vgx4\], {z0.h-z3.h}, {z28.h-z31.h}
+.*:	81a00008 	bfmopa	za0.h, p0/m, p0/m, z0.h, z0.h
+.*:	81a00009 	bfmopa	za1.h, p0/m, p0/m, z0.h, z0.h
+.*:	81a01c08 	bfmopa	za0.h, p7/m, p0/m, z0.h, z0.h
+.*:	81a0e008 	bfmopa	za0.h, p0/m, p7/m, z0.h, z0.h
+.*:	81a003e8 	bfmopa	za0.h, p0/m, p0/m, z31.h, z0.h
+.*:	81bf0008 	bfmopa	za0.h, p0/m, p0/m, z0.h, z31.h
+.*:	81afad48 	bfmopa	za0.h, p3/m, p5/m, z10.h, z15.h
+.*:	81b965e9 	bfmopa	za1.h, p1/m, p3/m, z15.h, z25.h
+.*:	81a00018 	bfmops	za0.h, p0/m, p0/m, z0.h, z0.h
+.*:	81a00019 	bfmops	za1.h, p0/m, p0/m, z0.h, z0.h
+.*:	81a01c18 	bfmops	za0.h, p7/m, p0/m, z0.h, z0.h
+.*:	81a0e018 	bfmops	za0.h, p0/m, p7/m, z0.h, z0.h
+.*:	81a003f8 	bfmops	za0.h, p0/m, p0/m, z31.h, z0.h
+.*:	81bf0018 	bfmops	za0.h, p0/m, p0/m, z0.h, z31.h
+.*:	81afad58 	bfmops	za0.h, p3/m, p5/m, z10.h, z15.h
+.*:	81b965f9 	bfmops	za1.h, p1/m, p3/m, z15.h, z25.h
diff --git a/gas/testsuite/gas/aarch64/bfloat16-sme2-1.s b/gas/testsuite/gas/aarch64/bfloat16-sme2-1.s
new file mode 100644
index 00000000000..e4aefdccda7
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-sme2-1.s
@@ -0,0 +1,139 @@ 
+/* BFADD: VGx2 group, then VGx4 group; the last line of each group omits the optional vgx.  */
+bfadd    za.h[w8, 0, vgx2], {z0.h - z1.h}
+bfadd    za.h[w11, 0, vgx2], {z0.h - z1.h}
+bfadd    za.h[w8, 7, vgx2], {z0.h - z1.h}
+bfadd    za.h[w8, 0, vgx2], {z30.h - z31.h}
+bfadd    za.h[w8, 3], {z30.h - z31.h}
+
+bfadd    za.h[w8, 0, vgx4], {z0.h - z3.h}
+bfadd    za.h[w11, 0, vgx4], {z0.h - z3.h}
+bfadd    za.h[w8, 7, vgx4], {z0.h - z3.h}
+bfadd    za.h[w8, 0, vgx4], {z28.h - z31.h}
+bfadd    za.h[w8, 3], {z28.h - z31.h}
+
+/* BFSUB: VGx2 group, then VGx4 group; the last line of each group omits the optional vgx.  */
+bfsub    za.h[w8, 0, vgx2], {z0.h - z1.h}
+bfsub    za.h[w11, 0, vgx2], {z0.h - z1.h}
+bfsub    za.h[w8, 7, vgx2], {z0.h - z1.h}
+bfsub    za.h[w8, 0, vgx2], {z30.h - z31.h}
+bfsub    za.h[w8, 3], {z30.h - z31.h}
+
+bfsub    za.h[w8, 0, vgx4], {z0.h - z3.h}
+bfsub    za.h[w11, 0, vgx4], {z0.h - z3.h}
+bfsub    za.h[w8, 7, vgx4], {z0.h - z3.h}
+bfsub    za.h[w8, 0, vgx4], {z28.h - z31.h}
+bfsub    za.h[w8, 3], {z28.h - z31.h}
+
+/* BFMLA (multiple and indexed vector): VGx2 then VGx4, element index 0 and 7.  */
+bfmla    za.h[w8, 0, vgx2], {z0.h - z1.h}, z0.h[0]
+bfmla    za.h[w11, 0, vgx2], {z0.h - z1.h}, z0.h[0]
+bfmla    za.h[w8, 7, vgx2], {z0.h - z1.h}, z0.h[0]
+bfmla    za.h[w8, 0, vgx2], {z30.h - z31.h}, z0.h[0]
+bfmla    za.h[w8, 0, vgx2], {z0.h - z1.h}, z15.h[0]
+bfmla    za.h[w8, 0, vgx2], {z0.h - z1.h}, z0.h[7]
+bfmla    za.h[w8, 3], {z0.h - z1.h}, z0.h[7]
+
+bfmla    za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0]
+bfmla    za.h[w11, 0, vgx4], {z0.h - z3.h}, z0.h[0]
+bfmla    za.h[w8, 7, vgx4], {z0.h - z3.h}, z0.h[0]
+bfmla    za.h[w8, 0, vgx4], {z28.h - z31.h}, z0.h[0]
+bfmla    za.h[w8, 0, vgx4], {z0.h - z3.h}, z15.h[0]
+bfmla    za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h[7]
+bfmla    za.h[w8, 3], {z0.h - z3.h}, z0.h[7]
+
+/* BFMLA (multiple and single vector): includes wrapping lists such as {z31.h - z0.h}.  */
+bfmla    za.h[w8, 0, vgx2], {z0.h - z1.h}, z0.h
+bfmla    za.h[w11, 0, vgx2], {z0.h - z1.h}, z0.h
+bfmla    za.h[w8, 7, vgx2], {z0.h - z1.h}, z0.h
+bfmla    za.h[w8, 0, vgx2], {z31.h - z0.h}, z0.h
+bfmla    za.h[w8, 0, vgx2], {z0.h - z1.h}, z15.h
+bfmla    za.h[w8, 3], {z0.h - z1.h}, z15.h
+
+bfmla    za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h
+bfmla    za.h[w11, 0, vgx4], {z0.h - z3.h}, z0.h
+bfmla    za.h[w8, 7, vgx4], {z0.h - z3.h}, z0.h
+bfmla    za.h[w8, 0, vgx4], {z31.h - z2.h}, z0.h
+bfmla    za.h[w8, 0, vgx4], {z0.h - z3.h}, z15.h
+bfmla    za.h[w8, 3], {z0.h - z3.h}, z15.h
+
+/* BFMLA (multiple vectors): VGx2 then VGx4, both source operands are register lists.  */
+bfmla    za.h[w8, 0, vgx2], {z0.h - z1.h}, {z0.h - z1.h}
+bfmla    za.h[w11, 0, vgx2], {z0.h - z1.h}, {z0.h - z1.h}
+bfmla    za.h[w8, 7, vgx2], {z0.h - z1.h}, {z0.h - z1.h}
+bfmla    za.h[w8, 0, vgx2], {z30.h - z31.h}, {z0.h - z1.h}
+bfmla    za.h[w8, 0, vgx2], {z0.h - z1.h}, {z30.h - z31.h}
+bfmla    za.h[w8, 3], {z0.h - z1.h}, {z30.h - z31.h}
+
+bfmla    za.h[w8, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h}
+bfmla    za.h[w11, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h}
+bfmla    za.h[w8, 7, vgx4], {z0.h - z3.h}, {z0.h - z3.h}
+bfmla    za.h[w8, 0, vgx4], {z28.h - z31.h}, {z0.h - z3.h}
+bfmla    za.h[w8, 0, vgx4], {z0.h - z3.h}, {z28.h - z31.h}
+bfmla    za.h[w8, 3], {z0.h - z3.h}, {z28.h - z31.h}
+
+/* BFMLS (multiple and indexed vector): VGx2 then VGx4, element index 0 and 7.  */
+bfmls    za.h[w8, 0, vgx2], {z0.h - z1.h}, z0.h[0]
+bfmls    za.h[w11, 0, vgx2], {z0.h - z1.h}, z0.h[0]
+bfmls    za.h[w8, 7, vgx2], {z0.h - z1.h}, z0.h[0]
+bfmls    za.h[w8, 0, vgx2], {z30.h - z31.h}, z0.h[0]
+bfmls    za.h[w8, 0, vgx2], {z0.h - z1.h}, z15.h[0]
+bfmls    za.h[w8, 0, vgx2], {z0.h - z1.h}, z0.h[7]
+bfmls    za.h[w8, 3], {z0.h - z1.h}, z0.h[7]
+
+bfmls    za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0]
+bfmls    za.h[w11, 0, vgx4], {z0.h - z3.h}, z0.h[0]
+bfmls    za.h[w8, 7, vgx4], {z0.h - z3.h}, z0.h[0]
+bfmls    za.h[w8, 0, vgx4], {z28.h - z31.h}, z0.h[0]
+bfmls    za.h[w8, 0, vgx4], {z0.h - z3.h}, z15.h[0]
+bfmls    za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h[7]
+bfmls    za.h[w8, 3], {z0.h - z3.h}, z0.h[7]
+
+/* BFMLS (multiple and single vector): includes wrapping lists such as {z31.h - z0.h}.  */
+bfmls    za.h[w8, 0, vgx2], {z0.h - z1.h}, z0.h
+bfmls    za.h[w11, 0, vgx2], {z0.h - z1.h}, z0.h
+bfmls    za.h[w8, 7, vgx2], {z0.h - z1.h}, z0.h
+bfmls    za.h[w8, 0, vgx2], {z31.h - z0.h}, z0.h
+bfmls    za.h[w8, 0, vgx2], {z0.h - z1.h}, z15.h
+bfmls    za.h[w8, 3], {z0.h - z1.h}, z15.h
+
+bfmls    za.h[w8, 0, vgx4], {z0.h - z3.h}, z0.h
+bfmls    za.h[w11, 0, vgx4], {z0.h - z3.h}, z0.h
+bfmls    za.h[w8, 7, vgx4], {z0.h - z3.h}, z0.h
+bfmls    za.h[w8, 0, vgx4], {z31.h - z2.h}, z0.h
+bfmls    za.h[w8, 0, vgx4], {z0.h - z3.h}, z15.h
+bfmls    za.h[w8, 3], {z0.h - z3.h}, z15.h
+
+/* BFMLS (multiple vectors): VGx2 then VGx4, both source operands are register lists.  */
+bfmls    za.h[w8, 0, vgx2], {z0.h - z1.h}, {z0.h - z1.h}
+bfmls    za.h[w11, 0, vgx2], {z0.h - z1.h}, {z0.h - z1.h}
+bfmls    za.h[w8, 7, vgx2], {z0.h - z1.h}, {z0.h - z1.h}
+bfmls    za.h[w8, 0, vgx2], {z30.h - z31.h}, {z0.h - z1.h}
+bfmls    za.h[w8, 0, vgx2], {z0.h - z1.h}, {z30.h - z31.h}
+bfmls    za.h[w8, 3], {z0.h - z1.h}, {z30.h - z31.h}
+
+bfmls    za.h[w8, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h}
+bfmls    za.h[w11, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h}
+bfmls    za.h[w8, 7, vgx4], {z0.h - z3.h}, {z0.h - z3.h}
+bfmls    za.h[w8, 0, vgx4], {z28.h - z31.h}, {z0.h - z3.h}
+bfmls    za.h[w8, 0, vgx4], {z0.h - z3.h}, {z28.h - z31.h}
+bfmls    za.h[w8, 3], {z0.h - z3.h}, {z28.h - z31.h}
+
+/* BFMOPA: ZA0.h/ZA1.h tiles with /m predicates; tile names are written in upper case.  */
+bfmopa ZA0.h, p0/m, p0/m, z0.h, z0.h
+bfmopa ZA1.h, p0/m, p0/m, z0.h, z0.h
+bfmopa ZA0.h, p7/m, p0/m, z0.h, z0.h
+bfmopa ZA0.h, p0/m, p7/m, z0.h, z0.h
+bfmopa ZA0.h, p0/m, p0/m, z31.h, z0.h
+bfmopa ZA0.h, p0/m, p0/m, z0.h, z31.h
+bfmopa ZA0.h, p3/m, p5/m, z10.h, z15.h
+bfmopa ZA1.h, p1/m, p3/m, z15.h, z25.h
+
+/* BFMOPS: ZA0.h/ZA1.h tiles with /m predicates; tile names are written in upper case.  */
+bfmops ZA0.h, p0/m, p0/m, z0.h, z0.h
+bfmops ZA1.h, p0/m, p0/m, z0.h, z0.h
+bfmops ZA0.h, p7/m, p0/m, z0.h, z0.h
+bfmops ZA0.h, p0/m, p7/m, z0.h, z0.h
+bfmops ZA0.h, p0/m, p0/m, z31.h, z0.h
+bfmops ZA0.h, p0/m, p0/m, z0.h, z31.h
+bfmops ZA0.h, p3/m, p5/m, z10.h, z15.h
+bfmops ZA1.h, p1/m, p3/m, z15.h, z25.h
diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h
index 5f43a235dd1..9c9cb60637e 100644
--- a/include/opcode/aarch64.h
+++ b/include/opcode/aarch64.h
@@ -264,6 +264,8 @@  enum aarch64_feature_bit {
   AARCH64_FEATURE_SME_F8F16,
   /* SVE2 non-widening BFloat16 instructions.  */
   AARCH64_FEATURE_SVE_B16B16,
+  /* SME ZA-targeting non-widening BFloat16 instructions.  */
+  AARCH64_FEATURE_SME_B16B16,
 
   /* Virtual features.  These are used to gate instructions that are enabled
      by either of two (or more) sets of command line flags.  */
diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
index 82816d5db38..8bc6c1887f3 100644
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -1715,6 +1715,10 @@ 
 {                                                       \
   QLF5(S_H,P_M,P_M,S_B,S_B)                             \
 }
+#define OP_SVE_HMMHH  /* One qualifier sequence: .h, /m, /m, .h, .h (used by bfmopa/bfmops).  */  \
+{                                                      \
+  QLF5(S_H,P_M,P_M,S_H,S_H)                            \
+}
 #define OP_SVE_HMS                                      \
 {                                                       \
   QLF3(S_H,P_M,S_S),                                    \
@@ -2815,6 +2819,8 @@  static const aarch64_feature_set aarch64_feature_sve_sve2_b16b16 =
   AARCH64_FEATURES (3, SVE_B16B16, SVE2, SVE_SVE2_B16B16);
 static const aarch64_feature_set aarch64_feature_sve_sme2_b16b16 =
   AARCH64_FEATURES (3, SVE_B16B16, SME2, SVE_SME2_B16B16);
+static const aarch64_feature_set aarch64_feature_sme_b16b16 =
+  AARCH64_FEATURES (2, SME_B16B16, SME2);  /* Feature set behind the SME_B16B16 alias used by SME_B16B16_INSN.  */
 static const aarch64_feature_set aarch64_feature_sme2p1 =
   AARCH64_FEATURE (SME2p1);
 static const aarch64_feature_set aarch64_feature_sve2p1 =
@@ -2927,6 +2933,7 @@  static const aarch64_feature_set aarch64_feature_sme_f16f16_f8f16 =
 #define D128_THE  &aarch64_feature_d128_the
 #define SVE_SVE2_B16B16  &aarch64_feature_sve_sve2_b16b16
 #define SVE_SME2_B16B16  &aarch64_feature_sve_sme2_b16b16
+#define SME_B16B16  &aarch64_feature_sme_b16b16  /* Feature-set pointer placed in SME_B16B16_INSN entries.  */
 #define SME2p1  &aarch64_feature_sme2p1
 #define SVE2p1  &aarch64_feature_sve2p1
 #define RCPC3	  &aarch64_feature_rcpc3
@@ -3035,6 +3042,9 @@  static const aarch64_feature_set aarch64_feature_sme_f16f16_f8f16 =
 #define SVE_SME2_B16B16_INSNC(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,CONSTRAINTS,TIED) \
   { NAME, OPCODE, MASK, CLASS, OP, SVE_SME2_B16B16, OPS, QUALS, \
     FLAGS | F_STRICT, CONSTRAINTS, TIED, NULL }
+#define SME_B16B16_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,TIED) \
+  { NAME, OPCODE, MASK, CLASS, OP, SME_B16B16, OPS, QUALS, \
+    FLAGS | F_STRICT, 0, TIED, NULL }  /* Entry keyed on SME_B16B16; F_STRICT is always OR'd in and the constraints slot is fixed at 0.  */
 #define SVE2p1_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,TIED) \
   { NAME, OPCODE, MASK, CLASS, OP, SVE2p1, OPS, QUALS, \
     FLAGS | F_STRICT, 0, TIED, NULL }
@@ -6672,6 +6682,26 @@  const struct aarch64_opcode aarch64_opcode_table[] =
   SVE_SME2_B16B16_INSN("bfclamp", 0xc120c000, 0xffe0fc01, sme_misc, 0, OP3 (SME_Zdnx2, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, F_OD(2), 0),
   SVE_SME2_B16B16_INSN("bfclamp", 0xc120c800, 0xffe0fc03, sme_misc, 0, OP3 (SME_Zdnx4, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, F_OD(4), 0),
 
+/* SME ZA-targeting non-widening BFloat16 instructions.  */
+  SME_B16B16_INSN("bfadd", 0xc1e41c00, 0xffff9c38, sme_misc, 0, OP2 (SME_ZA_array_off3_0, SME_Znx2), OP_SVE_HH, F_OD(2), 0),
+  SME_B16B16_INSN("bfadd", 0xc1e51c00, 0xffff9c78, sme_misc, 0, OP2 (SME_ZA_array_off3_0, SME_Znx4), OP_SVE_HH, F_OD(4), 0),
+  SME_B16B16_INSN("bfsub", 0xc1e41c08, 0xffff9c38, sme_misc, 0, OP2 (SME_ZA_array_off3_0, SME_Znx2), OP_SVE_HH, F_OD(2), 0),
+  SME_B16B16_INSN("bfsub", 0xc1e51c08, 0xffff9c78, sme_misc, 0, OP2 (SME_ZA_array_off3_0, SME_Znx4), OP_SVE_HH, F_OD(4), 0),
+  SME_B16B16_INSN("bfmla", 0xc1101020, 0xfff09030, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SME_Znx2, SME_Zm_INDEX3_3), OP_SVE_HHH, F_OD(2), 0),
+  SME_B16B16_INSN("bfmla", 0xc1109020, 0xfff09070, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SME_Znx4, SME_Zm_INDEX3_3), OP_SVE_HHH, F_OD(4), 0),
+  SME_B16B16_INSN("bfmla", 0xc1601c00, 0xfff09c18, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SVE_ZnxN, SME_Zm), OP_SVE_HHH, F_OD(2), 0),
+  SME_B16B16_INSN("bfmla", 0xc1701c00, 0xfff09c18, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SVE_ZnxN, SME_Zm), OP_SVE_HHH, F_OD(4), 0),
+  SME_B16B16_INSN("bfmla", 0xc1e01008, 0xffe19c38, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SME_Znx2, SME_Zmx2), OP_SVE_HHH, F_OD(2), 0),
+  SME_B16B16_INSN("bfmla", 0xc1e11008, 0xffe39c78, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SME_Znx4, SME_Zmx4), OP_SVE_HHH, F_OD(4), 0),
+  SME_B16B16_INSN("bfmls", 0xc1101030, 0xfff09030, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SME_Znx2, SME_Zm_INDEX3_3), OP_SVE_HHH, F_OD(2), 0),
+  SME_B16B16_INSN("bfmls", 0xc1109030, 0xfff09070, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SME_Znx4, SME_Zm_INDEX3_3), OP_SVE_HHH, F_OD(4), 0),
+  SME_B16B16_INSN("bfmls", 0xc1601c08, 0xfff09c18, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SVE_ZnxN, SME_Zm), OP_SVE_HHH, F_OD(2), 0),
+  SME_B16B16_INSN("bfmls", 0xc1701c08, 0xfff09c18, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SVE_ZnxN, SME_Zm), OP_SVE_HHH, F_OD(4), 0),
+  SME_B16B16_INSN("bfmls", 0xc1e01018, 0xffe19c38, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SME_Znx2, SME_Zmx2), OP_SVE_HHH, F_OD(2), 0),
+  SME_B16B16_INSN("bfmls", 0xc1e11018, 0xffe39c78, sme_misc, 0, OP3 (SME_ZA_array_off3_0, SME_Znx4, SME_Zmx4), OP_SVE_HHH, F_OD(4), 0),
+  SME_B16B16_INSN("bfmopa", 0x81a00008, 0xffe0001e, sme_misc, 0, OP5 (SME_ZAda_1b, SVE_Pg3, SME_Pm, SVE_Zn, SVE_Zm_16), OP_SVE_HMMHH, 0, 0),
+  SME_B16B16_INSN("bfmops", 0x81a00018, 0xffe0001e, sme_misc, 0, OP5 (SME_ZAda_1b, SVE_Pg3, SME_Pm, SVE_Zn, SVE_Zm_16), OP_SVE_HMMHH, 0, 0),
+
 /* SME2.1 movaz instructions.  */
   SME2p1_INSN ("movaz", 0xc0060600, 0xffff1f83, sme2_movaz, 0, OP2 (SME_Zdnx4, SME_ZA_array_vrsb_2), OP_SVE_BB, 0, 0),
   SME2p1_INSN ("movaz", 0xc0460600, 0xffff1f83, sme2_movaz, 0, OP2 (SME_Zdnx4, SME_ZA_array_vrsh_2), OP_SVE_HH, 0, 0),