[v2,2/5] aarch64: Add support for FEAT_SVE_B16B16 min and max instructions.

Message ID 20240719121438.2929286-3-srinath.parvathaneni@arm.com
State Superseded
Headers
Series aarch64: Add support for sme2 and sve2 BFloat16 feature. |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 fail Test failed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Srinath Parvathaneni July 19, 2024, 12:14 p.m. UTC
  This patch adds support for SME Z-targeting multi-vector non-widening
BFloat16 instructions, under FEAT_SVE_B16B16 and enabled by the command
line flag "+sve-b16b16+sme2".

The spec for this feature and its instructions is available here [1]:
[1]: https://developer.arm.com/documentation/ddi0602/2024-06/SME-Instructions?lang=en
---
 gas/config/tc-aarch64.c                       |   2 +
 .../gas/aarch64/bfloat16-2-invalid.l          |   4 +-
 .../gas/aarch64/bfloat16-sme2-2-bad.d         |   4 +
 .../gas/aarch64/bfloat16-sme2-2-bad.l         | 159 ++++++++++++++++++
 .../gas/aarch64/bfloat16-sme2-2-bad.s         | 136 +++++++++++++++
 gas/testsuite/gas/aarch64/bfloat16-sme2-2.d   | 122 ++++++++++++++
 gas/testsuite/gas/aarch64/bfloat16-sme2-2.s   | 138 +++++++++++++++
 include/opcode/aarch64.h                      |   2 +
 opcodes/aarch64-tbl.h                         |  29 ++++
 9 files changed, 594 insertions(+), 2 deletions(-)
 create mode 100644 gas/testsuite/gas/aarch64/bfloat16-sme2-2-bad.d
 create mode 100644 gas/testsuite/gas/aarch64/bfloat16-sme2-2-bad.l
 create mode 100644 gas/testsuite/gas/aarch64/bfloat16-sme2-2-bad.s
 create mode 100644 gas/testsuite/gas/aarch64/bfloat16-sme2-2.d
 create mode 100644 gas/testsuite/gas/aarch64/bfloat16-sme2-2.s
  

Patch

diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
index 4249374cf64..0699bd0eaed 100644
--- a/gas/config/tc-aarch64.c
+++ b/gas/config/tc-aarch64.c
@@ -10765,6 +10765,8 @@  static const struct aarch64_virtual_dependency_table aarch64_dependencies[] = {
   /* TODO: Add SME_F16F16->SME_F16F16_F8F16 when SME_F16F16 is added.  */
   {AARCH64_FEATURE (SME_F8F16), AARCH64_FEATURE (SME_F16F16_F8F16)},
   {AARCH64_FEATURES (2, SVE_B16B16, SVE2), AARCH64_FEATURE (SVE_SVE2_B16B16)},
+  {AARCH64_FEATURES (2, SVE_B16B16, SME2), AARCH64_FEATURES (2, SVE_SME2_B16B16,
+							     SVE_SVE2_B16B16)},
 };
 
 static aarch64_feature_set
diff --git a/gas/testsuite/gas/aarch64/bfloat16-2-invalid.l b/gas/testsuite/gas/aarch64/bfloat16-2-invalid.l
index 5da96c72ae5..7742e9d4865 100644
--- a/gas/testsuite/gas/aarch64/bfloat16-2-invalid.l
+++ b/gas/testsuite/gas/aarch64/bfloat16-2-invalid.l
@@ -183,8 +183,8 @@ 
 .*: Error: operand mismatch -- `bfclamp z31.b,z31.s,z31.d'
 .*: Info:    did you mean this\?
 .*: Info:    	bfclamp z31.h, z31.h, z31.h
-.*: Error: expected an SVE vector register at operand 1 -- `bfclamp {z0.h},z0.h,z0.h'
-.*: Error: expected an SVE vector register at operand 1 -- `bfclamp {z0.h-z0.h},z0.h'
+.*: Error: expected a list of 2 or 4 registers at operand 1 -- `bfclamp {z0.h},z0.h,z0.h'
+.*: Error: invalid range in vector register list at operand 1 -- `bfclamp {z0.h-z0.h},z0.h'
 .*: Error: comma expected between operands at operand 3 -- `bfclamp z0.h,z0.h'
 .*: Error: operand mismatch -- `bfmla z0.b,z0.h,z0.h\[0\]'
 .*: Info:    did you mean this\?
diff --git a/gas/testsuite/gas/aarch64/bfloat16-sme2-2-bad.d b/gas/testsuite/gas/aarch64/bfloat16-sme2-2-bad.d
new file mode 100644
index 00000000000..0ec94674a4a
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-sme2-2-bad.d
@@ -0,0 +1,4 @@ 
+#name: Test of invalid SME2 non-widening BFloat16 min max instructions.
+#as: -march=armv9.4-a+sve-b16b16+sme2
+#source: bfloat16-sme2-2-bad.s
+#error_output: bfloat16-sme2-2-bad.l
diff --git a/gas/testsuite/gas/aarch64/bfloat16-sme2-2-bad.l b/gas/testsuite/gas/aarch64/bfloat16-sme2-2-bad.l
new file mode 100644
index 00000000000..657f87d002a
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-sme2-2-bad.l
@@ -0,0 +1,159 @@ 
+.*: Assembler messages:
+.*: Error: too many registers in vector register list at operand 2 -- `bfmax {z30.h-z31.h},{z0.h-z31.h},z0.h'
+.*: Error: z0-z15 expected at operand 3 -- `bfmax {z0.h-z1.h},{z0.h-z1.h},z16.h'
+.*: Error: operand mismatch -- `bfmax {z14.s-z15.s},{z14.s-z15.s},z1.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmax {z14.h-z15.h}, {z14.h-z15.h}, z1.h
+.*: Error: too many registers in vector register list at operand 1 -- `bfmax {z2.h-z7.h},{z30.h-z7.h},z3.d'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmax {z4.h-z3.h},{z4.h-z3.h},z7.b'
+.*: Error: operand mismatch -- `bfmax {z28.h-z31.h},{z28.s-z31.h},z0.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmax {z28.h-z31.h}, {z28.h-z31.h}, z0.h
+.*: Error: z0-z15 expected at operand 3 -- `bfmax {z0.h-z3.h},{z0.h-z3.h},z16.h'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmax {z10.h-z15.h},{z10.h-z15.h},z1.h'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmax {z14.h-z7.h},{z4.h-z7.h},z3.s'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmax {z4.h-z17.h},{z1.h-z7.h},z17.b'
+.*: Error: operand mismatch -- `bfmax {z0.s-z1.h},{z0.h-z1.h},{z0.h-z1.h}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmax {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h}
+.*: Error: invalid range in vector register list at operand 2 -- `bfmax {z30.h-z31.h},{z31.s-z31.h},{z0.h-z1.h}'
+.*: Error: operand mismatch -- `bfmax {z0.h-z1.h},{z0.h-z1.h},{z30.b-z31.b}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmax {z0.h-z1.h}, {z0.h-z1.h}, {z30.h-z31.h}
+.*: Error: too many registers in vector register list at operand 3 -- `bfmax {z14.h-z15.h},{z14.h-z15.h},{z14.s-z10.s}'
+.*: Error: operand 2 must be the same register as operand 1 -- `bfmax {z6.h-z7.h},{z16.h-z17.h},{z30.h-z31.h}'
+.*: Error: comma expected between operands at operand 3 -- `bfmax {z2.h-z3.h},{z2.h-z3.h}'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmax {z20.h-z31.h},{z0.h-z3.h},{z0.h-z3.h}'
+.*: Error: too many registers in vector register list at operand 2 -- `bfmax {z28.h-z31.h},{z21.s-z31.s},{z0.h-z3.h}'
+.*: Error: operand mismatch -- `bfmax {z0.h-z3.h},{z0.h-z3.h},{z28.b-z31.b}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmax {z0.h-z3.h}, {z0.h-z3.h}, {z28.h-z31.h}
+.*: Error: too many registers in vector register list at operand 3 -- `bfmax {z12.h-z15.h},{z12.h-z15.h},{z10.h-z25.h}'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmax {z4.h-z17.h},{z4.h-z7.h},{z14.h-z17.h}'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmax {z14.h-z7.h},{z4.s-z7.b},{z10.h-z3.h}'
+.*: Error: too many registers in vector register list at operand 2 -- `bfmaxnm {z30.h-z31.h},{z0.h-z31.h},z0.h'
+.*: Error: z0-z15 expected at operand 3 -- `bfmaxnm {z0.h-z1.h},{z0.h-z1.h},z16.h'
+.*: Error: operand mismatch -- `bfmaxnm {z14.s-z15.s},{z14.s-z15.s},z1.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmaxnm {z14.h-z15.h}, {z14.h-z15.h}, z1.h
+.*: Error: too many registers in vector register list at operand 1 -- `bfmaxnm {z2.h-z7.h},{z30.h-z7.h},z3.d'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmaxnm {z4.h-z3.h},{z4.h-z3.h},z7.b'
+.*: Error: operand mismatch -- `bfmaxnm {z28.h-z31.h},{z28.s-z31.h},z0.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmaxnm {z28.h-z31.h}, {z28.h-z31.h}, z0.h
+.*: Error: z0-z15 expected at operand 3 -- `bfmaxnm {z0.h-z3.h},{z0.h-z3.h},z16.h'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmaxnm {z10.h-z15.h},{z10.h-z15.h},z1.h'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmaxnm {z14.h-z7.h},{z4.h-z7.h},z3.s'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmaxnm {z4.h-z17.h},{z1.h-z7.h},z17.b'
+.*: Error: operand mismatch -- `bfmaxnm {z0.s-z1.h},{z0.h-z1.h},{z0.h-z1.h}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmaxnm {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h}
+.*: Error: invalid range in vector register list at operand 2 -- `bfmaxnm {z30.h-z31.h},{z31.s-z31.h},{z0.h-z1.h}'
+.*: Error: operand mismatch -- `bfmaxnm {z0.h-z1.h},{z0.h-z1.h},{z30.b-z31.b}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmaxnm {z0.h-z1.h}, {z0.h-z1.h}, {z30.h-z31.h}
+.*: Error: too many registers in vector register list at operand 3 -- `bfmaxnm {z14.h-z15.h},{z14.h-z15.h},{z14.s-z10.s}'
+.*: Error: operand 2 must be the same register as operand 1 -- `bfmaxnm {z6.h-z7.h},{z16.h-z17.h},{z30.h-z31.h}'
+.*: Error: comma expected between operands at operand 3 -- `bfmaxnm {z2.h-z3.h},{z2.h-z3.h}'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmaxnm {z20.h-z31.h},{z0.h-z3.h},{z0.h-z3.h}'
+.*: Error: too many registers in vector register list at operand 2 -- `bfmaxnm {z28.h-z31.h},{z21.s-z31.s},{z0.h-z3.h}'
+.*: Error: operand mismatch -- `bfmaxnm {z0.h-z3.h},{z0.h-z3.h},{z28.b-z31.b}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmaxnm {z0.h-z3.h}, {z0.h-z3.h}, {z28.h-z31.h}
+.*: Error: too many registers in vector register list at operand 3 -- `bfmaxnm {z12.h-z15.h},{z12.h-z15.h},{z10.h-z25.h}'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmaxnm {z4.h-z17.h},{z4.h-z7.h},{z14.h-z17.h}'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmaxnm {z14.h-z7.h},{z4.s-z7.b},{z10.h-z3.h}'
+.*: Error: too many registers in vector register list at operand 2 -- `bfmin {z30.h-z31.h},{z0.h-z31.h},z0.h'
+.*: Error: z0-z15 expected at operand 3 -- `bfmin {z0.h-z1.h},{z0.h-z1.h},z16.h'
+.*: Error: operand mismatch -- `bfmin {z14.s-z15.s},{z14.s-z15.s},z1.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmin {z14.h-z15.h}, {z14.h-z15.h}, z1.h
+.*: Error: too many registers in vector register list at operand 1 -- `bfmin {z2.h-z7.h},{z30.h-z7.h},z3.d'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmin {z4.h-z3.h},{z4.h-z3.h},z7.b'
+.*: Error: operand mismatch -- `bfmin {z28.h-z31.h},{z28.s-z31.h},z0.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmin {z28.h-z31.h}, {z28.h-z31.h}, z0.h
+.*: Error: z0-z15 expected at operand 3 -- `bfmin {z0.h-z3.h},{z0.h-z3.h},z16.h'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmin {z10.h-z15.h},{z10.h-z15.h},z1.h'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmin {z14.h-z7.h},{z4.h-z7.h},z3.s'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmin {z4.h-z17.h},{z1.h-z7.h},z17.b'
+.*: Error: operand mismatch -- `bfmin {z0.s-z1.h},{z0.h-z1.h},{z0.h-z1.h}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmin {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h}
+.*: Error: invalid range in vector register list at operand 2 -- `bfmin {z30.h-z31.h},{z31.s-z31.h},{z0.h-z1.h}'
+.*: Error: operand mismatch -- `bfmin {z0.h-z1.h},{z0.h-z1.h},{z30.b-z31.b}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmin {z0.h-z1.h}, {z0.h-z1.h}, {z30.h-z31.h}
+.*: Error: too many registers in vector register list at operand 3 -- `bfmin {z14.h-z15.h},{z14.h-z15.h},{z14.s-z10.s}'
+.*: Error: operand 2 must be the same register as operand 1 -- `bfmin {z6.h-z7.h},{z16.h-z17.h},{z30.h-z31.h}'
+.*: Error: comma expected between operands at operand 3 -- `bfmin {z2.h-z3.h},{z2.h-z3.h}'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmin {z20.h-z31.h},{z0.h-z3.h},{z0.h-z3.h}'
+.*: Error: too many registers in vector register list at operand 2 -- `bfmin {z28.h-z31.h},{z21.s-z31.s},{z0.h-z3.h}'
+.*: Error: operand mismatch -- `bfmin {z0.h-z3.h},{z0.h-z3.h},{z28.b-z31.b}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfmin {z0.h-z3.h}, {z0.h-z3.h}, {z28.h-z31.h}
+.*: Error: too many registers in vector register list at operand 3 -- `bfmin {z12.h-z15.h},{z12.h-z15.h},{z10.h-z25.h}'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmin {z4.h-z17.h},{z4.h-z7.h},{z14.h-z17.h}'
+.*: Error: too many registers in vector register list at operand 1 -- `bfmin {z14.h-z7.h},{z4.s-z7.b},{z10.h-z3.h}'
+.*: Error: too many registers in vector register list at operand 2 -- `bfminnm {z30.h-z31.h},{z0.h-z31.h},z0.h'
+.*: Error: z0-z15 expected at operand 3 -- `bfminnm {z0.h-z1.h},{z0.h-z1.h},z16.h'
+.*: Error: operand mismatch -- `bfminnm {z14.s-z15.s},{z14.s-z15.s},z1.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfminnm {z14.h-z15.h}, {z14.h-z15.h}, z1.h
+.*: Error: too many registers in vector register list at operand 1 -- `bfminnm {z2.h-z7.h},{z30.h-z7.h},z3.d'
+.*: Error: too many registers in vector register list at operand 1 -- `bfminnm {z4.h-z3.h},{z4.h-z3.h},z7.b'
+.*: Error: operand mismatch -- `bfminnm {z28.h-z31.h},{z28.s-z31.h},z0.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfminnm {z28.h-z31.h}, {z28.h-z31.h}, z0.h
+.*: Error: z0-z15 expected at operand 3 -- `bfminnm {z0.h-z3.h},{z0.h-z3.h},z16.h'
+.*: Error: too many registers in vector register list at operand 1 -- `bfminnm {z10.h-z15.h},{z10.h-z15.h},z1.h'
+.*: Error: too many registers in vector register list at operand 1 -- `bfminnm {z14.h-z7.h},{z4.h-z7.h},z3.s'
+.*: Error: too many registers in vector register list at operand 1 -- `bfminnm {z4.h-z17.h},{z1.h-z7.h},z17.b'
+.*: Error: operand mismatch -- `bfminnm {z0.s-z1.h},{z0.h-z1.h},{z0.h-z1.h}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfminnm {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h}
+.*: Error: invalid range in vector register list at operand 2 -- `bfminnm {z30.h-z31.h},{z31.s-z31.h},{z0.h-z1.h}'
+.*: Error: operand mismatch -- `bfminnm {z0.h-z1.h},{z0.h-z1.h},{z30.b-z31.b}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfminnm {z0.h-z1.h}, {z0.h-z1.h}, {z30.h-z31.h}
+.*: Error: too many registers in vector register list at operand 3 -- `bfminnm {z14.h-z15.h},{z14.h-z15.h},{z14.s-z10.s}'
+.*: Error: operand 2 must be the same register as operand 1 -- `bfminnm {z6.h-z7.h},{z16.h-z17.h},{z30.h-z31.h}'
+.*: Error: comma expected between operands at operand 3 -- `bfminnm {z2.h-z3.h},{z2.h-z3.h}'
+.*: Error: too many registers in vector register list at operand 1 -- `bfminnm {z20.h-z31.h},{z0.h-z3.h},{z0.h-z3.h}'
+.*: Error: too many registers in vector register list at operand 2 -- `bfminnm {z28.h-z31.h},{z21.s-z31.s},{z0.h-z3.h}'
+.*: Error: operand mismatch -- `bfminnm {z0.h-z3.h},{z0.h-z3.h},{z28.b-z31.b}'
+.*: Info:    did you mean this\?
+.*: Info:    	bfminnm {z0.h-z3.h}, {z0.h-z3.h}, {z28.h-z31.h}
+.*: Error: too many registers in vector register list at operand 3 -- `bfminnm {z12.h-z15.h},{z12.h-z15.h},{z10.h-z25.h}'
+.*: Error: too many registers in vector register list at operand 1 -- `bfminnm {z4.h-z17.h},{z4.h-z7.h},{z14.h-z17.h}'
+.*: Error: too many registers in vector register list at operand 1 -- `bfminnm {z14.h-z7.h},{z4.s-z7.b},{z10.h-z3.h}'
+.*: Error: operand mismatch -- `bfclamp {z0.s-z1.s},z0.h,z0.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfclamp {z0.h-z1.h}, z0.h, z0.h
+.*: Error: too many registers in vector register list at operand 1 -- `bfclamp {z31.h-z30.h},z0.h,z0.h'
+.*: Error: operand mismatch -- `bfclamp {z0.h-z1.h},z31.s,z0.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfclamp {z0.h-z1.h}, z31.h, z0.h
+.*: Error: operand mismatch -- `bfclamp {z0.h-z1.h},z0.h,z31.d'
+.*: Info:    did you mean this\?
+.*: Info:    	bfclamp {z0.h-z1.h}, z0.h, z31.h
+.*: Error: operand mismatch -- `bfclamp {z16.h-z17.h},z1.b,z15.b'
+.*: Info:    did you mean this\?
+.*: Info:    	bfclamp {z16.h-z17.h}, z1.h, z15.h
+.*: Error: too many registers in vector register list at operand 1 -- `bfclamp {z16.h-z21.h},z3.s,z7.d'
+.*: Error: invalid range in vector register list at operand 1 -- `bfclamp {z2.h-z2.h},z7.s,z31.d'
+.*: Error: operand mismatch -- `bfclamp {z0.s-z3.s},z0.h,z0.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfclamp {z0.h-z3.h}, z0.h, z0.h
+.*: Error: too many registers in vector register list at operand 1 -- `bfclamp {z25.h-z31.h},z0.h,z0.h'
+.*: Error: operand mismatch -- `bfclamp {z0.h-z3.h},z31.s,z0.h'
+.*: Info:    did you mean this\?
+.*: Info:    	bfclamp {z0.h-z3.h}, z31.h, z0.h
+.*: Error: operand mismatch -- `bfclamp {z0.h-z3.h},z0.h,z31.d'
+.*: Info:    did you mean this\?
+.*: Info:    	bfclamp {z0.h-z3.h}, z0.h, z31.h
+.*: Error: operand mismatch -- `bfclamp {z31.h-z2.h},z1.b,z15.b'
+.*: Info:    did you mean this\?
+.*: Info:    	bfclamp {z31.h-z2.h}, z1.h, z15.h
+.*: Error: too many registers in vector register list at operand 1 -- `bfclamp {z18.s-z15.s},z3.h,z7.h'
+.*: Error: too many registers in vector register list at operand 1 -- `bfclamp {z14.h-z27.h},z7.s,z31.d'
diff --git a/gas/testsuite/gas/aarch64/bfloat16-sme2-2-bad.s b/gas/testsuite/gas/aarch64/bfloat16-sme2-2-bad.s
new file mode 100644
index 00000000000..fb27f2d078c
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-sme2-2-bad.s
@@ -0,0 +1,136 @@ 
+/* BFMAX.  */
+bfmax {z0.h - z1.s}, {z0.h - z1.h}, z0.h
+bfmax {z30.h - z31.h}, {z0.h - z31.h}, z0.h
+bfmax {z0.h - z1.h}, {z0.h - z1.h}, z16.h
+bfmax {z14.s - z15.s}, {z14.s - z15.s}, z1.h
+bfmax {z2.h - z7.h}, {z30.h - z7.h}, z3.d
+bfmax {z4.h - z3.h}, {z4.h - z3.h}, z7.b
+
+bfmax {z0.h - z3.s}, {z0.h - z3.h}, z0.h
+bfmax {z28.h - z31.h}, {z28.s - z31.h}, z0.h
+bfmax {z0.h - z3.h}, {z0.h - z3.h}, z16.h
+bfmax {z10.h - z15.h}, {z10.h - z15.h}, z1.h
+bfmax {z14.h - z7.h}, {z4.h - z7.h}, z3.s
+bfmax {z4.h - z17.h}, {z1.h - z7.h}, z17.b
+
+/* BFMAX (multiple vectors).  */
+bfmax {z0.s - z1.h}, {z0.h - z1.h}, {z0.h - z1.h}
+bfmax {z30.h - z31.h}, {z31.s - z31.h}, {z0.h - z1.h}
+bfmax {z0.h - z1.h}, {z0.h - z1.h}, {z30.b - z31.b}
+bfmax {z14.h - z15.h}, {z14.h - z15.h}, {z14.s - z10.s}
+bfmax {z6.h - z7.h}, {z16.h - z17.h}, {z30.h - z31.h}
+bfmax {z2.h - z3.h}, {z2.h - z3.h}
+
+bfmax {z20.h - z31.h}, {z0.h - z3.h}, {z0.h - z3.h}
+bfmax {z28.h - z31.h}, {z21.s - z31.s}, {z0.h - z3.h}
+bfmax {z0.h - z3.h}, {z0.h - z3.h}, {z28.b - z31.b}
+bfmax {z12.h - z15.h}, {z12.h - z15.h}, {z10.h - z25.h}
+bfmax {z4.h - z17.h}, {z4.h - z7.h}, {z14.h - z17.h}
+bfmax {z14.h - z7.h}, {z4.s - z7.b}, {z10.h - z3.h}
+
+/* BFMAXNM.  */
+bfmaxnm {z0.h - z1.s}, {z0.h - z1.h}, z0.h
+bfmaxnm {z30.h - z31.h}, {z0.h - z31.h}, z0.h
+bfmaxnm {z0.h - z1.h}, {z0.h - z1.h}, z16.h
+bfmaxnm {z14.s - z15.s}, {z14.s - z15.s}, z1.h
+bfmaxnm {z2.h - z7.h}, {z30.h - z7.h}, z3.d
+bfmaxnm {z4.h - z3.h}, {z4.h - z3.h}, z7.b
+
+bfmaxnm {z0.h - z3.s}, {z0.h - z3.h}, z0.h
+bfmaxnm {z28.h - z31.h}, {z28.s - z31.h}, z0.h
+bfmaxnm {z0.h - z3.h}, {z0.h - z3.h}, z16.h
+bfmaxnm {z10.h - z15.h}, {z10.h - z15.h}, z1.h
+bfmaxnm {z14.h - z7.h}, {z4.h - z7.h}, z3.s
+bfmaxnm {z4.h - z17.h}, {z1.h - z7.h}, z17.b
+
+/* BFMAXNM (multiple vectors).  */
+bfmaxnm {z0.s - z1.h}, {z0.h - z1.h}, {z0.h - z1.h}
+bfmaxnm {z30.h - z31.h}, {z31.s - z31.h}, {z0.h - z1.h}
+bfmaxnm {z0.h - z1.h}, {z0.h - z1.h}, {z30.b - z31.b}
+bfmaxnm {z14.h - z15.h}, {z14.h - z15.h}, {z14.s - z10.s}
+bfmaxnm {z6.h - z7.h}, {z16.h - z17.h}, {z30.h - z31.h}
+bfmaxnm {z2.h - z3.h}, {z2.h - z3.h}
+
+bfmaxnm {z20.h - z31.h}, {z0.h - z3.h}, {z0.h - z3.h}
+bfmaxnm {z28.h - z31.h}, {z21.s - z31.s}, {z0.h - z3.h}
+bfmaxnm {z0.h - z3.h}, {z0.h - z3.h}, {z28.b - z31.b}
+bfmaxnm {z12.h - z15.h}, {z12.h - z15.h}, {z10.h - z25.h}
+bfmaxnm {z4.h - z17.h}, {z4.h - z7.h}, {z14.h - z17.h}
+bfmaxnm {z14.h - z7.h}, {z4.s - z7.b}, {z10.h - z3.h}
+
+/* BFMIN.  */
+bfmin {z0.h - z1.s}, {z0.h - z1.h}, z0.h
+bfmin {z30.h - z31.h}, {z0.h - z31.h}, z0.h
+bfmin {z0.h - z1.h}, {z0.h - z1.h}, z16.h
+bfmin {z14.s - z15.s}, {z14.s - z15.s}, z1.h
+bfmin {z2.h - z7.h}, {z30.h - z7.h}, z3.d
+bfmin {z4.h - z3.h}, {z4.h - z3.h}, z7.b
+
+bfmin {z0.h - z3.s}, {z0.h - z3.h}, z0.h
+bfmin {z28.h - z31.h}, {z28.s - z31.h}, z0.h
+bfmin {z0.h - z3.h}, {z0.h - z3.h}, z16.h
+bfmin {z10.h - z15.h}, {z10.h - z15.h}, z1.h
+bfmin {z14.h - z7.h}, {z4.h - z7.h}, z3.s
+bfmin {z4.h - z17.h}, {z1.h - z7.h}, z17.b
+
+/* BFMIN (multiple vectors).  */
+bfmin {z0.s - z1.h}, {z0.h - z1.h}, {z0.h - z1.h}
+bfmin {z30.h - z31.h}, {z31.s - z31.h}, {z0.h - z1.h}
+bfmin {z0.h - z1.h}, {z0.h - z1.h}, {z30.b - z31.b}
+bfmin {z14.h - z15.h}, {z14.h - z15.h}, {z14.s - z10.s}
+bfmin {z6.h - z7.h}, {z16.h - z17.h}, {z30.h - z31.h}
+bfmin {z2.h - z3.h}, {z2.h - z3.h}
+
+bfmin {z20.h - z31.h}, {z0.h - z3.h}, {z0.h - z3.h}
+bfmin {z28.h - z31.h}, {z21.s - z31.s}, {z0.h - z3.h}
+bfmin {z0.h - z3.h}, {z0.h - z3.h}, {z28.b - z31.b}
+bfmin {z12.h - z15.h}, {z12.h - z15.h}, {z10.h - z25.h}
+bfmin {z4.h - z17.h}, {z4.h - z7.h}, {z14.h - z17.h}
+bfmin {z14.h - z7.h}, {z4.s - z7.b}, {z10.h - z3.h}
+
+/* BFMINNM.  */
+bfminnm {z0.h - z1.s}, {z0.h - z1.h}, z0.h
+bfminnm {z30.h - z31.h}, {z0.h - z31.h}, z0.h
+bfminnm {z0.h - z1.h}, {z0.h - z1.h}, z16.h
+bfminnm {z14.s - z15.s}, {z14.s - z15.s}, z1.h
+bfminnm {z2.h - z7.h}, {z30.h - z7.h}, z3.d
+bfminnm {z4.h - z3.h}, {z4.h - z3.h}, z7.b
+
+bfminnm {z0.h - z3.s}, {z0.h - z3.h}, z0.h
+bfminnm {z28.h - z31.h}, {z28.s - z31.h}, z0.h
+bfminnm {z0.h - z3.h}, {z0.h - z3.h}, z16.h
+bfminnm {z10.h - z15.h}, {z10.h - z15.h}, z1.h
+bfminnm {z14.h - z7.h}, {z4.h - z7.h}, z3.s
+bfminnm {z4.h - z17.h}, {z1.h - z7.h}, z17.b
+
+/* BFMINNM (multiple vectors).  */
+bfminnm {z0.s - z1.h}, {z0.h - z1.h}, {z0.h - z1.h}
+bfminnm {z30.h - z31.h}, {z31.s - z31.h}, {z0.h - z1.h}
+bfminnm {z0.h - z1.h}, {z0.h - z1.h}, {z30.b - z31.b}
+bfminnm {z14.h - z15.h}, {z14.h - z15.h}, {z14.s - z10.s}
+bfminnm {z6.h - z7.h}, {z16.h - z17.h}, {z30.h - z31.h}
+bfminnm {z2.h - z3.h}, {z2.h - z3.h}
+
+bfminnm {z20.h - z31.h}, {z0.h - z3.h}, {z0.h - z3.h}
+bfminnm {z28.h - z31.h}, {z21.s - z31.s}, {z0.h - z3.h}
+bfminnm {z0.h - z3.h}, {z0.h - z3.h}, {z28.b - z31.b}
+bfminnm {z12.h - z15.h}, {z12.h - z15.h}, {z10.h - z25.h}
+bfminnm {z4.h - z17.h}, {z4.h - z7.h}, {z14.h - z17.h}
+bfminnm {z14.h - z7.h}, {z4.s - z7.b}, {z10.h - z3.h}
+
+/* BFCLAMP.  */
+bfclamp {z0.s - z1.s}, z0.h, z0.h
+bfclamp {z31.h - z30.h}, z0.h, z0.h
+bfclamp {z0.h - z1.h}, z31.s, z0.h
+bfclamp {z0.h - z1.h}, z0.h, z31.d
+bfclamp {z16.h - z17.h}, z1.b, z15.b
+bfclamp {z16.h - z21.h}, z3.s, z7.d
+bfclamp {z2.h - z2.h}, z7.s, z31.d
+
+bfclamp {z0.s - z3.s}, z0.h, z0.h
+bfclamp {z25.h - z31.h}, z0.h, z0.h
+bfclamp {z0.h - z3.h}, z31.s, z0.h
+bfclamp {z0.h - z3.h}, z0.h, z31.d
+bfclamp {z31.h - z2.h}, z1.b, z15.b
+bfclamp {z18.s - z15.s}, z3.h, z7.h
+bfclamp {z14.h - z27.h}, z7.s, z31.d
diff --git a/gas/testsuite/gas/aarch64/bfloat16-sme2-2.d b/gas/testsuite/gas/aarch64/bfloat16-sme2-2.d
new file mode 100644
index 00000000000..04e4a107636
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-sme2-2.d
@@ -0,0 +1,122 @@ 
+#name: Test of SME2 non-widening BFloat16 min max instructions.
+#as: -march=armv9.4-a+sve-b16b16+sme2
+#objdump: -dr
+
+[^:]+:     file format .*
+
+
+[^:]+:
+
+[^:]+:
+.*:	c120a100 	bfmax	{z0.h-z1.h}, {z0.h-z1.h}, z0.h
+.*:	c120a11e 	bfmax	{z30.h-z31.h}, {z30.h-z31.h}, z0.h
+.*:	c12fa100 	bfmax	{z0.h-z1.h}, {z0.h-z1.h}, z15.h
+.*:	c121a10e 	bfmax	{z14.h-z15.h}, {z14.h-z15.h}, z1.h
+.*:	c123a106 	bfmax	{z6.h-z7.h}, {z6.h-z7.h}, z3.h
+.*:	c127a102 	bfmax	{z2.h-z3.h}, {z2.h-z3.h}, z7.h
+.*:	c120a900 	bfmax	{z0.h-z3.h}, {z0.h-z3.h}, z0.h
+.*:	c120a91c 	bfmax	{z28.h-z31.h}, {z28.h-z31.h}, z0.h
+.*:	c12fa900 	bfmax	{z0.h-z3.h}, {z0.h-z3.h}, z15.h
+.*:	c121a90c 	bfmax	{z12.h-z15.h}, {z12.h-z15.h}, z1.h
+.*:	c123a904 	bfmax	{z4.h-z7.h}, {z4.h-z7.h}, z3.h
+.*:	c127a904 	bfmax	{z4.h-z7.h}, {z4.h-z7.h}, z7.h
+.*:	c120b100 	bfmax	{z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h}
+.*:	c120b11e 	bfmax	{z30.h-z31.h}, {z30.h-z31.h}, {z0.h-z1.h}
+.*:	c13eb100 	bfmax	{z0.h-z1.h}, {z0.h-z1.h}, {z30.h-z31.h}
+.*:	c12eb10e 	bfmax	{z14.h-z15.h}, {z14.h-z15.h}, {z14.h-z15.h}
+.*:	c126b106 	bfmax	{z6.h-z7.h}, {z6.h-z7.h}, {z6.h-z7.h}
+.*:	c122b102 	bfmax	{z2.h-z3.h}, {z2.h-z3.h}, {z2.h-z3.h}
+.*:	c120b900 	bfmax	{z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z3.h}
+.*:	c120b91c 	bfmax	{z28.h-z31.h}, {z28.h-z31.h}, {z0.h-z3.h}
+.*:	c13cb900 	bfmax	{z0.h-z3.h}, {z0.h-z3.h}, {z28.h-z31.h}
+.*:	c12cb90c 	bfmax	{z12.h-z15.h}, {z12.h-z15.h}, {z12.h-z15.h}
+.*:	c124b904 	bfmax	{z4.h-z7.h}, {z4.h-z7.h}, {z4.h-z7.h}
+.*:	c120b904 	bfmax	{z4.h-z7.h}, {z4.h-z7.h}, {z0.h-z3.h}
+.*:	c120a120 	bfmaxnm	{z0.h-z1.h}, {z0.h-z1.h}, z0.h
+.*:	c120a13e 	bfmaxnm	{z30.h-z31.h}, {z30.h-z31.h}, z0.h
+.*:	c12fa120 	bfmaxnm	{z0.h-z1.h}, {z0.h-z1.h}, z15.h
+.*:	c121a12e 	bfmaxnm	{z14.h-z15.h}, {z14.h-z15.h}, z1.h
+.*:	c123a126 	bfmaxnm	{z6.h-z7.h}, {z6.h-z7.h}, z3.h
+.*:	c127a122 	bfmaxnm	{z2.h-z3.h}, {z2.h-z3.h}, z7.h
+.*:	c120a920 	bfmaxnm	{z0.h-z3.h}, {z0.h-z3.h}, z0.h
+.*:	c120a93c 	bfmaxnm	{z28.h-z31.h}, {z28.h-z31.h}, z0.h
+.*:	c12fa920 	bfmaxnm	{z0.h-z3.h}, {z0.h-z3.h}, z15.h
+.*:	c121a92c 	bfmaxnm	{z12.h-z15.h}, {z12.h-z15.h}, z1.h
+.*:	c123a924 	bfmaxnm	{z4.h-z7.h}, {z4.h-z7.h}, z3.h
+.*:	c127a924 	bfmaxnm	{z4.h-z7.h}, {z4.h-z7.h}, z7.h
+.*:	c120b120 	bfmaxnm	{z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h}
+.*:	c120b13e 	bfmaxnm	{z30.h-z31.h}, {z30.h-z31.h}, {z0.h-z1.h}
+.*:	c13eb120 	bfmaxnm	{z0.h-z1.h}, {z0.h-z1.h}, {z30.h-z31.h}
+.*:	c12eb12e 	bfmaxnm	{z14.h-z15.h}, {z14.h-z15.h}, {z14.h-z15.h}
+.*:	c126b126 	bfmaxnm	{z6.h-z7.h}, {z6.h-z7.h}, {z6.h-z7.h}
+.*:	c122b122 	bfmaxnm	{z2.h-z3.h}, {z2.h-z3.h}, {z2.h-z3.h}
+.*:	c120b920 	bfmaxnm	{z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z3.h}
+.*:	c120b93c 	bfmaxnm	{z28.h-z31.h}, {z28.h-z31.h}, {z0.h-z3.h}
+.*:	c13cb920 	bfmaxnm	{z0.h-z3.h}, {z0.h-z3.h}, {z28.h-z31.h}
+.*:	c12cb92c 	bfmaxnm	{z12.h-z15.h}, {z12.h-z15.h}, {z12.h-z15.h}
+.*:	c124b924 	bfmaxnm	{z4.h-z7.h}, {z4.h-z7.h}, {z4.h-z7.h}
+.*:	c120b924 	bfmaxnm	{z4.h-z7.h}, {z4.h-z7.h}, {z0.h-z3.h}
+.*:	c120a101 	bfmin	{z0.h-z1.h}, {z0.h-z1.h}, z0.h
+.*:	c120a11f 	bfmin	{z30.h-z31.h}, {z30.h-z31.h}, z0.h
+.*:	c12fa101 	bfmin	{z0.h-z1.h}, {z0.h-z1.h}, z15.h
+.*:	c121a10f 	bfmin	{z14.h-z15.h}, {z14.h-z15.h}, z1.h
+.*:	c123a107 	bfmin	{z6.h-z7.h}, {z6.h-z7.h}, z3.h
+.*:	c127a103 	bfmin	{z2.h-z3.h}, {z2.h-z3.h}, z7.h
+.*:	c120a901 	bfmin	{z0.h-z3.h}, {z0.h-z3.h}, z0.h
+.*:	c120a91d 	bfmin	{z28.h-z31.h}, {z28.h-z31.h}, z0.h
+.*:	c12fa901 	bfmin	{z0.h-z3.h}, {z0.h-z3.h}, z15.h
+.*:	c121a90d 	bfmin	{z12.h-z15.h}, {z12.h-z15.h}, z1.h
+.*:	c123a905 	bfmin	{z4.h-z7.h}, {z4.h-z7.h}, z3.h
+.*:	c127a905 	bfmin	{z4.h-z7.h}, {z4.h-z7.h}, z7.h
+.*:	c120b101 	bfmin	{z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h}
+.*:	c120b11f 	bfmin	{z30.h-z31.h}, {z30.h-z31.h}, {z0.h-z1.h}
+.*:	c13eb101 	bfmin	{z0.h-z1.h}, {z0.h-z1.h}, {z30.h-z31.h}
+.*:	c12eb10f 	bfmin	{z14.h-z15.h}, {z14.h-z15.h}, {z14.h-z15.h}
+.*:	c126b107 	bfmin	{z6.h-z7.h}, {z6.h-z7.h}, {z6.h-z7.h}
+.*:	c122b103 	bfmin	{z2.h-z3.h}, {z2.h-z3.h}, {z2.h-z3.h}
+.*:	c120b901 	bfmin	{z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z3.h}
+.*:	c120b91d 	bfmin	{z28.h-z31.h}, {z28.h-z31.h}, {z0.h-z3.h}
+.*:	c13cb901 	bfmin	{z0.h-z3.h}, {z0.h-z3.h}, {z28.h-z31.h}
+.*:	c12cb90d 	bfmin	{z12.h-z15.h}, {z12.h-z15.h}, {z12.h-z15.h}
+.*:	c124b905 	bfmin	{z4.h-z7.h}, {z4.h-z7.h}, {z4.h-z7.h}
+.*:	c120b905 	bfmin	{z4.h-z7.h}, {z4.h-z7.h}, {z0.h-z3.h}
+.*:	c120a121 	bfminnm	{z0.h-z1.h}, {z0.h-z1.h}, z0.h
+.*:	c120a13f 	bfminnm	{z30.h-z31.h}, {z30.h-z31.h}, z0.h
+.*:	c12fa121 	bfminnm	{z0.h-z1.h}, {z0.h-z1.h}, z15.h
+.*:	c121a12f 	bfminnm	{z14.h-z15.h}, {z14.h-z15.h}, z1.h
+.*:	c123a127 	bfminnm	{z6.h-z7.h}, {z6.h-z7.h}, z3.h
+.*:	c127a123 	bfminnm	{z2.h-z3.h}, {z2.h-z3.h}, z7.h
+.*:	c120a921 	bfminnm	{z0.h-z3.h}, {z0.h-z3.h}, z0.h
+.*:	c120a93d 	bfminnm	{z28.h-z31.h}, {z28.h-z31.h}, z0.h
+.*:	c12fa921 	bfminnm	{z0.h-z3.h}, {z0.h-z3.h}, z15.h
+.*:	c121a92d 	bfminnm	{z12.h-z15.h}, {z12.h-z15.h}, z1.h
+.*:	c123a925 	bfminnm	{z4.h-z7.h}, {z4.h-z7.h}, z3.h
+.*:	c127a925 	bfminnm	{z4.h-z7.h}, {z4.h-z7.h}, z7.h
+.*:	c120b121 	bfminnm	{z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h}
+.*:	c120b13f 	bfminnm	{z30.h-z31.h}, {z30.h-z31.h}, {z0.h-z1.h}
+.*:	c13eb121 	bfminnm	{z0.h-z1.h}, {z0.h-z1.h}, {z30.h-z31.h}
+.*:	c12eb12f 	bfminnm	{z14.h-z15.h}, {z14.h-z15.h}, {z14.h-z15.h}
+.*:	c126b127 	bfminnm	{z6.h-z7.h}, {z6.h-z7.h}, {z6.h-z7.h}
+.*:	c122b123 	bfminnm	{z2.h-z3.h}, {z2.h-z3.h}, {z2.h-z3.h}
+.*:	c120b921 	bfminnm	{z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z3.h}
+.*:	c120b93d 	bfminnm	{z28.h-z31.h}, {z28.h-z31.h}, {z0.h-z3.h}
+.*:	c13cb921 	bfminnm	{z0.h-z3.h}, {z0.h-z3.h}, {z28.h-z31.h}
+.*:	c12cb92d 	bfminnm	{z12.h-z15.h}, {z12.h-z15.h}, {z12.h-z15.h}
+.*:	c124b925 	bfminnm	{z4.h-z7.h}, {z4.h-z7.h}, {z4.h-z7.h}
+.*:	c120b925 	bfminnm	{z4.h-z7.h}, {z4.h-z7.h}, {z0.h-z3.h}
+.*:	c120c000 	bfclamp	{z0.h-z1.h}, z0.h, z0.h
+.*:	c120c01e 	bfclamp	{z30.h-z31.h}, z0.h, z0.h
+.*:	c120c3e0 	bfclamp	{z0.h-z1.h}, z31.h, z0.h
+.*:	c13fc000 	bfclamp	{z0.h-z1.h}, z0.h, z31.h
+.*:	c12fc02e 	bfclamp	{z14.h-z15.h}, z1.h, z15.h
+.*:	c127c066 	bfclamp	{z6.h-z7.h}, z3.h, z7.h
+.*:	c123c0e2 	bfclamp	{z2.h-z3.h}, z7.h, z3.h
+.*:	c121c1f4 	bfclamp	{z20.h-z21.h}, z15.h, z1.h
+.*:	c120c800 	bfclamp	{z0.h-z3.h}, z0.h, z0.h
+.*:	c120c81c 	bfclamp	{z28.h-z31.h}, z0.h, z0.h
+.*:	c120cbe0 	bfclamp	{z0.h-z3.h}, z31.h, z0.h
+.*:	c13fc800 	bfclamp	{z0.h-z3.h}, z0.h, z31.h
+.*:	c12fc82c 	bfclamp	{z12.h-z15.h}, z1.h, z15.h
+.*:	c127c868 	bfclamp	{z8.h-z11.h}, z3.h, z7.h
+.*:	c123c8e4 	bfclamp	{z4.h-z7.h}, z7.h, z3.h
+.*:	c121c9f4 	bfclamp	{z20.h-z23.h}, z15.h, z1.h
diff --git a/gas/testsuite/gas/aarch64/bfloat16-sme2-2.s b/gas/testsuite/gas/aarch64/bfloat16-sme2-2.s
new file mode 100644
index 00000000000..94cec19df49
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-sme2-2.s
@@ -0,0 +1,138 @@ 
+/* BFMAX (multiple and single vector).  */
+bfmax {z0.h - z1.h}, {z0.h - z1.h}, z0.h
+bfmax {z30.h - z31.h}, {z30.h - z31.h}, z0.h
+bfmax {z0.h - z1.h}, {z0.h - z1.h}, z15.h
+bfmax {z14.h - z15.h}, {z14.h - z15.h}, z1.h
+bfmax {z6.h - z7.h}, {z6.h - z7.h}, z3.h
+bfmax {z2.h - z3.h}, {z2.h - z3.h}, z7.h
+
+bfmax {z0.h - z3.h}, {z0.h - z3.h}, z0.h
+bfmax {z28.h - z31.h}, {z28.h - z31.h}, z0.h
+bfmax {z0.h - z3.h}, {z0.h - z3.h}, z15.h
+bfmax {z12.h - z15.h}, {z12.h - z15.h}, z1.h
+bfmax {z4.h - z7.h}, {z4.h - z7.h}, z3.h
+bfmax {z4.h - z7.h}, {z4.h - z7.h}, z7.h
+
+/* BFMAX (multiple vectors).  */
+bfmax {z0.h - z1.h}, {z0.h - z1.h}, {z0.h - z1.h}
+bfmax {z30.h - z31.h}, {z30.h - z31.h}, {z0.h - z1.h}
+bfmax {z0.h - z1.h}, {z0.h - z1.h}, {z30.h - z31.h}
+bfmax {z14.h - z15.h}, {z14.h - z15.h}, {z14.h - z15.h}
+bfmax {z6.h - z7.h}, {z6.h - z7.h}, {z6.h - z7.h}
+bfmax {z2.h - z3.h}, {z2.h - z3.h}, {z2.h - z3.h}
+
+bfmax {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h}
+bfmax {z28.h - z31.h}, {z28.h - z31.h}, {z0.h - z3.h}
+bfmax {z0.h - z3.h}, {z0.h - z3.h}, {z28.h - z31.h}
+bfmax {z12.h - z15.h}, {z12.h - z15.h}, {z12.h - z15.h}
+bfmax {z4.h - z7.h}, {z4.h - z7.h}, {z4.h - z7.h}
+bfmax {z4.h - z7.h}, {z4.h - z7.h}, {z0.h - z3.h}
+
+/* BFMAXNM (multiple and single vector).  */
+bfmaxnm {z0.h - z1.h}, {z0.h - z1.h}, z0.h
+bfmaxnm {z30.h - z31.h}, {z30.h - z31.h}, z0.h
+bfmaxnm {z0.h - z1.h}, {z0.h - z1.h}, z15.h
+bfmaxnm {z14.h - z15.h}, {z14.h - z15.h}, z1.h
+bfmaxnm {z6.h - z7.h}, {z6.h - z7.h}, z3.h
+bfmaxnm {z2.h - z3.h}, {z2.h - z3.h}, z7.h
+
+bfmaxnm {z0.h - z3.h}, {z0.h - z3.h}, z0.h
+bfmaxnm {z28.h - z31.h}, {z28.h - z31.h}, z0.h
+bfmaxnm {z0.h - z3.h}, {z0.h - z3.h}, z15.h
+bfmaxnm {z12.h - z15.h}, {z12.h - z15.h}, z1.h
+bfmaxnm {z4.h - z7.h}, {z4.h - z7.h}, z3.h
+bfmaxnm {z4.h - z7.h}, {z4.h - z7.h}, z7.h
+
+/* BFMAXNM (multiple vectors).  */
+bfmaxnm {z0.h - z1.h}, {z0.h - z1.h}, {z0.h - z1.h}
+bfmaxnm {z30.h - z31.h}, {z30.h - z31.h}, {z0.h - z1.h}
+bfmaxnm {z0.h - z1.h}, {z0.h - z1.h}, {z30.h - z31.h}
+bfmaxnm {z14.h - z15.h}, {z14.h - z15.h}, {z14.h - z15.h}
+bfmaxnm {z6.h - z7.h}, {z6.h - z7.h}, {z6.h - z7.h}
+bfmaxnm {z2.h - z3.h}, {z2.h - z3.h}, {z2.h - z3.h}
+
+bfmaxnm {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h}
+bfmaxnm {z28.h - z31.h}, {z28.h - z31.h}, {z0.h - z3.h}
+bfmaxnm {z0.h - z3.h}, {z0.h - z3.h}, {z28.h - z31.h}
+bfmaxnm {z12.h - z15.h}, {z12.h - z15.h}, {z12.h - z15.h}
+bfmaxnm {z4.h - z7.h}, {z4.h - z7.h}, {z4.h - z7.h}
+bfmaxnm {z4.h - z7.h}, {z4.h - z7.h}, {z0.h - z3.h}
+
+/* BFMIN (multiple and single vector).  */
+bfmin {z0.h - z1.h}, {z0.h - z1.h}, z0.h
+bfmin {z30.h - z31.h}, {z30.h - z31.h}, z0.h
+bfmin {z0.h - z1.h}, {z0.h - z1.h}, z15.h
+bfmin {z14.h - z15.h}, {z14.h - z15.h}, z1.h
+bfmin {z6.h - z7.h}, {z6.h - z7.h}, z3.h
+bfmin {z2.h - z3.h}, {z2.h - z3.h}, z7.h
+
+bfmin {z0.h - z3.h}, {z0.h - z3.h}, z0.h
+bfmin {z28.h - z31.h}, {z28.h - z31.h}, z0.h
+bfmin {z0.h - z3.h}, {z0.h - z3.h}, z15.h
+bfmin {z12.h - z15.h}, {z12.h - z15.h}, z1.h
+bfmin {z4.h - z7.h}, {z4.h - z7.h}, z3.h
+bfmin {z4.h - z7.h}, {z4.h - z7.h}, z7.h
+
+/* BFMIN (multiple vectors).  */
+bfmin {z0.h - z1.h}, {z0.h - z1.h}, {z0.h - z1.h}
+bfmin {z30.h - z31.h}, {z30.h - z31.h}, {z0.h - z1.h}
+bfmin {z0.h - z1.h}, {z0.h - z1.h}, {z30.h - z31.h}
+bfmin {z14.h - z15.h}, {z14.h - z15.h}, {z14.h - z15.h}
+bfmin {z6.h - z7.h}, {z6.h - z7.h}, {z6.h - z7.h}
+bfmin {z2.h - z3.h}, {z2.h - z3.h}, {z2.h - z3.h}
+
+bfmin {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h}
+bfmin {z28.h - z31.h}, {z28.h - z31.h}, {z0.h - z3.h}
+bfmin {z0.h - z3.h}, {z0.h - z3.h}, {z28.h - z31.h}
+bfmin {z12.h - z15.h}, {z12.h - z15.h}, {z12.h - z15.h}
+bfmin {z4.h - z7.h}, {z4.h - z7.h}, {z4.h - z7.h}
+bfmin {z4.h - z7.h}, {z4.h - z7.h}, {z0.h - z3.h}
+
+/* BFMINNM (multiple and single vector).  */
+bfminnm {z0.h - z1.h}, {z0.h - z1.h}, z0.h
+bfminnm {z30.h - z31.h}, {z30.h - z31.h}, z0.h
+bfminnm {z0.h - z1.h}, {z0.h - z1.h}, z15.h
+bfminnm {z14.h - z15.h}, {z14.h - z15.h}, z1.h
+bfminnm {z6.h - z7.h}, {z6.h - z7.h}, z3.h
+bfminnm {z2.h - z3.h}, {z2.h - z3.h}, z7.h
+
+bfminnm {z0.h - z3.h}, {z0.h - z3.h}, z0.h
+bfminnm {z28.h - z31.h}, {z28.h - z31.h}, z0.h
+bfminnm {z0.h - z3.h}, {z0.h - z3.h}, z15.h
+bfminnm {z12.h - z15.h}, {z12.h - z15.h}, z1.h
+bfminnm {z4.h - z7.h}, {z4.h - z7.h}, z3.h
+bfminnm {z4.h - z7.h}, {z4.h - z7.h}, z7.h
+
+/* BFMINNM (multiple vectors).  */
+bfminnm {z0.h - z1.h}, {z0.h - z1.h}, {z0.h - z1.h}
+bfminnm {z30.h - z31.h}, {z30.h - z31.h}, {z0.h - z1.h}
+bfminnm {z0.h - z1.h}, {z0.h - z1.h}, {z30.h - z31.h}
+bfminnm {z14.h - z15.h}, {z14.h - z15.h}, {z14.h - z15.h}
+bfminnm {z6.h - z7.h}, {z6.h - z7.h}, {z6.h - z7.h}
+bfminnm {z2.h - z3.h}, {z2.h - z3.h}, {z2.h - z3.h}
+
+bfminnm {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h}
+bfminnm {z28.h - z31.h}, {z28.h - z31.h}, {z0.h - z3.h}
+bfminnm {z0.h - z3.h}, {z0.h - z3.h}, {z28.h - z31.h}
+bfminnm {z12.h - z15.h}, {z12.h - z15.h}, {z12.h - z15.h}
+bfminnm {z4.h - z7.h}, {z4.h - z7.h}, {z4.h - z7.h}
+bfminnm {z4.h - z7.h}, {z4.h - z7.h}, {z0.h - z3.h}
+
+/* BFCLAMP.  */
+bfclamp {z0.h - z1.h}, z0.h, z0.h
+bfclamp {z30.h - z31.h}, z0.h, z0.h
+bfclamp {z0.h - z1.h}, z31.h, z0.h
+bfclamp {z0.h - z1.h}, z0.h, z31.h
+bfclamp {z14.h - z15.h}, z1.h, z15.h
+bfclamp {z6.h - z7.h}, z3.h, z7.h
+bfclamp {z2.h - z3.h}, z7.h, z3.h
+bfclamp {z20.h - z21.h}, z15.h, z1.h
+
+bfclamp {z0.h - z3.h}, z0.h, z0.h
+bfclamp {z28.h - z31.h}, z0.h, z0.h
+bfclamp {z0.h - z3.h}, z31.h, z0.h
+bfclamp {z0.h - z3.h}, z0.h, z31.h
+bfclamp {z12.h - z15.h}, z1.h, z15.h
+bfclamp {z8.h - z11.h}, z3.h, z7.h
+bfclamp {z4.h - z7.h}, z7.h, z3.h
+bfclamp {z20.h - z23.h}, z15.h, z1.h
diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h
index 72b48183882..5f43a235dd1 100644
--- a/include/opcode/aarch64.h
+++ b/include/opcode/aarch64.h
@@ -279,6 +279,8 @@  enum aarch64_feature_bit {
   AARCH64_FEATURE_V9_5A,
   /* +sve-b16b16+sve2.  */
   AARCH64_FEATURE_SVE_SVE2_B16B16,
+  /* +sve-b16b16+sme2.  */
+  AARCH64_FEATURE_SVE_SME2_B16B16,
   AARCH64_NUM_FEATURES
 };
 
diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
index ea30fb31ba5..82816d5db38 100644
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -2813,6 +2813,8 @@  static const aarch64_feature_set aarch64_feature_d128_the =
   AARCH64_FEATURES (2, D128, THE);
 static const aarch64_feature_set aarch64_feature_sve_sve2_b16b16 =
   AARCH64_FEATURES (3, SVE_B16B16, SVE2, SVE_SVE2_B16B16);
+static const aarch64_feature_set aarch64_feature_sve_sme2_b16b16 =
+  AARCH64_FEATURES (3, SVE_B16B16, SME2, SVE_SME2_B16B16);
 static const aarch64_feature_set aarch64_feature_sme2p1 =
   AARCH64_FEATURE (SME2p1);
 static const aarch64_feature_set aarch64_feature_sve2p1 =
@@ -2924,6 +2926,7 @@  static const aarch64_feature_set aarch64_feature_sme_f16f16_f8f16 =
 #define THE	  &aarch64_feature_the
 #define D128_THE  &aarch64_feature_d128_the
 #define SVE_SVE2_B16B16  &aarch64_feature_sve_sve2_b16b16
+#define SVE_SME2_B16B16  &aarch64_feature_sve_sme2_b16b16
 #define SME2p1  &aarch64_feature_sme2p1
 #define SVE2p1  &aarch64_feature_sve2p1
 #define RCPC3	  &aarch64_feature_rcpc3
@@ -3026,6 +3029,12 @@  static const aarch64_feature_set aarch64_feature_sme_f16f16_f8f16 =
 #define SVE_SVE2_B16B16_INSNC(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,CONSTRAINTS,TIED) \
   { NAME, OPCODE, MASK, CLASS, OP, SVE_SVE2_B16B16, OPS, QUALS, \
     FLAGS | F_STRICT, CONSTRAINTS, TIED, NULL }
+#define SVE_SME2_B16B16_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,TIED) \
+  { NAME, OPCODE, MASK, CLASS, OP, SVE_SME2_B16B16, OPS, QUALS, \
+    FLAGS | F_STRICT, 0, TIED, NULL }
+#define SVE_SME2_B16B16_INSNC(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,CONSTRAINTS,TIED) \
+  { NAME, OPCODE, MASK, CLASS, OP, SVE_SME2_B16B16, OPS, QUALS, \
+    FLAGS | F_STRICT, CONSTRAINTS, TIED, NULL }
 #define SVE2p1_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,TIED) \
   { NAME, OPCODE, MASK, CLASS, OP, SVE2p1, OPS, QUALS, \
     FLAGS | F_STRICT, 0, TIED, NULL }
@@ -6643,6 +6652,26 @@  const struct aarch64_opcode aarch64_opcode_table[] =
   SVE_SVE2_B16B16_INSN("bfsub", 0x65000400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
   SVE_SVE2_B16B16_INSN("bfmul", 0x64202800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
 
+/* SME Z-targeting multi-vector non-widening BFloat16 instructions.  */
+  SVE_SME2_B16B16_INSN("bfmax", 0xc120a100, 0xfff0ffe1, sme_misc, 0, OP3 (SME_Zdnx2, SME_Zdnx2, SME_Zm), OP_SVE_HHH, F_OD(2), 1),
+  SVE_SME2_B16B16_INSN("bfmax", 0xc120a900, 0xfff0ffe3, sme_misc, 0, OP3 (SME_Zdnx4, SME_Zdnx4, SME_Zm), OP_SVE_HHH, F_OD(4), 1),
+  SVE_SME2_B16B16_INSN("bfmax", 0xc120b100, 0xffe1ffe1, sme_misc, 0, OP3 (SME_Zdnx2, SME_Zdnx2, SME_Zmx2), OP_SVE_HHH, F_OD(2), 1),
+  SVE_SME2_B16B16_INSN("bfmax", 0xc120b900, 0xffe3ffe3, sme_misc, 0, OP3 (SME_Zdnx4, SME_Zdnx4, SME_Zmx4), OP_SVE_HHH, F_OD(4), 1),
+  SVE_SME2_B16B16_INSN("bfmaxnm", 0xc120a120, 0xfff0ffe1, sme_misc, 0, OP3 (SME_Zdnx2, SME_Zdnx2, SME_Zm), OP_SVE_HHH, F_OD(2), 1),
+  SVE_SME2_B16B16_INSN("bfmaxnm", 0xc120a920, 0xfff0ffe3, sme_misc, 0, OP3 (SME_Zdnx4, SME_Zdnx4, SME_Zm), OP_SVE_HHH, F_OD(4), 1),
+  SVE_SME2_B16B16_INSN("bfmaxnm", 0xc120b120, 0xffe1ffe1, sme_misc, 0, OP3 (SME_Zdnx2, SME_Zdnx2, SME_Zmx2), OP_SVE_HHH, F_OD(2), 1),
+  SVE_SME2_B16B16_INSN("bfmaxnm", 0xc120b920, 0xffe3ffe3, sme_misc, 0, OP3 (SME_Zdnx4, SME_Zdnx4, SME_Zmx4), OP_SVE_HHH, F_OD(4), 1),
+  SVE_SME2_B16B16_INSN("bfmin", 0xc120a101, 0xfff0ffe1, sme_misc, 0, OP3 (SME_Zdnx2, SME_Zdnx2, SME_Zm), OP_SVE_HHH, F_OD(2), 1),
+  SVE_SME2_B16B16_INSN("bfmin", 0xc120a901, 0xfff0ffe3, sme_misc, 0, OP3 (SME_Zdnx4, SME_Zdnx4, SME_Zm), OP_SVE_HHH, F_OD(4), 1),
+  SVE_SME2_B16B16_INSN("bfmin", 0xc120b101, 0xffe1ffe1, sme_misc, 0, OP3 (SME_Zdnx2, SME_Zdnx2, SME_Zmx2), OP_SVE_HHH, F_OD(2), 1),
+  SVE_SME2_B16B16_INSN("bfmin", 0xc120b901, 0xffe3ffe3, sme_misc, 0, OP3 (SME_Zdnx4, SME_Zdnx4, SME_Zmx4), OP_SVE_HHH, F_OD(4), 1),
+  SVE_SME2_B16B16_INSN("bfminnm", 0xc120a121, 0xfff0ffe1, sme_misc, 0, OP3 (SME_Zdnx2, SME_Zdnx2, SME_Zm), OP_SVE_HHH, F_OD(2), 1),
+  SVE_SME2_B16B16_INSN("bfminnm", 0xc120a921, 0xfff0ffe3, sme_misc, 0, OP3 (SME_Zdnx4, SME_Zdnx4, SME_Zm), OP_SVE_HHH, F_OD(4), 1),
+  SVE_SME2_B16B16_INSN("bfminnm", 0xc120b121, 0xffe1ffe1, sme_misc, 0, OP3 (SME_Zdnx2, SME_Zdnx2, SME_Zmx2), OP_SVE_HHH, F_OD(2), 1),
+  SVE_SME2_B16B16_INSN("bfminnm", 0xc120b921, 0xffe3ffe3, sme_misc, 0, OP3 (SME_Zdnx4, SME_Zdnx4, SME_Zmx4), OP_SVE_HHH, F_OD(4), 1),
+  SVE_SME2_B16B16_INSN("bfclamp", 0xc120c000, 0xffe0fc01, sme_misc, 0, OP3 (SME_Zdnx2, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, F_OD(2), 0),
+  SVE_SME2_B16B16_INSN("bfclamp", 0xc120c800, 0xffe0fc03, sme_misc, 0, OP3 (SME_Zdnx4, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, F_OD(4), 0),
+
 /* SME2.1 movaz instructions.  */
   SME2p1_INSN ("movaz", 0xc0060600, 0xffff1f83, sme2_movaz, 0, OP2 (SME_Zdnx4, SME_ZA_array_vrsb_2), OP_SVE_BB, 0, 0),
   SME2p1_INSN ("movaz", 0xc0460600, 0xffff1f83, sme2_movaz, 0, OP2 (SME_Zdnx4, SME_ZA_array_vrsh_2), OP_SVE_HH, 0, 0),