[2/6] Arm64: check matching operands for predicated B16B16 insns

Message ID 99f3418f-c836-4ae4-a6d1-ce81168d1b9a@suse.com
State Committed
Headers
Series Arm64: (mostly) SVE adjustments |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Testing passed

Commit Message

Jan Beulich Feb. 23, 2024, 11:28 a.m. UTC
  Except for bfml{a,s}, the 1st and 3rd operands of the predicated B16B16
insns need to match - pass the TIED macro argument accordingly. While
doing that also slightly re-arrange table entries, such that all
predicated insns are close together.

At the same time change the existing test source to actually use non-
matching operands for the respective bfml{a,s} forms.
  

Comments

Richard Earnshaw (lists) March 20, 2024, 4:19 p.m. UTC | #1
On 23/02/2024 11:28, Jan Beulich wrote:
> Except for bfml{a,s} their 1st and 3rd operands need to match - pass
> the TIED macro argument accordingly. While doing that also slightly
> re-arrange table entries, such that all predicated insns are close
> together.
> 
> At the same time change the existing test source to actually use non-
> matching operands for the respective bfml{a,s} forms.

OK.

R.

> 
> --- a/gas/testsuite/gas/aarch64/bfloat16-1.d
> +++ b/gas/testsuite/gas/aarch64/bfloat16-1.d
> @@ -50,24 +50,24 @@
>  .*:	64222604 	bfclamp	z4.h, z16.h, z2.h
>  .*:	64212688 	bfclamp	z8.h, z20.h, z1.h
>  .*:	64202710 	bfclamp	z16.h, z24.h, z0.h
> -.*:	65300000 	bfmla	z0.h, p0\/m, z0.h, z16.h
> -.*:	65280421 	bfmla	z1.h, p1\/m, z1.h, z8.h
> -.*:	65240842 	bfmla	z2.h, p2\/m, z2.h, z4.h
> -.*:	65221084 	bfmla	z4.h, p4\/m, z4.h, z2.h
> -.*:	65211908 	bfmla	z8.h, p6\/m, z8.h, z1.h
> -.*:	65201e10 	bfmla	z16.h, p7\/m, z16.h, z0.h
> +.*:	65300080 	bfmla	z0.h, p0\/m, z4.h, z16.h
> +.*:	65280501 	bfmla	z1.h, p1\/m, z8.h, z8.h
> +.*:	65240982 	bfmla	z2.h, p2\/m, z12.h, z4.h
> +.*:	65221204 	bfmla	z4.h, p4\/m, z16.h, z2.h
> +.*:	65211a88 	bfmla	z8.h, p6\/m, z20.h, z1.h
> +.*:	65201f10 	bfmla	z16.h, p7\/m, z24.h, z0.h
>  .*:	647e0a00 	bfmla	z0.h, z16.h, z6.h\[7\]
>  .*:	64750901 	bfmla	z1.h, z8.h, z5.h\[6\]
>  .*:	646409c2 	bfmla	z2.h, z14.h, z4.h\[4\]
>  .*:	64320aa4 	bfmla	z4.h, z21.h, z2.h\[2\]
>  .*:	64290988 	bfmla	z8.h, z12.h, z1.h\[1\]
>  .*:	64200950 	bfmla	z16.h, z10.h, z0.h\[0\]
> -.*:	65302000 	bfmls	z0.h, p0\/m, z0.h, z16.h
> -.*:	65282421 	bfmls	z1.h, p1\/m, z1.h, z8.h
> -.*:	65242842 	bfmls	z2.h, p2\/m, z2.h, z4.h
> -.*:	65223084 	bfmls	z4.h, p4\/m, z4.h, z2.h
> -.*:	65213908 	bfmls	z8.h, p6\/m, z8.h, z1.h
> -.*:	65203e10 	bfmls	z16.h, p7\/m, z16.h, z0.h
> +.*:	65302080 	bfmls	z0.h, p0\/m, z4.h, z16.h
> +.*:	65282501 	bfmls	z1.h, p1\/m, z8.h, z8.h
> +.*:	65242982 	bfmls	z2.h, p2\/m, z12.h, z4.h
> +.*:	65223204 	bfmls	z4.h, p4\/m, z16.h, z2.h
> +.*:	65213a88 	bfmls	z8.h, p6\/m, z20.h, z1.h
> +.*:	65203f10 	bfmls	z16.h, p7\/m, z24.h, z0.h
>  .*:	647e0e00 	bfmls	z0.h, z16.h, z6.h\[7\]
>  .*:	64750d01 	bfmls	z1.h, z8.h, z5.h\[6\]
>  .*:	64640dc2 	bfmls	z2.h, z14.h, z4.h\[4\]
> --- a/gas/testsuite/gas/aarch64/bfloat16-1.s
> +++ b/gas/testsuite/gas/aarch64/bfloat16-1.s
> @@ -46,12 +46,13 @@ bfclamp z2.h, z12.h, z4.h
>  bfclamp z4.h, z16.h, z2.h
>  bfclamp z8.h, z20.h, z1.h
>  bfclamp z16.h, z24.h, z0.h
> -bfmla z0.h, p0/m, z0.h, z16.h
> -bfmla z1.h, p1/m, z1.h, z8.h
> -bfmla z2.h, p2/m, z2.h, z4.h
> -bfmla z4.h, p4/m, z4.h, z2.h
> -bfmla z8.h, p6/m, z8.h, z1.h
> -bfmla z16.h, p7/m, z16.h, z0.h
> +
> +bfmla z0.h, p0/m, z4.h, z16.h
> +bfmla z1.h, p1/m, z8.h, z8.h
> +bfmla z2.h, p2/m, z12.h, z4.h
> +bfmla z4.h, p4/m, z16.h, z2.h
> +bfmla z8.h, p6/m, z20.h, z1.h
> +bfmla z16.h, p7/m, z24.h, z0.h
>  
>  bfmla z0.h, z16.h, z6.h[7]
>  bfmla z1.h, z8.h, z5.h[6]
> @@ -60,12 +61,12 @@ bfmla z4.h, z21.h, z2.h[2]
>  bfmla z8.h, z12.h, z1.h[1]
>  bfmla z16.h, z10.h, z0.h[0]
>  
> -bfmls z0.h, p0/m, z0.h, z16.h
> -bfmls z1.h, p1/m, z1.h, z8.h
> -bfmls z2.h, p2/m, z2.h, z4.h
> -bfmls z4.h, p4/m, z4.h, z2.h
> -bfmls z8.h, p6/m, z8.h, z1.h
> -bfmls z16.h, p7/m, z16.h, z0.h
> +bfmls z0.h, p0/m, z4.h, z16.h
> +bfmls z1.h, p1/m, z8.h, z8.h
> +bfmls z2.h, p2/m, z12.h, z4.h
> +bfmls z4.h, p4/m, z16.h, z2.h
> +bfmls z8.h, p6/m, z20.h, z1.h
> +bfmls z16.h, p7/m, z24.h, z0.h
>  
>  bfmls z0.h, z16.h, z6.h[7]
>  bfmls z1.h, z8.h, z5.h[6]
> --- a/gas/testsuite/gas/aarch64/bfloat16-bad.l
> +++ b/gas/testsuite/gas/aarch64/bfloat16-bad.l
> @@ -41,24 +41,24 @@
>  .*: Error: selected processor does not support `bfclamp z4.h,z16.h,z2.h'
>  .*: Error: selected processor does not support `bfclamp z8.h,z20.h,z1.h'
>  .*: Error: selected processor does not support `bfclamp z16.h,z24.h,z0.h'
> -.*: Error: selected processor does not support `bfmla z0.h,p0\/m,z0.h,z16.h'
> -.*: Error: selected processor does not support `bfmla z1.h,p1\/m,z1.h,z8.h'
> -.*: Error: selected processor does not support `bfmla z2.h,p2\/m,z2.h,z4.h'
> -.*: Error: selected processor does not support `bfmla z4.h,p4\/m,z4.h,z2.h'
> -.*: Error: selected processor does not support `bfmla z8.h,p6\/m,z8.h,z1.h'
> -.*: Error: selected processor does not support `bfmla z16.h,p7\/m,z16.h,z0.h'
> +.*: Error: selected processor does not support `bfmla .*
> +.*: Error: selected processor does not support `bfmla .*
> +.*: Error: selected processor does not support `bfmla .*
> +.*: Error: selected processor does not support `bfmla .*
> +.*: Error: selected processor does not support `bfmla .*
> +.*: Error: selected processor does not support `bfmla .*
>  .*: Error: selected processor does not support `bfmla z0.h,z16.h,z6.h\[7\]'
>  .*: Error: selected processor does not support `bfmla z1.h,z8.h,z5.h\[6\]'
>  .*: Error: selected processor does not support `bfmla z2.h,z14.h,z4.h\[4\]'
>  .*: Error: selected processor does not support `bfmla z4.h,z21.h,z2.h\[2\]'
>  .*: Error: selected processor does not support `bfmla z8.h,z12.h,z1.h\[1\]'
>  .*: Error: selected processor does not support `bfmla z16.h,z10.h,z0.h\[0\]'
> -.*: Error: selected processor does not support `bfmls z0.h,p0\/m,z0.h,z16.h'
> -.*: Error: selected processor does not support `bfmls z1.h,p1\/m,z1.h,z8.h'
> -.*: Error: selected processor does not support `bfmls z2.h,p2\/m,z2.h,z4.h'
> -.*: Error: selected processor does not support `bfmls z4.h,p4\/m,z4.h,z2.h'
> -.*: Error: selected processor does not support `bfmls z8.h,p6\/m,z8.h,z1.h'
> -.*: Error: selected processor does not support `bfmls z16.h,p7\/m,z16.h,z0.h'
> +.*: Error: selected processor does not support `bfmls .*
> +.*: Error: selected processor does not support `bfmls .*
> +.*: Error: selected processor does not support `bfmls .*
> +.*: Error: selected processor does not support `bfmls .*
> +.*: Error: selected processor does not support `bfmls .*
> +.*: Error: selected processor does not support `bfmls .*
>  .*: Error: selected processor does not support `bfmls z0.h,z16.h,z6.h\[7\]'
>  .*: Error: selected processor does not support `bfmls z1.h,z8.h,z5.h\[6\]'
>  .*: Error: selected processor does not support `bfmls z2.h,z14.h,z4.h\[4\]'
> --- /dev/null
> +++ b/gas/testsuite/gas/aarch64/bfloat16-invalid.d
> @@ -0,0 +1,4 @@
> +#name: Test Bfloat16 instructions with wrong operand combinations
> +#as: -march=armv9.4-a
> +#source: bfloat16-invalid.s
> +#error_output: bfloat16-invalid.l
> --- /dev/null
> +++ b/gas/testsuite/gas/aarch64/bfloat16-invalid.l
> @@ -0,0 +1,8 @@
> +.*: Assembler messages:
> +[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfadd .*
> +[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfmax .*
> +[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfmaxnm .*
> +[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfmin .*
> +[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfminnm .*
> +[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfmul .*
> +[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfsub .*
> --- /dev/null
> +++ b/gas/testsuite/gas/aarch64/bfloat16-invalid.s
> @@ -0,0 +1,13 @@
> +bfadd z0.h, p0/m, z1.h, z0.h
> +
> +bfmax z0.h, p0/m, z1.h, z0.h
> +
> +bfmaxnm z0.h, p0/m, z1.h, z0.h
> +
> +bfmin z0.h, p0/m, z1.h, z0.h
> +
> +bfminnm z0.h, p0/m, z1.h, z0.h
> +
> +bfmul z0.h, p0/m, z1.h, z0.h
> +
> +bfsub z0.h, p0/m, z1.h, z0.h
> --- a/opcodes/aarch64-dis-2.c
> +++ b/opcodes/aarch64-dis-2.c
> @@ -32211,14 +32211,14 @@ aarch64_find_next_opcode (const aarch64_
>      case 1705: return NULL;		/* ldff1h --> NULL.  */
>      case 1659: value = 3313; break;	/* ld2h --> ld2q.  */
>      case 3313: return NULL;		/* ld2q --> NULL.  */
> -    case 2464: value = 3279; break;	/* fclamp --> bfclamp.  */
> -    case 3279: return NULL;		/* bfclamp --> NULL.  */
> +    case 2464: value = 3281; break;	/* fclamp --> bfclamp.  */
> +    case 3281: return NULL;		/* bfclamp --> NULL.  */
>      case 1778: value = 1779; break;	/* ldr --> ldr.  */
>      case 1779: return NULL;		/* ldr --> NULL.  */
> -    case 1434: value = 3278; break;	/* fadd --> bfadd.  */
> -    case 3278: return NULL;		/* bfadd --> NULL.  */
> -    case 1501: value = 3281; break;	/* fmul --> bfmul.  */
> -    case 3281: return NULL;		/* bfmul --> NULL.  */
> +    case 1434: value = 3280; break;	/* fadd --> bfadd.  */
> +    case 3280: return NULL;		/* bfadd --> NULL.  */
> +    case 1501: value = 3282; break;	/* fmul --> bfmul.  */
> +    case 3282: return NULL;		/* bfmul --> NULL.  */
>      case 1527: value = 3283; break;	/* fsub --> bfsub.  */
>      case 3283: return NULL;		/* bfsub --> NULL.  */
>      case 1492: value = 3276; break;	/* fmla --> bfmla.  */
> @@ -32251,12 +32251,12 @@ aarch64_find_next_opcode (const aarch64_
>      case 3271: return NULL;		/* bfadd --> NULL.  */
>      case 1482: value = 3273; break;	/* fmaxnm --> bfmaxnm.  */
>      case 3273: return NULL;		/* bfmaxnm --> NULL.  */
> -    case 1502: value = 3280; break;	/* fmul --> bfmul.  */
> -    case 3280: return NULL;		/* bfmul --> NULL.  */
> +    case 1502: value = 3278; break;	/* fmul --> bfmul.  */
> +    case 3278: return NULL;		/* bfmul --> NULL.  */
>      case 1480: value = 3272; break;	/* fmax --> bfmax.  */
>      case 3272: return NULL;		/* bfmax --> NULL.  */
> -    case 1528: value = 3282; break;	/* fsub --> bfsub.  */
> -    case 3282: return NULL;		/* bfsub --> NULL.  */
> +    case 1528: value = 3279; break;	/* fsub --> bfsub.  */
> +    case 3279: return NULL;		/* bfsub --> NULL.  */
>      case 1488: value = 3275; break;	/* fminnm --> bfminnm.  */
>      case 3275: return NULL;		/* bfminnm --> NULL.  */
>      case 1486: value = 3274; break;	/* fmin --> bfmin.  */
> --- a/opcodes/aarch64-tbl.h
> +++ b/opcodes/aarch64-tbl.h
> @@ -6331,18 +6331,18 @@ const struct aarch64_opcode aarch64_opco
>    D128_THE_INSN("rcwsswppl", 0x5960a000, 0xffe0fc00, OP3 (Rt, Rs, ADDR_SIMPLE), QL_X2NIL, 0),
>  
>  /* BFloat16 SVE Instructions.  */
> -  B16B16_INSNC("bfadd", 0x65008000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
> -  B16B16_INSNC("bfmax", 0x65068000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
> -  B16B16_INSNC("bfmaxnm", 0x65048000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
> -  B16B16_INSNC("bfmin", 0x65078000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
> -  B16B16_INSNC("bfminnm", 0x65058000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
> +  B16B16_INSNC("bfadd", 0x65008000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
> +  B16B16_INSNC("bfmax", 0x65068000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
> +  B16B16_INSNC("bfmaxnm", 0x65048000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
> +  B16B16_INSNC("bfmin", 0x65078000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
> +  B16B16_INSNC("bfminnm", 0x65058000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
>    B16B16_INSNC("bfmla", 0x65200000, 0xffe0e000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zn, SVE_Zm_16), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
>    B16B16_INSNC("bfmls", 0x65202000, 0xffe0e000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zn, SVE_Zm_16), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
> +  B16B16_INSNC("bfmul", 0x65028000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
> +  B16B16_INSNC("bfsub", 0x65018000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
>    B16B16_INSN("bfadd", 0x65000000, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
>    B16B16_INSN("bfclamp", 0x64202400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
> -  B16B16_INSNC("bfmul", 0x65028000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
>    B16B16_INSN("bfmul", 0x65000800, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
> -  B16B16_INSNC("bfsub", 0x65018000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
>    B16B16_INSN("bfsub", 0x65000400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
>    B16B16_INSN("bfmla", 0x64200800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
>    B16B16_INSN("bfmls", 0x64200c00, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
>
  

Patch

--- a/gas/testsuite/gas/aarch64/bfloat16-1.d
+++ b/gas/testsuite/gas/aarch64/bfloat16-1.d
@@ -50,24 +50,24 @@ 
 .*:	64222604 	bfclamp	z4.h, z16.h, z2.h
 .*:	64212688 	bfclamp	z8.h, z20.h, z1.h
 .*:	64202710 	bfclamp	z16.h, z24.h, z0.h
-.*:	65300000 	bfmla	z0.h, p0\/m, z0.h, z16.h
-.*:	65280421 	bfmla	z1.h, p1\/m, z1.h, z8.h
-.*:	65240842 	bfmla	z2.h, p2\/m, z2.h, z4.h
-.*:	65221084 	bfmla	z4.h, p4\/m, z4.h, z2.h
-.*:	65211908 	bfmla	z8.h, p6\/m, z8.h, z1.h
-.*:	65201e10 	bfmla	z16.h, p7\/m, z16.h, z0.h
+.*:	65300080 	bfmla	z0.h, p0\/m, z4.h, z16.h
+.*:	65280501 	bfmla	z1.h, p1\/m, z8.h, z8.h
+.*:	65240982 	bfmla	z2.h, p2\/m, z12.h, z4.h
+.*:	65221204 	bfmla	z4.h, p4\/m, z16.h, z2.h
+.*:	65211a88 	bfmla	z8.h, p6\/m, z20.h, z1.h
+.*:	65201f10 	bfmla	z16.h, p7\/m, z24.h, z0.h
 .*:	647e0a00 	bfmla	z0.h, z16.h, z6.h\[7\]
 .*:	64750901 	bfmla	z1.h, z8.h, z5.h\[6\]
 .*:	646409c2 	bfmla	z2.h, z14.h, z4.h\[4\]
 .*:	64320aa4 	bfmla	z4.h, z21.h, z2.h\[2\]
 .*:	64290988 	bfmla	z8.h, z12.h, z1.h\[1\]
 .*:	64200950 	bfmla	z16.h, z10.h, z0.h\[0\]
-.*:	65302000 	bfmls	z0.h, p0\/m, z0.h, z16.h
-.*:	65282421 	bfmls	z1.h, p1\/m, z1.h, z8.h
-.*:	65242842 	bfmls	z2.h, p2\/m, z2.h, z4.h
-.*:	65223084 	bfmls	z4.h, p4\/m, z4.h, z2.h
-.*:	65213908 	bfmls	z8.h, p6\/m, z8.h, z1.h
-.*:	65203e10 	bfmls	z16.h, p7\/m, z16.h, z0.h
+.*:	65302080 	bfmls	z0.h, p0\/m, z4.h, z16.h
+.*:	65282501 	bfmls	z1.h, p1\/m, z8.h, z8.h
+.*:	65242982 	bfmls	z2.h, p2\/m, z12.h, z4.h
+.*:	65223204 	bfmls	z4.h, p4\/m, z16.h, z2.h
+.*:	65213a88 	bfmls	z8.h, p6\/m, z20.h, z1.h
+.*:	65203f10 	bfmls	z16.h, p7\/m, z24.h, z0.h
 .*:	647e0e00 	bfmls	z0.h, z16.h, z6.h\[7\]
 .*:	64750d01 	bfmls	z1.h, z8.h, z5.h\[6\]
 .*:	64640dc2 	bfmls	z2.h, z14.h, z4.h\[4\]
--- a/gas/testsuite/gas/aarch64/bfloat16-1.s
+++ b/gas/testsuite/gas/aarch64/bfloat16-1.s
@@ -46,12 +46,13 @@  bfclamp z2.h, z12.h, z4.h
 bfclamp z4.h, z16.h, z2.h
 bfclamp z8.h, z20.h, z1.h
 bfclamp z16.h, z24.h, z0.h
-bfmla z0.h, p0/m, z0.h, z16.h
-bfmla z1.h, p1/m, z1.h, z8.h
-bfmla z2.h, p2/m, z2.h, z4.h
-bfmla z4.h, p4/m, z4.h, z2.h
-bfmla z8.h, p6/m, z8.h, z1.h
-bfmla z16.h, p7/m, z16.h, z0.h
+
+bfmla z0.h, p0/m, z4.h, z16.h
+bfmla z1.h, p1/m, z8.h, z8.h
+bfmla z2.h, p2/m, z12.h, z4.h
+bfmla z4.h, p4/m, z16.h, z2.h
+bfmla z8.h, p6/m, z20.h, z1.h
+bfmla z16.h, p7/m, z24.h, z0.h
 
 bfmla z0.h, z16.h, z6.h[7]
 bfmla z1.h, z8.h, z5.h[6]
@@ -60,12 +61,12 @@  bfmla z4.h, z21.h, z2.h[2]
 bfmla z8.h, z12.h, z1.h[1]
 bfmla z16.h, z10.h, z0.h[0]
 
-bfmls z0.h, p0/m, z0.h, z16.h
-bfmls z1.h, p1/m, z1.h, z8.h
-bfmls z2.h, p2/m, z2.h, z4.h
-bfmls z4.h, p4/m, z4.h, z2.h
-bfmls z8.h, p6/m, z8.h, z1.h
-bfmls z16.h, p7/m, z16.h, z0.h
+bfmls z0.h, p0/m, z4.h, z16.h
+bfmls z1.h, p1/m, z8.h, z8.h
+bfmls z2.h, p2/m, z12.h, z4.h
+bfmls z4.h, p4/m, z16.h, z2.h
+bfmls z8.h, p6/m, z20.h, z1.h
+bfmls z16.h, p7/m, z24.h, z0.h
 
 bfmls z0.h, z16.h, z6.h[7]
 bfmls z1.h, z8.h, z5.h[6]
--- a/gas/testsuite/gas/aarch64/bfloat16-bad.l
+++ b/gas/testsuite/gas/aarch64/bfloat16-bad.l
@@ -41,24 +41,24 @@ 
 .*: Error: selected processor does not support `bfclamp z4.h,z16.h,z2.h'
 .*: Error: selected processor does not support `bfclamp z8.h,z20.h,z1.h'
 .*: Error: selected processor does not support `bfclamp z16.h,z24.h,z0.h'
-.*: Error: selected processor does not support `bfmla z0.h,p0\/m,z0.h,z16.h'
-.*: Error: selected processor does not support `bfmla z1.h,p1\/m,z1.h,z8.h'
-.*: Error: selected processor does not support `bfmla z2.h,p2\/m,z2.h,z4.h'
-.*: Error: selected processor does not support `bfmla z4.h,p4\/m,z4.h,z2.h'
-.*: Error: selected processor does not support `bfmla z8.h,p6\/m,z8.h,z1.h'
-.*: Error: selected processor does not support `bfmla z16.h,p7\/m,z16.h,z0.h'
+.*: Error: selected processor does not support `bfmla .*
+.*: Error: selected processor does not support `bfmla .*
+.*: Error: selected processor does not support `bfmla .*
+.*: Error: selected processor does not support `bfmla .*
+.*: Error: selected processor does not support `bfmla .*
+.*: Error: selected processor does not support `bfmla .*
 .*: Error: selected processor does not support `bfmla z0.h,z16.h,z6.h\[7\]'
 .*: Error: selected processor does not support `bfmla z1.h,z8.h,z5.h\[6\]'
 .*: Error: selected processor does not support `bfmla z2.h,z14.h,z4.h\[4\]'
 .*: Error: selected processor does not support `bfmla z4.h,z21.h,z2.h\[2\]'
 .*: Error: selected processor does not support `bfmla z8.h,z12.h,z1.h\[1\]'
 .*: Error: selected processor does not support `bfmla z16.h,z10.h,z0.h\[0\]'
-.*: Error: selected processor does not support `bfmls z0.h,p0\/m,z0.h,z16.h'
-.*: Error: selected processor does not support `bfmls z1.h,p1\/m,z1.h,z8.h'
-.*: Error: selected processor does not support `bfmls z2.h,p2\/m,z2.h,z4.h'
-.*: Error: selected processor does not support `bfmls z4.h,p4\/m,z4.h,z2.h'
-.*: Error: selected processor does not support `bfmls z8.h,p6\/m,z8.h,z1.h'
-.*: Error: selected processor does not support `bfmls z16.h,p7\/m,z16.h,z0.h'
+.*: Error: selected processor does not support `bfmls .*
+.*: Error: selected processor does not support `bfmls .*
+.*: Error: selected processor does not support `bfmls .*
+.*: Error: selected processor does not support `bfmls .*
+.*: Error: selected processor does not support `bfmls .*
+.*: Error: selected processor does not support `bfmls .*
 .*: Error: selected processor does not support `bfmls z0.h,z16.h,z6.h\[7\]'
 .*: Error: selected processor does not support `bfmls z1.h,z8.h,z5.h\[6\]'
 .*: Error: selected processor does not support `bfmls z2.h,z14.h,z4.h\[4\]'
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-invalid.d
@@ -0,0 +1,4 @@ 
+#name: Test Bfloat16 instructions with wrong operand combinations
+#as: -march=armv9.4-a
+#source: bfloat16-invalid.s
+#error_output: bfloat16-invalid.l
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-invalid.l
@@ -0,0 +1,8 @@ 
+.*: Assembler messages:
+[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfadd .*
+[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfmax .*
+[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfmaxnm .*
+[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfmin .*
+[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfminnm .*
+[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfmul .*
+[^ :]+:[0-9]+: Error: operand 3 must be the same register as operand 1 -- `bfsub .*
--- /dev/null
+++ b/gas/testsuite/gas/aarch64/bfloat16-invalid.s
@@ -0,0 +1,13 @@ 
+bfadd z0.h, p0/m, z1.h, z0.h
+
+bfmax z0.h, p0/m, z1.h, z0.h
+
+bfmaxnm z0.h, p0/m, z1.h, z0.h
+
+bfmin z0.h, p0/m, z1.h, z0.h
+
+bfminnm z0.h, p0/m, z1.h, z0.h
+
+bfmul z0.h, p0/m, z1.h, z0.h
+
+bfsub z0.h, p0/m, z1.h, z0.h
--- a/opcodes/aarch64-dis-2.c
+++ b/opcodes/aarch64-dis-2.c
@@ -32211,14 +32211,14 @@  aarch64_find_next_opcode (const aarch64_
     case 1705: return NULL;		/* ldff1h --> NULL.  */
     case 1659: value = 3313; break;	/* ld2h --> ld2q.  */
     case 3313: return NULL;		/* ld2q --> NULL.  */
-    case 2464: value = 3279; break;	/* fclamp --> bfclamp.  */
-    case 3279: return NULL;		/* bfclamp --> NULL.  */
+    case 2464: value = 3281; break;	/* fclamp --> bfclamp.  */
+    case 3281: return NULL;		/* bfclamp --> NULL.  */
     case 1778: value = 1779; break;	/* ldr --> ldr.  */
     case 1779: return NULL;		/* ldr --> NULL.  */
-    case 1434: value = 3278; break;	/* fadd --> bfadd.  */
-    case 3278: return NULL;		/* bfadd --> NULL.  */
-    case 1501: value = 3281; break;	/* fmul --> bfmul.  */
-    case 3281: return NULL;		/* bfmul --> NULL.  */
+    case 1434: value = 3280; break;	/* fadd --> bfadd.  */
+    case 3280: return NULL;		/* bfadd --> NULL.  */
+    case 1501: value = 3282; break;	/* fmul --> bfmul.  */
+    case 3282: return NULL;		/* bfmul --> NULL.  */
     case 1527: value = 3283; break;	/* fsub --> bfsub.  */
     case 3283: return NULL;		/* bfsub --> NULL.  */
     case 1492: value = 3276; break;	/* fmla --> bfmla.  */
@@ -32251,12 +32251,12 @@  aarch64_find_next_opcode (const aarch64_
     case 3271: return NULL;		/* bfadd --> NULL.  */
     case 1482: value = 3273; break;	/* fmaxnm --> bfmaxnm.  */
     case 3273: return NULL;		/* bfmaxnm --> NULL.  */
-    case 1502: value = 3280; break;	/* fmul --> bfmul.  */
-    case 3280: return NULL;		/* bfmul --> NULL.  */
+    case 1502: value = 3278; break;	/* fmul --> bfmul.  */
+    case 3278: return NULL;		/* bfmul --> NULL.  */
     case 1480: value = 3272; break;	/* fmax --> bfmax.  */
     case 3272: return NULL;		/* bfmax --> NULL.  */
-    case 1528: value = 3282; break;	/* fsub --> bfsub.  */
-    case 3282: return NULL;		/* bfsub --> NULL.  */
+    case 1528: value = 3279; break;	/* fsub --> bfsub.  */
+    case 3279: return NULL;		/* bfsub --> NULL.  */
     case 1488: value = 3275; break;	/* fminnm --> bfminnm.  */
     case 3275: return NULL;		/* bfminnm --> NULL.  */
     case 1486: value = 3274; break;	/* fmin --> bfmin.  */
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -6331,18 +6331,18 @@  const struct aarch64_opcode aarch64_opco
   D128_THE_INSN("rcwsswppl", 0x5960a000, 0xffe0fc00, OP3 (Rt, Rs, ADDR_SIMPLE), QL_X2NIL, 0),
 
 /* BFloat16 SVE Instructions.  */
-  B16B16_INSNC("bfadd", 0x65008000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
-  B16B16_INSNC("bfmax", 0x65068000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
-  B16B16_INSNC("bfmaxnm", 0x65048000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
-  B16B16_INSNC("bfmin", 0x65078000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
-  B16B16_INSNC("bfminnm", 0x65058000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
+  B16B16_INSNC("bfadd", 0x65008000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
+  B16B16_INSNC("bfmax", 0x65068000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
+  B16B16_INSNC("bfmaxnm", 0x65048000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
+  B16B16_INSNC("bfmin", 0x65078000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
+  B16B16_INSNC("bfminnm", 0x65058000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
   B16B16_INSNC("bfmla", 0x65200000, 0xffe0e000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zn, SVE_Zm_16), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
   B16B16_INSNC("bfmls", 0x65202000, 0xffe0e000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zn, SVE_Zm_16), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
+  B16B16_INSNC("bfmul", 0x65028000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
+  B16B16_INSNC("bfsub", 0x65018000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
   B16B16_INSN("bfadd", 0x65000000, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
   B16B16_INSN("bfclamp", 0x64202400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
-  B16B16_INSNC("bfmul", 0x65028000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
   B16B16_INSN("bfmul", 0x65000800, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
-  B16B16_INSNC("bfsub", 0x65018000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
   B16B16_INSN("bfsub", 0x65000400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
   B16B16_INSN("bfmla", 0x64200800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
   B16B16_INSN("bfmls", 0x64200c00, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),