[1/6] Arm64: correct B16B16 indexed bf{mla,mls,mul}

Message ID 2b3f3784-481d-4b8c-861f-7c9ac062683b@suse.com
State Committed
Headers
Series Arm64: (mostly) SVE adjustments |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Testing passed

Commit Message

Jan Beulich Feb. 23, 2024, 11:28 a.m. UTC
  Their index is in bits 19, 20, and 22. Bit 11 in particular is already
set in the base opcode. Note also how disassembler output didn't match
assembler input in the respective testcase.
  

Comments

Richard Earnshaw (lists) March 20, 2024, 3:54 p.m. UTC | #1
On 23/02/2024 11:28, Jan Beulich wrote:
> Their index is in bits 19, 20, and 22. Bit 11 in particular is already
> set in the base opcode. Note also how disassembler output didn't match
> assembler input in the respective testcase.

This is OK.

I realize you didn't write the tests, but something I generally recommend is that, whenever, possible, each test entry should test one field in the instruction, with the other fields being set to fill with zero.

We then get, for these tests, things like:

bfmla	z0.h, z0.h, z0.h[0]   // base opcode
bfmla   z31.h, z0.h, z0.h[0]  // Zd register
bfmla   z0.h, z31.h, z0.h[0]  // Zn register
bfmla   z0.h, z0.h, z7.h[0]   // Zm register
bfmla   z0.h, z0.h, z0.h[7]   // index

and then, finally, one instruction that tests a random combination of registers, just to check that we combine things properly:

bfmla   z23.h, z9.h, z5.h[3]

R.

> 
> --- a/gas/testsuite/gas/aarch64/bfloat16-1.d
> +++ b/gas/testsuite/gas/aarch64/bfloat16-1.d
> @@ -56,24 +56,24 @@
>  .*:	65221084 	bfmla	z4.h, p4\/m, z4.h, z2.h
>  .*:	65211908 	bfmla	z8.h, p6\/m, z8.h, z1.h
>  .*:	65201e10 	bfmla	z16.h, p7\/m, z16.h, z0.h
> -.*:	643e0a00 	bfmla	z0.h, z16.h, z6.h\[7\]
> -.*:	643d0901 	bfmla	z1.h, z8.h, z5.h\[7\]
> -.*:	643409c2 	bfmla	z2.h, z14.h, z4.h\[5\]
> -.*:	642a0aa4 	bfmla	z4.h, z21.h, z2.h\[3\]
> -.*:	64210988 	bfmla	z8.h, z12.h, z1.h\[1\]
> -.*:	64200950 	bfmla	z16.h, z10.h, z0.h\[1\]
> +.*:	647e0a00 	bfmla	z0.h, z16.h, z6.h\[7\]
> +.*:	64750901 	bfmla	z1.h, z8.h, z5.h\[6\]
> +.*:	646409c2 	bfmla	z2.h, z14.h, z4.h\[4\]
> +.*:	64320aa4 	bfmla	z4.h, z21.h, z2.h\[2\]
> +.*:	64290988 	bfmla	z8.h, z12.h, z1.h\[1\]
> +.*:	64200950 	bfmla	z16.h, z10.h, z0.h\[0\]
>  .*:	65302000 	bfmls	z0.h, p0\/m, z0.h, z16.h
>  .*:	65282421 	bfmls	z1.h, p1\/m, z1.h, z8.h
>  .*:	65242842 	bfmls	z2.h, p2\/m, z2.h, z4.h
>  .*:	65223084 	bfmls	z4.h, p4\/m, z4.h, z2.h
>  .*:	65213908 	bfmls	z8.h, p6\/m, z8.h, z1.h
>  .*:	65203e10 	bfmls	z16.h, p7\/m, z16.h, z0.h
> -.*:	643e0e00 	bfmls	z0.h, z16.h, z6.h\[7\]
> -.*:	643d0d01 	bfmls	z1.h, z8.h, z5.h\[7\]
> -.*:	64340dc2 	bfmls	z2.h, z14.h, z4.h\[5\]
> -.*:	642a0ea4 	bfmls	z4.h, z21.h, z2.h\[3\]
> -.*:	64210d88 	bfmls	z8.h, z12.h, z1.h\[1\]
> -.*:	64200d50 	bfmls	z16.h, z10.h, z0.h\[1\]
> +.*:	647e0e00 	bfmls	z0.h, z16.h, z6.h\[7\]
> +.*:	64750d01 	bfmls	z1.h, z8.h, z5.h\[6\]
> +.*:	64640dc2 	bfmls	z2.h, z14.h, z4.h\[4\]
> +.*:	64320ea4 	bfmls	z4.h, z21.h, z2.h\[2\]
> +.*:	64290d88 	bfmls	z8.h, z12.h, z1.h\[1\]
> +.*:	64200d50 	bfmls	z16.h, z10.h, z0.h\[0\]
>  .*:	65028200 	bfmul	z0.h, p0\/m, z0.h, z16.h
>  .*:	65028501 	bfmul	z1.h, p1\/m, z1.h, z8.h
>  .*:	65028882 	bfmul	z2.h, p2\/m, z2.h, z4.h
> @@ -86,12 +86,12 @@
>  .*:	65020a04 	bfmul	z4.h, z16.h, z2.h
>  .*:	65010a88 	bfmul	z8.h, z20.h, z1.h
>  .*:	65000b10 	bfmul	z16.h, z24.h, z0.h
> -.*:	643e2a00 	bfmul	z0.h, z16.h, z6.h\[7\]
> -.*:	643d2901 	bfmul	z1.h, z8.h, z5.h\[7\]
> -.*:	643429c2 	bfmul	z2.h, z14.h, z4.h\[5\]
> -.*:	642a2aa4 	bfmul	z4.h, z21.h, z2.h\[3\]
> -.*:	64212988 	bfmul	z8.h, z12.h, z1.h\[1\]
> -.*:	64202950 	bfmul	z16.h, z10.h, z0.h\[1\]
> +.*:	647e2a00 	bfmul	z0.h, z16.h, z6.h\[7\]
> +.*:	64752901 	bfmul	z1.h, z8.h, z5.h\[6\]
> +.*:	646429c2 	bfmul	z2.h, z14.h, z4.h\[4\]
> +.*:	64322aa4 	bfmul	z4.h, z21.h, z2.h\[2\]
> +.*:	64292988 	bfmul	z8.h, z12.h, z1.h\[1\]
> +.*:	64202950 	bfmul	z16.h, z10.h, z0.h\[0\]
>  .*:	65018200 	bfsub	z0.h, p0\/m, z0.h, z16.h
>  .*:	65018501 	bfsub	z1.h, p1\/m, z1.h, z8.h
>  .*:	65018882 	bfsub	z2.h, p2\/m, z2.h, z4.h
> --- a/opcodes/aarch64-tbl.h
> +++ b/opcodes/aarch64-tbl.h
> @@ -6344,9 +6344,9 @@ const struct aarch64_opcode aarch64_opco
>    B16B16_INSN("bfmul", 0x65000800, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
>    B16B16_INSNC("bfsub", 0x65018000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
>    B16B16_INSN("bfsub", 0x65000400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
> -  B16B16_INSN("bfmla", 0x64200800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_11_INDEX), OP_SVE_VVV_H, 0, 0),
> -  B16B16_INSN("bfmls", 0x64200c00, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_11_INDEX), OP_SVE_VVV_H, 0, 0),
> -  B16B16_INSN("bfmul", 0x64202800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_11_INDEX), OP_SVE_VVV_H, 0, 0),
> +  B16B16_INSN("bfmla", 0x64200800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
> +  B16B16_INSN("bfmls", 0x64200c00, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
> +  B16B16_INSN("bfmul", 0x64202800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
>  
>  /* SME2.1 movaz instructions.  */
>    SME2p1_INSN ("movaz", 0xc0060600, 0xffff1f83, sme2_movaz, 0, OP2 (SME_Zdnx4, SME_ZA_array_vrsb_2), OP_SVE_BB, 0, 0),
>
  
Jan Beulich March 20, 2024, 4:09 p.m. UTC | #2
On 20.03.2024 16:54, Richard Earnshaw (lists) wrote:
> On 23/02/2024 11:28, Jan Beulich wrote:
>> Their index is in bits 19, 20, and 22. Bit 11 in particular is already
>> set in the base opcode. Note also how disassembler output didn't match
>> assembler input in the respective testcase.
> 
> This is OK.
> 
> I realize you didn't write the tests, but something I generally recommend is that, whenever, possible, each test entry should test one field in the instruction, with the other fields being set to fill with zero.
> 
> We then get, for these tests, things like:
> 
> bfmla	z0.h, z0.h, z0.h[0]   // base opcode
> bfmla   z31.h, z0.h, z0.h[0]  // Zd register
> bfmla   z0.h, z31.h, z0.h[0]  // Zn register
> bfmla   z0.h, z0.h, z7.h[0]   // Zm register
> bfmla   z0.h, z0.h, z0.h[7]   // index

Indeed that's how I'm writing tests for my own disassembler library. Whereas
here, as you say, I merely had to alter what was there already.

Jan
  

Patch

--- a/gas/testsuite/gas/aarch64/bfloat16-1.d
+++ b/gas/testsuite/gas/aarch64/bfloat16-1.d
@@ -56,24 +56,24 @@ 
 .*:	65221084 	bfmla	z4.h, p4\/m, z4.h, z2.h
 .*:	65211908 	bfmla	z8.h, p6\/m, z8.h, z1.h
 .*:	65201e10 	bfmla	z16.h, p7\/m, z16.h, z0.h
-.*:	643e0a00 	bfmla	z0.h, z16.h, z6.h\[7\]
-.*:	643d0901 	bfmla	z1.h, z8.h, z5.h\[7\]
-.*:	643409c2 	bfmla	z2.h, z14.h, z4.h\[5\]
-.*:	642a0aa4 	bfmla	z4.h, z21.h, z2.h\[3\]
-.*:	64210988 	bfmla	z8.h, z12.h, z1.h\[1\]
-.*:	64200950 	bfmla	z16.h, z10.h, z0.h\[1\]
+.*:	647e0a00 	bfmla	z0.h, z16.h, z6.h\[7\]
+.*:	64750901 	bfmla	z1.h, z8.h, z5.h\[6\]
+.*:	646409c2 	bfmla	z2.h, z14.h, z4.h\[4\]
+.*:	64320aa4 	bfmla	z4.h, z21.h, z2.h\[2\]
+.*:	64290988 	bfmla	z8.h, z12.h, z1.h\[1\]
+.*:	64200950 	bfmla	z16.h, z10.h, z0.h\[0\]
 .*:	65302000 	bfmls	z0.h, p0\/m, z0.h, z16.h
 .*:	65282421 	bfmls	z1.h, p1\/m, z1.h, z8.h
 .*:	65242842 	bfmls	z2.h, p2\/m, z2.h, z4.h
 .*:	65223084 	bfmls	z4.h, p4\/m, z4.h, z2.h
 .*:	65213908 	bfmls	z8.h, p6\/m, z8.h, z1.h
 .*:	65203e10 	bfmls	z16.h, p7\/m, z16.h, z0.h
-.*:	643e0e00 	bfmls	z0.h, z16.h, z6.h\[7\]
-.*:	643d0d01 	bfmls	z1.h, z8.h, z5.h\[7\]
-.*:	64340dc2 	bfmls	z2.h, z14.h, z4.h\[5\]
-.*:	642a0ea4 	bfmls	z4.h, z21.h, z2.h\[3\]
-.*:	64210d88 	bfmls	z8.h, z12.h, z1.h\[1\]
-.*:	64200d50 	bfmls	z16.h, z10.h, z0.h\[1\]
+.*:	647e0e00 	bfmls	z0.h, z16.h, z6.h\[7\]
+.*:	64750d01 	bfmls	z1.h, z8.h, z5.h\[6\]
+.*:	64640dc2 	bfmls	z2.h, z14.h, z4.h\[4\]
+.*:	64320ea4 	bfmls	z4.h, z21.h, z2.h\[2\]
+.*:	64290d88 	bfmls	z8.h, z12.h, z1.h\[1\]
+.*:	64200d50 	bfmls	z16.h, z10.h, z0.h\[0\]
 .*:	65028200 	bfmul	z0.h, p0\/m, z0.h, z16.h
 .*:	65028501 	bfmul	z1.h, p1\/m, z1.h, z8.h
 .*:	65028882 	bfmul	z2.h, p2\/m, z2.h, z4.h
@@ -86,12 +86,12 @@ 
 .*:	65020a04 	bfmul	z4.h, z16.h, z2.h
 .*:	65010a88 	bfmul	z8.h, z20.h, z1.h
 .*:	65000b10 	bfmul	z16.h, z24.h, z0.h
-.*:	643e2a00 	bfmul	z0.h, z16.h, z6.h\[7\]
-.*:	643d2901 	bfmul	z1.h, z8.h, z5.h\[7\]
-.*:	643429c2 	bfmul	z2.h, z14.h, z4.h\[5\]
-.*:	642a2aa4 	bfmul	z4.h, z21.h, z2.h\[3\]
-.*:	64212988 	bfmul	z8.h, z12.h, z1.h\[1\]
-.*:	64202950 	bfmul	z16.h, z10.h, z0.h\[1\]
+.*:	647e2a00 	bfmul	z0.h, z16.h, z6.h\[7\]
+.*:	64752901 	bfmul	z1.h, z8.h, z5.h\[6\]
+.*:	646429c2 	bfmul	z2.h, z14.h, z4.h\[4\]
+.*:	64322aa4 	bfmul	z4.h, z21.h, z2.h\[2\]
+.*:	64292988 	bfmul	z8.h, z12.h, z1.h\[1\]
+.*:	64202950 	bfmul	z16.h, z10.h, z0.h\[0\]
 .*:	65018200 	bfsub	z0.h, p0\/m, z0.h, z16.h
 .*:	65018501 	bfsub	z1.h, p1\/m, z1.h, z8.h
 .*:	65018882 	bfsub	z2.h, p2\/m, z2.h, z4.h
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -6344,9 +6344,9 @@  const struct aarch64_opcode aarch64_opco
   B16B16_INSN("bfmul", 0x65000800, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
   B16B16_INSNC("bfsub", 0x65018000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
   B16B16_INSN("bfsub", 0x65000400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
-  B16B16_INSN("bfmla", 0x64200800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_11_INDEX), OP_SVE_VVV_H, 0, 0),
-  B16B16_INSN("bfmls", 0x64200c00, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_11_INDEX), OP_SVE_VVV_H, 0, 0),
-  B16B16_INSN("bfmul", 0x64202800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_11_INDEX), OP_SVE_VVV_H, 0, 0),
+  B16B16_INSN("bfmla", 0x64200800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
+  B16B16_INSN("bfmls", 0x64200c00, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
+  B16B16_INSN("bfmul", 0x64202800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
 
 /* SME2.1 movaz instructions.  */
   SME2p1_INSN ("movaz", 0xc0060600, 0xffff1f83, sme2_movaz, 0, OP2 (SME_Zdnx4, SME_ZA_array_vrsb_2), OP_SVE_BB, 0, 0),