[1/6] Arm64: correct B16B16 indexed bf{mla,mls,mul}
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_binutils_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_binutils_check--master-arm |
success
|
Testing passed
|
Commit Message
Their index is in bits 19, 20, and 22. Bit 11 in particular is already
set in the base opcode. Note also how disassembler output didn't match
assembler input in the respective testcase.
Comments
On 23/02/2024 11:28, Jan Beulich wrote:
> Their index is in bits 19, 20, and 22. Bit 11 in particular is already
> set in the base opcode. Note also how disassembler output didn't match
> assembler input in the respective testcase.
This is OK.
I realize you didn't write the tests, but something I generally recommend is that, whenever, possible, each test entry should test one field in the instruction, with the other fields being set to fill with zero.
We then get, for these tests, things like:
bfmla z0.h, z0.h, z0.h[0] // base opcode
bfmla z31.h, z0.h, z0.h[0] // Zd register
bfmla z0.h, z31.h, z0.h[0] // Zn register
bfmla z0.h, z0.h, z7.h[0] // Zm register
bfmla z0.h, z0.h, z0.h[7] // index
and then, finally, one instruction that tests a random combination of registers, just to check that we combine things properly:
bfmla z23.h, z9.h, z5.h[3]
R.
>
> --- a/gas/testsuite/gas/aarch64/bfloat16-1.d
> +++ b/gas/testsuite/gas/aarch64/bfloat16-1.d
> @@ -56,24 +56,24 @@
> .*: 65221084 bfmla z4.h, p4\/m, z4.h, z2.h
> .*: 65211908 bfmla z8.h, p6\/m, z8.h, z1.h
> .*: 65201e10 bfmla z16.h, p7\/m, z16.h, z0.h
> -.*: 643e0a00 bfmla z0.h, z16.h, z6.h\[7\]
> -.*: 643d0901 bfmla z1.h, z8.h, z5.h\[7\]
> -.*: 643409c2 bfmla z2.h, z14.h, z4.h\[5\]
> -.*: 642a0aa4 bfmla z4.h, z21.h, z2.h\[3\]
> -.*: 64210988 bfmla z8.h, z12.h, z1.h\[1\]
> -.*: 64200950 bfmla z16.h, z10.h, z0.h\[1\]
> +.*: 647e0a00 bfmla z0.h, z16.h, z6.h\[7\]
> +.*: 64750901 bfmla z1.h, z8.h, z5.h\[6\]
> +.*: 646409c2 bfmla z2.h, z14.h, z4.h\[4\]
> +.*: 64320aa4 bfmla z4.h, z21.h, z2.h\[2\]
> +.*: 64290988 bfmla z8.h, z12.h, z1.h\[1\]
> +.*: 64200950 bfmla z16.h, z10.h, z0.h\[0\]
> .*: 65302000 bfmls z0.h, p0\/m, z0.h, z16.h
> .*: 65282421 bfmls z1.h, p1\/m, z1.h, z8.h
> .*: 65242842 bfmls z2.h, p2\/m, z2.h, z4.h
> .*: 65223084 bfmls z4.h, p4\/m, z4.h, z2.h
> .*: 65213908 bfmls z8.h, p6\/m, z8.h, z1.h
> .*: 65203e10 bfmls z16.h, p7\/m, z16.h, z0.h
> -.*: 643e0e00 bfmls z0.h, z16.h, z6.h\[7\]
> -.*: 643d0d01 bfmls z1.h, z8.h, z5.h\[7\]
> -.*: 64340dc2 bfmls z2.h, z14.h, z4.h\[5\]
> -.*: 642a0ea4 bfmls z4.h, z21.h, z2.h\[3\]
> -.*: 64210d88 bfmls z8.h, z12.h, z1.h\[1\]
> -.*: 64200d50 bfmls z16.h, z10.h, z0.h\[1\]
> +.*: 647e0e00 bfmls z0.h, z16.h, z6.h\[7\]
> +.*: 64750d01 bfmls z1.h, z8.h, z5.h\[6\]
> +.*: 64640dc2 bfmls z2.h, z14.h, z4.h\[4\]
> +.*: 64320ea4 bfmls z4.h, z21.h, z2.h\[2\]
> +.*: 64290d88 bfmls z8.h, z12.h, z1.h\[1\]
> +.*: 64200d50 bfmls z16.h, z10.h, z0.h\[0\]
> .*: 65028200 bfmul z0.h, p0\/m, z0.h, z16.h
> .*: 65028501 bfmul z1.h, p1\/m, z1.h, z8.h
> .*: 65028882 bfmul z2.h, p2\/m, z2.h, z4.h
> @@ -86,12 +86,12 @@
> .*: 65020a04 bfmul z4.h, z16.h, z2.h
> .*: 65010a88 bfmul z8.h, z20.h, z1.h
> .*: 65000b10 bfmul z16.h, z24.h, z0.h
> -.*: 643e2a00 bfmul z0.h, z16.h, z6.h\[7\]
> -.*: 643d2901 bfmul z1.h, z8.h, z5.h\[7\]
> -.*: 643429c2 bfmul z2.h, z14.h, z4.h\[5\]
> -.*: 642a2aa4 bfmul z4.h, z21.h, z2.h\[3\]
> -.*: 64212988 bfmul z8.h, z12.h, z1.h\[1\]
> -.*: 64202950 bfmul z16.h, z10.h, z0.h\[1\]
> +.*: 647e2a00 bfmul z0.h, z16.h, z6.h\[7\]
> +.*: 64752901 bfmul z1.h, z8.h, z5.h\[6\]
> +.*: 646429c2 bfmul z2.h, z14.h, z4.h\[4\]
> +.*: 64322aa4 bfmul z4.h, z21.h, z2.h\[2\]
> +.*: 64292988 bfmul z8.h, z12.h, z1.h\[1\]
> +.*: 64202950 bfmul z16.h, z10.h, z0.h\[0\]
> .*: 65018200 bfsub z0.h, p0\/m, z0.h, z16.h
> .*: 65018501 bfsub z1.h, p1\/m, z1.h, z8.h
> .*: 65018882 bfsub z2.h, p2\/m, z2.h, z4.h
> --- a/opcodes/aarch64-tbl.h
> +++ b/opcodes/aarch64-tbl.h
> @@ -6344,9 +6344,9 @@ const struct aarch64_opcode aarch64_opco
> B16B16_INSN("bfmul", 0x65000800, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
> B16B16_INSNC("bfsub", 0x65018000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
> B16B16_INSN("bfsub", 0x65000400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
> - B16B16_INSN("bfmla", 0x64200800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_11_INDEX), OP_SVE_VVV_H, 0, 0),
> - B16B16_INSN("bfmls", 0x64200c00, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_11_INDEX), OP_SVE_VVV_H, 0, 0),
> - B16B16_INSN("bfmul", 0x64202800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_11_INDEX), OP_SVE_VVV_H, 0, 0),
> + B16B16_INSN("bfmla", 0x64200800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
> + B16B16_INSN("bfmls", 0x64200c00, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
> + B16B16_INSN("bfmul", 0x64202800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
>
> /* SME2.1 movaz instructions. */
> SME2p1_INSN ("movaz", 0xc0060600, 0xffff1f83, sme2_movaz, 0, OP2 (SME_Zdnx4, SME_ZA_array_vrsb_2), OP_SVE_BB, 0, 0),
>
On 20.03.2024 16:54, Richard Earnshaw (lists) wrote:
> On 23/02/2024 11:28, Jan Beulich wrote:
>> Their index is in bits 19, 20, and 22. Bit 11 in particular is already
>> set in the base opcode. Note also how disassembler output didn't match
>> assembler input in the respective testcase.
>
> This is OK.
>
> I realize you didn't write the tests, but something I generally recommend is that, whenever, possible, each test entry should test one field in the instruction, with the other fields being set to fill with zero.
>
> We then get, for these tests, things like:
>
> bfmla z0.h, z0.h, z0.h[0] // base opcode
> bfmla z31.h, z0.h, z0.h[0] // Zd register
> bfmla z0.h, z31.h, z0.h[0] // Zn register
> bfmla z0.h, z0.h, z7.h[0] // Zm register
> bfmla z0.h, z0.h, z0.h[7] // index
Indeed that's how I'm writing tests for my own disassembler library. Whereas
here, as you say, I merely had to alter what was there already.
Jan
@@ -56,24 +56,24 @@
.*: 65221084 bfmla z4.h, p4\/m, z4.h, z2.h
.*: 65211908 bfmla z8.h, p6\/m, z8.h, z1.h
.*: 65201e10 bfmla z16.h, p7\/m, z16.h, z0.h
-.*: 643e0a00 bfmla z0.h, z16.h, z6.h\[7\]
-.*: 643d0901 bfmla z1.h, z8.h, z5.h\[7\]
-.*: 643409c2 bfmla z2.h, z14.h, z4.h\[5\]
-.*: 642a0aa4 bfmla z4.h, z21.h, z2.h\[3\]
-.*: 64210988 bfmla z8.h, z12.h, z1.h\[1\]
-.*: 64200950 bfmla z16.h, z10.h, z0.h\[1\]
+.*: 647e0a00 bfmla z0.h, z16.h, z6.h\[7\]
+.*: 64750901 bfmla z1.h, z8.h, z5.h\[6\]
+.*: 646409c2 bfmla z2.h, z14.h, z4.h\[4\]
+.*: 64320aa4 bfmla z4.h, z21.h, z2.h\[2\]
+.*: 64290988 bfmla z8.h, z12.h, z1.h\[1\]
+.*: 64200950 bfmla z16.h, z10.h, z0.h\[0\]
.*: 65302000 bfmls z0.h, p0\/m, z0.h, z16.h
.*: 65282421 bfmls z1.h, p1\/m, z1.h, z8.h
.*: 65242842 bfmls z2.h, p2\/m, z2.h, z4.h
.*: 65223084 bfmls z4.h, p4\/m, z4.h, z2.h
.*: 65213908 bfmls z8.h, p6\/m, z8.h, z1.h
.*: 65203e10 bfmls z16.h, p7\/m, z16.h, z0.h
-.*: 643e0e00 bfmls z0.h, z16.h, z6.h\[7\]
-.*: 643d0d01 bfmls z1.h, z8.h, z5.h\[7\]
-.*: 64340dc2 bfmls z2.h, z14.h, z4.h\[5\]
-.*: 642a0ea4 bfmls z4.h, z21.h, z2.h\[3\]
-.*: 64210d88 bfmls z8.h, z12.h, z1.h\[1\]
-.*: 64200d50 bfmls z16.h, z10.h, z0.h\[1\]
+.*: 647e0e00 bfmls z0.h, z16.h, z6.h\[7\]
+.*: 64750d01 bfmls z1.h, z8.h, z5.h\[6\]
+.*: 64640dc2 bfmls z2.h, z14.h, z4.h\[4\]
+.*: 64320ea4 bfmls z4.h, z21.h, z2.h\[2\]
+.*: 64290d88 bfmls z8.h, z12.h, z1.h\[1\]
+.*: 64200d50 bfmls z16.h, z10.h, z0.h\[0\]
.*: 65028200 bfmul z0.h, p0\/m, z0.h, z16.h
.*: 65028501 bfmul z1.h, p1\/m, z1.h, z8.h
.*: 65028882 bfmul z2.h, p2\/m, z2.h, z4.h
@@ -86,12 +86,12 @@
.*: 65020a04 bfmul z4.h, z16.h, z2.h
.*: 65010a88 bfmul z8.h, z20.h, z1.h
.*: 65000b10 bfmul z16.h, z24.h, z0.h
-.*: 643e2a00 bfmul z0.h, z16.h, z6.h\[7\]
-.*: 643d2901 bfmul z1.h, z8.h, z5.h\[7\]
-.*: 643429c2 bfmul z2.h, z14.h, z4.h\[5\]
-.*: 642a2aa4 bfmul z4.h, z21.h, z2.h\[3\]
-.*: 64212988 bfmul z8.h, z12.h, z1.h\[1\]
-.*: 64202950 bfmul z16.h, z10.h, z0.h\[1\]
+.*: 647e2a00 bfmul z0.h, z16.h, z6.h\[7\]
+.*: 64752901 bfmul z1.h, z8.h, z5.h\[6\]
+.*: 646429c2 bfmul z2.h, z14.h, z4.h\[4\]
+.*: 64322aa4 bfmul z4.h, z21.h, z2.h\[2\]
+.*: 64292988 bfmul z8.h, z12.h, z1.h\[1\]
+.*: 64202950 bfmul z16.h, z10.h, z0.h\[0\]
.*: 65018200 bfsub z0.h, p0\/m, z0.h, z16.h
.*: 65018501 bfsub z1.h, p1\/m, z1.h, z8.h
.*: 65018882 bfsub z2.h, p2\/m, z2.h, z4.h
@@ -6344,9 +6344,9 @@ const struct aarch64_opcode aarch64_opco
B16B16_INSN("bfmul", 0x65000800, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
B16B16_INSNC("bfsub", 0x65018000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
B16B16_INSN("bfsub", 0x65000400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
- B16B16_INSN("bfmla", 0x64200800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_11_INDEX), OP_SVE_VVV_H, 0, 0),
- B16B16_INSN("bfmls", 0x64200c00, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_11_INDEX), OP_SVE_VVV_H, 0, 0),
- B16B16_INSN("bfmul", 0x64202800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_11_INDEX), OP_SVE_VVV_H, 0, 0),
+ B16B16_INSN("bfmla", 0x64200800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
+ B16B16_INSN("bfmls", 0x64200c00, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
+ B16B16_INSN("bfmul", 0x64202800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
/* SME2.1 movaz instructions. */
SME2p1_INSN ("movaz", 0xc0060600, 0xffff1f83, sme2_movaz, 0, OP2 (SME_Zdnx4, SME_ZA_array_vrsb_2), OP_SVE_BB, 0, 0),