On 10/04/2024 16:29, Victor Do Nascimento wrote:
> @@ -6459,6 +6487,19 @@ const struct aarch64_opcode
> aarch64_opcode_table[] =
> SVE2p1_INSNC("st2q",0xe4600000, 0xffe0e000, sve_misc, 0, OP3
> (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
> SVE2p1_INSNC("st3q",0xe4a00000, 0xffe0e000, sve_misc, 0, OP3
> (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
> SVE2p1_INSNC("st4q",0xe4e00000, 0xffe0e000, sve_misc, 0, OP3
> (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
> + FP8_INSN("bf1cvtl", 0x2ea17800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn),
> QL_V2FP8B8H, 0),
> + FP8_INSN("bf1cvtl2", 0x6ea17800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn),
> QL_V28H16B, 0),
> + FP8_INSN("bf2cvtl", 0x2ee17800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn),
> QL_V2FP8B8H, 0),
> + FP8_INSN("bf2cvtl2", 0x6ee17800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn),
> QL_V28H16B, 0),
> + FP8_INSN("f1cvtl", 0x2e217800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn),
> QL_V2FP8B8H, 0),
> + FP8_INSN("f1cvtl2", 0x6e217800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn),
> QL_V28H16B, 0),
> + FP8_INSN("f2cvtl", 0x2e617800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn),
> QL_V2FP8B8H, 0),
> + FP8_INSN("f2cvtl2", 0x6e617800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn),
> QL_V28H16B, 0),
> + FP8_INSN("fcvtn", 0xe00f400, 0xffe0fc00, asimdmisc, OP3 (Vd, Vn,
> Vm), QL_V3_BSS_LOWER, 0),
Nit: The opcode in this pattern is missing the leading zero (it should read 0x0e00f400), which makes it a bit harder to verify. Could you please add that before pushing?
> + FP8_INSN("fcvtn2", 0x4e00f400, 0xffe0fc00, asimdmisc, OP3 (Vd, Vn,
> Vm), QL_V3_BSS_FULL, 0),
> + FP8_INSN("fcvtn", 0xe40f400, 0xbfe0fc00, asimdmisc, OP3 (Vd, Vn,
> Vm), QL_V3_BHH, F_SIZEQ),
Same here: this opcode is also missing its leading zero (it should read 0x0e40f400).
> + FP8_INSN("fscale", 0x2ec03c00, 0xbfe0fc00, asimdmisc, OP3 (Vd, Vn,
> Vm), QL_VSHIFT_H, F_SIZEQ),
> + FP8_INSN("fscale", 0x2ea0fc00, 0xbfa0fc00, asimdmisc, OP3 (Vd, Vn,
> Vm), QL_V3SAMESD, F_SIZEQ),
R.
On Wed, Apr 10, 2024 at 04:29:48PM +0100, Victor Do Nascimento wrote:
> Add the advanced SIMD variant of the FP8 convert and scale
> instructions, enabled at assembly-time using the `+fp8'
> architectural extension flag. More specifically, support is
> added for the following instructions:
>
...
> diff --git a/gas/testsuite/gas/aarch64/advsimd-fp8.s b/gas/testsuite/gas/aarch64/advsimd-fp8.s
> new file mode 100644
> index 00000000000..e49f38d420a
> --- /dev/null
> +++ b/gas/testsuite/gas/aarch64/advsimd-fp8.s
> @@ -0,0 +1,76 @@
> + /* advsimd-fp8.s Test file for AArch64 8-bit floating-point vector
> + instructions. */
> +
> + /* Instructions convert the elements from the lower half of the source
> + vector while scaling the values by 2^-UInt(FPMR.LSCALE{2}[3:0]). */
> +
> + .macro cvrt_lowerhalf, op
> + \op v0.8h, v0.8b
> + \op v1.8h, v0.8b
> + \op v0.8h, v1.8b
> + \op v1.8h, v1.8b
> + \op v16.8h, v17.8b
> + .endm
> +
> + cvrt_lowerhalf bf1cvtl
> + cvrt_lowerhalf bf2cvtl
> + cvrt_lowerhalf f1cvtl
> + cvrt_lowerhalf f2cvtl
> +
> + /* Instructions convert the elements from the upper half of the source
> + vector while scaling the values by 2^-UInt(FPMR.LSCALE{2}[3:0]). */
> +
> + .macro cvrt_upperhalf, op
> + \op v0.8h, v0.16b
> + \op v1.8h, v0.16b
> + \op v0.8h, v1.16b
> + \op v1.8h, v1.16b
> + \op v16.8h, v17.16b
> + .endm
> +
> + cvrt_upperhalf bf1cvtl2
> + cvrt_upperhalf bf2cvtl2
> + cvrt_upperhalf f1cvtl2
> + cvrt_upperhalf f2cvtl2
> +
> + /* Floating-point adjust exponent by vector. */
> +
> + .macro fscale_gen, op_var
> + fscale v0.\op_var, v0.\op_var, v0.\op_var
> + fscale v1.\op_var, v0.\op_var, v0.\op_var
> + fscale v0.\op_var, v1.\op_var, v0.\op_var
> + fscale v0.\op_var, v0.\op_var, v1.\op_var
> + fscale v1.\op_var, v1.\op_var, v0.\op_var
> + fscale v0.\op_var, v1.\op_var, v1.\op_var
> + fscale v1.\op_var, v1.\op_var, v1.\op_var
> + fscale v16.\op_var, v17.\op_var, v18.\op_var
> + .endm
> +
> + /* Half-precision variant. */
> + fscale_gen 4h
> + fscale_gen 8h
> + /* Single-precision variant. */
> + fscale_gen 2s
> + fscale_gen 4s
> + fscale_gen 2d
> +
> + /* Half and single-precision to FP8 convert and narrow. */
> +
> + .macro fcvtn_to_fp8, op, sd, ss
> + \op v0.\sd, v0.\ss, v0.\ss
> + \op v1.\sd, v0.\ss, v0.\ss
> + \op v0.\sd, v1.\ss, v0.\ss
> + \op v0.\sd, v0.\ss, v1.\ss
> + \op v1.\sd, v1.\ss, v0.\ss
> + \op v0.\sd, v1.\ss, v1.\ss
> + \op v1.\sd, v1.\ss, v1.\ss
> + \op v16.\sd, v17.\ss, v18.\ss
> + .endm
> +
> + /* Half-precision variant. */
> + fcvtn_to_fp8 fcvtn 8b, 4h
> + fcvtn_to_fp8 fcvtn 16b, 8h
> +
> + /* Single-precision variant. */
> + fcvtn_to_fp8 fcvtn, 8b, 4s
> + fcvtn_to_fp8 fcvtn2, 16b, 4s
Some register operand bits always take the value 0 in these tests, so they don't
show that the opcode mask is correct at those locations. It would be better to
ensure that each operand bit is set to both 0 and 1 in some test. A good way
to do this in general is to have tests that use register 31 (or the highest
valid register) for each operand in turn, while leaving the other registers set
to 0.
> diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
> index 7e603462a37..f876c1b342f 100644
> --- a/opcodes/aarch64-tbl.h
> +++ b/opcodes/aarch64-tbl.h
> @@ -2368,6 +2368,34 @@
> QLF3(X,X,NIL), \
> }
>
> +#define QL_V3_BSS_LOWER \
> +{ \
> + QLF3(V_8B, V_4S, V_4S), \
> +}
> +
> +#define QL_V3_BSS_FULL \
> +{ \
> + QLF3(V_16B, V_4S, V_4S), \
> +}
> +
> +#define QL_V3_BHH \
> +{ \
> + QLF3(V_8B, V_4H, V_4H), \
> + QLF3(V_16B, V_8H, V_8H), \
> +}
> +
> +/* e.g. BF1CVTL <Vd>.8H, <Vn>.8B. */
> +#define QL_V2FP8B8H \
> +{ \
> + QLF2(V_8H, V_8B), \
> +}
How about aligning names with existing qualifier sets - e.g. QL_V2LONGB{2} to
match QL_V2LONGHS{2}? Or, alternatively, QL_V2_HB_{LOWER|FULL} to match your
other new qualifiers?
> +/* e.g. BF1CVTL2 <Vd>.8H, <Vn>.16B. */
> +#define QL_V28H16B \
> +{ \
> + QLF2(V_8H, V_16B), \
> +}
> +
> /* e.g. UDOT <Vd>.2S, <Vn>.8B, <Vm>.8B. */
> #define QL_V3DOT \
> { \
> @@ -6459,6 +6487,19 @@ const struct aarch64_opcode aarch64_opcode_table[] =
> SVE2p1_INSNC("st2q",0xe4600000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
> SVE2p1_INSNC("st3q",0xe4a00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
> SVE2p1_INSNC("st4q",0xe4e00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
> + FP8_INSN("bf1cvtl", 0x2ea17800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V2FP8B8H, 0),
> + FP8_INSN("bf1cvtl2", 0x6ea17800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V28H16B, 0),
> + FP8_INSN("bf2cvtl", 0x2ee17800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V2FP8B8H, 0),
> + FP8_INSN("bf2cvtl2", 0x6ee17800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V28H16B, 0),
> + FP8_INSN("f1cvtl", 0x2e217800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V2FP8B8H, 0),
> + FP8_INSN("f1cvtl2", 0x6e217800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V28H16B, 0),
> + FP8_INSN("f2cvtl", 0x2e617800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V2FP8B8H, 0),
> + FP8_INSN("f2cvtl2", 0x6e617800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V28H16B, 0),
> + FP8_INSN("fcvtn", 0xe00f400, 0xffe0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_V3_BSS_LOWER, 0),
> + FP8_INSN("fcvtn2", 0x4e00f400, 0xffe0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_V3_BSS_FULL, 0),
> + FP8_INSN("fcvtn", 0xe40f400, 0xbfe0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_V3_BHH, F_SIZEQ),
> + FP8_INSN("fscale", 0x2ec03c00, 0xbfe0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_VSHIFT_H, F_SIZEQ),
> + FP8_INSN("fscale", 0x2ea0fc00, 0xbfa0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ),
>
> /* Checked Pointer Arithmetic Instructions. */
> CPA_INSN ("addpt", 0x9a002000, 0xffe0e000, aarch64_misc, OP3 (Rd_SP, Rn_SP, Rm_LSL), QL_I3SAMEX),
> --
> 2.34.1
>
new file mode 100644
@@ -0,0 +1,2 @@
+#as: -march=armv8.5-a+fp8 -mno-verbose-error
+#error_output: advsimd-fp8-fail.l
\ No newline at end of file
new file mode 100644
@@ -0,0 +1,262 @@
+[^:]+: Assembler messages:
+[^:]+:9: Error: operand mismatch -- `bf1cvtl v0.16b,v1.8b'
+[^:]+:27: Info: macro invoked from here
+[^:]+:10: Error: operand mismatch -- `bf1cvtl v0.8b,v1.8b'
+[^:]+:27: Info: macro invoked from here
+[^:]+:11: Error: operand mismatch -- `bf1cvtl v0.4h,v1.8b'
+[^:]+:27: Info: macro invoked from here
+[^:]+:13: Error: operand mismatch -- `bf1cvtl v0.2s,v1.8b'
+[^:]+:27: Info: macro invoked from here
+[^:]+:14: Error: operand mismatch -- `bf1cvtl v0.4s,v1.8b'
+[^:]+:27: Info: macro invoked from here
+[^:]+:15: Error: operand mismatch -- `bf1cvtl v0.2d,v1.8b'
+[^:]+:27: Info: macro invoked from here
+[^:]+:16: Error: invalid use of vector register at operand 1 -- `bf1cvtl v0,v1.8b'
+[^:]+:27: Info: macro invoked from here
+[^:]+:18: Error: operand mismatch -- `bf1cvtl v0.8h,v1.4h'
+[^:]+:27: Info: macro invoked from here
+[^:]+:19: Error: operand mismatch -- `bf1cvtl v0.8h,v1.2s'
+[^:]+:27: Info: macro invoked from here
+[^:]+:20: Error: operand mismatch -- `bf1cvtl v0.8h,v1.16b'
+[^:]+:27: Info: macro invoked from here
+[^:]+:21: Error: operand mismatch -- `bf1cvtl v0.8h,v1.8h'
+[^:]+:27: Info: macro invoked from here
+[^:]+:22: Error: operand mismatch -- `bf1cvtl v0.8h,v1.4s'
+[^:]+:27: Info: macro invoked from here
+[^:]+:23: Error: operand mismatch -- `bf1cvtl v0.8h,v1.2d'
+[^:]+:27: Info: macro invoked from here
+[^:]+:24: Error: invalid use of vector register at operand 2 -- `bf1cvtl v0.8h,v0'
+[^:]+:27: Info: macro invoked from here
+[^:]+:9: Error: operand mismatch -- `bf2cvtl v0.16b,v1.8b'
+[^:]+:28: Info: macro invoked from here
+[^:]+:10: Error: operand mismatch -- `bf2cvtl v0.8b,v1.8b'
+[^:]+:28: Info: macro invoked from here
+[^:]+:11: Error: operand mismatch -- `bf2cvtl v0.4h,v1.8b'
+[^:]+:28: Info: macro invoked from here
+[^:]+:13: Error: operand mismatch -- `bf2cvtl v0.2s,v1.8b'
+[^:]+:28: Info: macro invoked from here
+[^:]+:14: Error: operand mismatch -- `bf2cvtl v0.4s,v1.8b'
+[^:]+:28: Info: macro invoked from here
+[^:]+:15: Error: operand mismatch -- `bf2cvtl v0.2d,v1.8b'
+[^:]+:28: Info: macro invoked from here
+[^:]+:16: Error: invalid use of vector register at operand 1 -- `bf2cvtl v0,v1.8b'
+[^:]+:28: Info: macro invoked from here
+[^:]+:18: Error: operand mismatch -- `bf2cvtl v0.8h,v1.4h'
+[^:]+:28: Info: macro invoked from here
+[^:]+:19: Error: operand mismatch -- `bf2cvtl v0.8h,v1.2s'
+[^:]+:28: Info: macro invoked from here
+[^:]+:20: Error: operand mismatch -- `bf2cvtl v0.8h,v1.16b'
+[^:]+:28: Info: macro invoked from here
+[^:]+:21: Error: operand mismatch -- `bf2cvtl v0.8h,v1.8h'
+[^:]+:28: Info: macro invoked from here
+[^:]+:22: Error: operand mismatch -- `bf2cvtl v0.8h,v1.4s'
+[^:]+:28: Info: macro invoked from here
+[^:]+:23: Error: operand mismatch -- `bf2cvtl v0.8h,v1.2d'
+[^:]+:28: Info: macro invoked from here
+[^:]+:24: Error: invalid use of vector register at operand 2 -- `bf2cvtl v0.8h,v0'
+[^:]+:28: Info: macro invoked from here
+[^:]+:9: Error: operand mismatch -- `f1cvtl v0.16b,v1.8b'
+[^:]+:29: Info: macro invoked from here
+[^:]+:10: Error: operand mismatch -- `f1cvtl v0.8b,v1.8b'
+[^:]+:29: Info: macro invoked from here
+[^:]+:11: Error: operand mismatch -- `f1cvtl v0.4h,v1.8b'
+[^:]+:29: Info: macro invoked from here
+[^:]+:13: Error: operand mismatch -- `f1cvtl v0.2s,v1.8b'
+[^:]+:29: Info: macro invoked from here
+[^:]+:14: Error: operand mismatch -- `f1cvtl v0.4s,v1.8b'
+[^:]+:29: Info: macro invoked from here
+[^:]+:15: Error: operand mismatch -- `f1cvtl v0.2d,v1.8b'
+[^:]+:29: Info: macro invoked from here
+[^:]+:16: Error: invalid use of vector register at operand 1 -- `f1cvtl v0,v1.8b'
+[^:]+:29: Info: macro invoked from here
+[^:]+:18: Error: operand mismatch -- `f1cvtl v0.8h,v1.4h'
+[^:]+:29: Info: macro invoked from here
+[^:]+:19: Error: operand mismatch -- `f1cvtl v0.8h,v1.2s'
+[^:]+:29: Info: macro invoked from here
+[^:]+:20: Error: operand mismatch -- `f1cvtl v0.8h,v1.16b'
+[^:]+:29: Info: macro invoked from here
+[^:]+:21: Error: operand mismatch -- `f1cvtl v0.8h,v1.8h'
+[^:]+:29: Info: macro invoked from here
+[^:]+:22: Error: operand mismatch -- `f1cvtl v0.8h,v1.4s'
+[^:]+:29: Info: macro invoked from here
+[^:]+:23: Error: operand mismatch -- `f1cvtl v0.8h,v1.2d'
+[^:]+:29: Info: macro invoked from here
+[^:]+:24: Error: invalid use of vector register at operand 2 -- `f1cvtl v0.8h,v0'
+[^:]+:29: Info: macro invoked from here
+[^:]+:9: Error: operand mismatch -- `f2cvtl v0.16b,v1.8b'
+[^:]+:30: Info: macro invoked from here
+[^:]+:10: Error: operand mismatch -- `f2cvtl v0.8b,v1.8b'
+[^:]+:30: Info: macro invoked from here
+[^:]+:11: Error: operand mismatch -- `f2cvtl v0.4h,v1.8b'
+[^:]+:30: Info: macro invoked from here
+[^:]+:13: Error: operand mismatch -- `f2cvtl v0.2s,v1.8b'
+[^:]+:30: Info: macro invoked from here
+[^:]+:14: Error: operand mismatch -- `f2cvtl v0.4s,v1.8b'
+[^:]+:30: Info: macro invoked from here
+[^:]+:15: Error: operand mismatch -- `f2cvtl v0.2d,v1.8b'
+[^:]+:30: Info: macro invoked from here
+[^:]+:16: Error: invalid use of vector register at operand 1 -- `f2cvtl v0,v1.8b'
+[^:]+:30: Info: macro invoked from here
+[^:]+:18: Error: operand mismatch -- `f2cvtl v0.8h,v1.4h'
+[^:]+:30: Info: macro invoked from here
+[^:]+:19: Error: operand mismatch -- `f2cvtl v0.8h,v1.2s'
+[^:]+:30: Info: macro invoked from here
+[^:]+:20: Error: operand mismatch -- `f2cvtl v0.8h,v1.16b'
+[^:]+:30: Info: macro invoked from here
+[^:]+:21: Error: operand mismatch -- `f2cvtl v0.8h,v1.8h'
+[^:]+:30: Info: macro invoked from here
+[^:]+:22: Error: operand mismatch -- `f2cvtl v0.8h,v1.4s'
+[^:]+:30: Info: macro invoked from here
+[^:]+:23: Error: operand mismatch -- `f2cvtl v0.8h,v1.2d'
+[^:]+:30: Info: macro invoked from here
+[^:]+:24: Error: invalid use of vector register at operand 2 -- `f2cvtl v0.8h,v0'
+[^:]+:30: Info: macro invoked from here
+[^:]+:37: Error: operand mismatch -- `bf1cvtl2 v0.16b,v1.16b'
+[^:]+:56: Info: macro invoked from here
+[^:]+:38: Error: operand mismatch -- `bf1cvtl2 v0.8b,v1.16b'
+[^:]+:56: Info: macro invoked from here
+[^:]+:39: Error: operand mismatch -- `bf1cvtl2 v0.4h,v1.16b'
+[^:]+:56: Info: macro invoked from here
+[^:]+:41: Error: operand mismatch -- `bf1cvtl2 v0.2s,v1.16b'
+[^:]+:56: Info: macro invoked from here
+[^:]+:42: Error: operand mismatch -- `bf1cvtl2 v0.4s,v1.16b'
+[^:]+:56: Info: macro invoked from here
+[^:]+:43: Error: operand mismatch -- `bf1cvtl2 v0.2d,v1.16b'
+[^:]+:56: Info: macro invoked from here
+[^:]+:44: Error: invalid use of vector register at operand 1 -- `bf1cvtl2 v0,v1.16b'
+[^:]+:56: Info: macro invoked from here
+[^:]+:46: Error: operand mismatch -- `bf1cvtl2 v0.8h,v1.8b'
+[^:]+:56: Info: macro invoked from here
+[^:]+:47: Error: operand mismatch -- `bf1cvtl2 v0.8h,v1.4h'
+[^:]+:56: Info: macro invoked from here
+[^:]+:48: Error: operand mismatch -- `bf1cvtl2 v0.8h,v1.2s'
+[^:]+:56: Info: macro invoked from here
+[^:]+:50: Error: operand mismatch -- `bf1cvtl2 v0.8h,v1.8h'
+[^:]+:56: Info: macro invoked from here
+[^:]+:51: Error: operand mismatch -- `bf1cvtl2 v0.8h,v1.4s'
+[^:]+:56: Info: macro invoked from here
+[^:]+:52: Error: operand mismatch -- `bf1cvtl2 v0.8h,v1.2d'
+[^:]+:56: Info: macro invoked from here
+[^:]+:53: Error: invalid use of vector register at operand 2 -- `bf1cvtl2 v0.8h,v1'
+[^:]+:56: Info: macro invoked from here
+[^:]+:37: Error: operand mismatch -- `bf2cvtl2 v0.16b,v1.16b'
+[^:]+:57: Info: macro invoked from here
+[^:]+:38: Error: operand mismatch -- `bf2cvtl2 v0.8b,v1.16b'
+[^:]+:57: Info: macro invoked from here
+[^:]+:39: Error: operand mismatch -- `bf2cvtl2 v0.4h,v1.16b'
+[^:]+:57: Info: macro invoked from here
+[^:]+:41: Error: operand mismatch -- `bf2cvtl2 v0.2s,v1.16b'
+[^:]+:57: Info: macro invoked from here
+[^:]+:42: Error: operand mismatch -- `bf2cvtl2 v0.4s,v1.16b'
+[^:]+:57: Info: macro invoked from here
+[^:]+:43: Error: operand mismatch -- `bf2cvtl2 v0.2d,v1.16b'
+[^:]+:57: Info: macro invoked from here
+[^:]+:44: Error: invalid use of vector register at operand 1 -- `bf2cvtl2 v0,v1.16b'
+[^:]+:57: Info: macro invoked from here
+[^:]+:46: Error: operand mismatch -- `bf2cvtl2 v0.8h,v1.8b'
+[^:]+:57: Info: macro invoked from here
+[^:]+:47: Error: operand mismatch -- `bf2cvtl2 v0.8h,v1.4h'
+[^:]+:57: Info: macro invoked from here
+[^:]+:48: Error: operand mismatch -- `bf2cvtl2 v0.8h,v1.2s'
+[^:]+:57: Info: macro invoked from here
+[^:]+:50: Error: operand mismatch -- `bf2cvtl2 v0.8h,v1.8h'
+[^:]+:57: Info: macro invoked from here
+[^:]+:51: Error: operand mismatch -- `bf2cvtl2 v0.8h,v1.4s'
+[^:]+:57: Info: macro invoked from here
+[^:]+:52: Error: operand mismatch -- `bf2cvtl2 v0.8h,v1.2d'
+[^:]+:57: Info: macro invoked from here
+[^:]+:53: Error: invalid use of vector register at operand 2 -- `bf2cvtl2 v0.8h,v1'
+[^:]+:57: Info: macro invoked from here
+[^:]+:37: Error: operand mismatch -- `f1cvtl2 v0.16b,v1.16b'
+[^:]+:58: Info: macro invoked from here
+[^:]+:38: Error: operand mismatch -- `f1cvtl2 v0.8b,v1.16b'
+[^:]+:58: Info: macro invoked from here
+[^:]+:39: Error: operand mismatch -- `f1cvtl2 v0.4h,v1.16b'
+[^:]+:58: Info: macro invoked from here
+[^:]+:41: Error: operand mismatch -- `f1cvtl2 v0.2s,v1.16b'
+[^:]+:58: Info: macro invoked from here
+[^:]+:42: Error: operand mismatch -- `f1cvtl2 v0.4s,v1.16b'
+[^:]+:58: Info: macro invoked from here
+[^:]+:43: Error: operand mismatch -- `f1cvtl2 v0.2d,v1.16b'
+[^:]+:58: Info: macro invoked from here
+[^:]+:44: Error: invalid use of vector register at operand 1 -- `f1cvtl2 v0,v1.16b'
+[^:]+:58: Info: macro invoked from here
+[^:]+:46: Error: operand mismatch -- `f1cvtl2 v0.8h,v1.8b'
+[^:]+:58: Info: macro invoked from here
+[^:]+:47: Error: operand mismatch -- `f1cvtl2 v0.8h,v1.4h'
+[^:]+:58: Info: macro invoked from here
+[^:]+:48: Error: operand mismatch -- `f1cvtl2 v0.8h,v1.2s'
+[^:]+:58: Info: macro invoked from here
+[^:]+:50: Error: operand mismatch -- `f1cvtl2 v0.8h,v1.8h'
+[^:]+:58: Info: macro invoked from here
+[^:]+:51: Error: operand mismatch -- `f1cvtl2 v0.8h,v1.4s'
+[^:]+:58: Info: macro invoked from here
+[^:]+:52: Error: operand mismatch -- `f1cvtl2 v0.8h,v1.2d'
+[^:]+:58: Info: macro invoked from here
+[^:]+:53: Error: invalid use of vector register at operand 2 -- `f1cvtl2 v0.8h,v1'
+[^:]+:58: Info: macro invoked from here
+[^:]+:37: Error: operand mismatch -- `f2cvtl2 v0.16b,v1.16b'
+[^:]+:59: Info: macro invoked from here
+[^:]+:38: Error: operand mismatch -- `f2cvtl2 v0.8b,v1.16b'
+[^:]+:59: Info: macro invoked from here
+[^:]+:39: Error: operand mismatch -- `f2cvtl2 v0.4h,v1.16b'
+[^:]+:59: Info: macro invoked from here
+[^:]+:41: Error: operand mismatch -- `f2cvtl2 v0.2s,v1.16b'
+[^:]+:59: Info: macro invoked from here
+[^:]+:42: Error: operand mismatch -- `f2cvtl2 v0.4s,v1.16b'
+[^:]+:59: Info: macro invoked from here
+[^:]+:43: Error: operand mismatch -- `f2cvtl2 v0.2d,v1.16b'
+[^:]+:59: Info: macro invoked from here
+[^:]+:44: Error: invalid use of vector register at operand 1 -- `f2cvtl2 v0,v1.16b'
+[^:]+:59: Info: macro invoked from here
+[^:]+:46: Error: operand mismatch -- `f2cvtl2 v0.8h,v1.8b'
+[^:]+:59: Info: macro invoked from here
+[^:]+:47: Error: operand mismatch -- `f2cvtl2 v0.8h,v1.4h'
+[^:]+:59: Info: macro invoked from here
+[^:]+:48: Error: operand mismatch -- `f2cvtl2 v0.8h,v1.2s'
+[^:]+:59: Info: macro invoked from here
+[^:]+:50: Error: operand mismatch -- `f2cvtl2 v0.8h,v1.8h'
+[^:]+:59: Info: macro invoked from here
+[^:]+:51: Error: operand mismatch -- `f2cvtl2 v0.8h,v1.4s'
+[^:]+:59: Info: macro invoked from here
+[^:]+:52: Error: operand mismatch -- `f2cvtl2 v0.8h,v1.2d'
+[^:]+:59: Info: macro invoked from here
+[^:]+:53: Error: invalid use of vector register at operand 2 -- `f2cvtl2 v0.8h,v1'
+[^:]+:59: Info: macro invoked from here
+[^:]+:64: Error: operand mismatch -- `fscale v0.8b,v0.8b,v0.8b'
+[^:]+:65: Error: operand mismatch -- `fscale v0.16b,v0.16b,v0.16b'
+[^:]+:72: Error: operand mismatch -- `fcvtn v0.4h,v1.4h,v2.4h'
+[^:]+:73: Error: operand mismatch -- `fcvtn v0.2s,v1.4h,v2.4h'
+[^:]+:74: Error: operand mismatch -- `fcvtn v0.16b,v1.4h,v2.4h'
+[^:]+:75: Error: operand mismatch -- `fcvtn v0.8h,v1.4h,v2.4h'
+[^:]+:76: Error: operand mismatch -- `fcvtn v0.4s,v1.4h,v2.4h'
+[^:]+:77: Error: operand mismatch -- `fcvtn v0.2d,v1.4h,v2.4h'
+[^:]+:79: Error: operand mismatch -- `fcvtn v0.8b,v1.8h,v2.8h'
+[^:]+:80: Error: operand mismatch -- `fcvtn v0.4h,v1.8h,v2.8h'
+[^:]+:81: Error: operand mismatch -- `fcvtn v0.2s,v1.8h,v2.8h'
+[^:]+:83: Error: operand mismatch -- `fcvtn v0.8h,v1.8h,v2.8h'
+[^:]+:84: Error: operand mismatch -- `fcvtn v0.4s,v1.8h,v2.8h'
+[^:]+:85: Error: operand mismatch -- `fcvtn v0.2d,v1.8h,v2.8h'
+[^:]+:88: Error: operand mismatch -- `fcvtn v0.8b,v1.8b,v2.8b'
+[^:]+:90: Error: operand mismatch -- `fcvtn v0.8b,v1.2s,v2.2s'
+[^:]+:91: Error: operand mismatch -- `fcvtn v0.8b,v1.16b,v2.16b'
+[^:]+:92: Error: operand mismatch -- `fcvtn v0.8b,v1.8h,v2.8h'
+[^:]+:94: Error: operand mismatch -- `fcvtn v0.8b,v1.2d,v2.2d'
+[^:]+:96: Error: operand mismatch -- `fcvtn v0.16b,v1.8b,v2.8b'
+[^:]+:97: Error: operand mismatch -- `fcvtn v0.16b,v1.4h,v2.4h'
+[^:]+:98: Error: operand mismatch -- `fcvtn v0.16b,v1.2s,v2.2s'
+[^:]+:99: Error: operand mismatch -- `fcvtn v0.16b,v1.16b,v2.16b'
+[^:]+:101: Error: operand mismatch -- `fcvtn v0.16b,v1.4s,v2.4s'
+[^:]+:102: Error: operand mismatch -- `fcvtn v0.16b,v1.2d,v2.2d'
+[^:]+:107: Error: operand mismatch -- `fcvtn v0.4h,v1.4s,v2.4s'
+[^:]+:108: Error: operand mismatch -- `fcvtn v0.2s,v1.4s,v2.4s'
+[^:]+:109: Error: operand mismatch -- `fcvtn v0.16b,v1.4s,v2.4s'
+[^:]+:110: Error: operand mismatch -- `fcvtn v0.8h,v1.4s,v2.4s'
+[^:]+:111: Error: operand mismatch -- `fcvtn v0.4s,v1.4s,v2.4s'
+[^:]+:112: Error: operand mismatch -- `fcvtn v0.2d,v1.4s,v2.4s'
+[^:]+:115: Error: operand mismatch -- `fcvtn2 v0.16b,v1.8b,v2.8b'
+[^:]+:116: Error: operand mismatch -- `fcvtn2 v0.16b,v1.4h,v2.4h'
+[^:]+:117: Error: operand mismatch -- `fcvtn2 v0.16b,v1.2d,v2.2d'
+[^:]+:118: Error: operand mismatch -- `fcvtn2 v0.16b,v1.16b,v2.16b'
+[^:]+:119: Error: operand mismatch -- `fcvtn2 v0.16b,v1.8h,v2.8h'
+[^:]+:121: Error: operand mismatch -- `fcvtn2 v0.16b,v1.2d,v2.2d'
new file mode 100644
@@ -0,0 +1,121 @@
+ /* advsimd-fp8-fail.s Test file for error-checking AArch64 8-bit
+ floating-point vector instructions. */
+
+ /* Instructions convert the elements from the lower half of the source
+ vector while scaling the values by 2^-UInt(FPMR.LSCALE{2}[3:0]). */
+
+ .macro cvrt_lowerhalf, op
+ /* Check the number and width of destination vector elements. */
+ \op v0.16b, v1.8b
+ \op v0.8b, v1.8b
+ \op v0.4h, v1.8b
+ \op v0.8h, v1.8b /* Valid. */
+ \op v0.2s, v1.8b
+ \op v0.4s, v1.8b
+ \op v0.2d, v1.8b
+ \op v0, v1.8b
+ /* Check the number and width of source vector elements. */
+ \op v0.8h, v1.4h
+ \op v0.8h, v1.2s
+ \op v0.8h, v1.16b
+ \op v0.8h, v1.8h
+ \op v0.8h, v1.4s
+ \op v0.8h, v1.2d
+ \op v0.8h, v0
+ .endm
+
+ cvrt_lowerhalf bf1cvtl
+ cvrt_lowerhalf bf2cvtl
+ cvrt_lowerhalf f1cvtl
+ cvrt_lowerhalf f2cvtl
+
+ /* Instructions convert the elements from the upper half of the source
+ vector while scaling the values by 2^-UInt(FPMR.LSCALE{2}[3:0]). */
+
+ .macro cvrt_upperhalf, op
+ /* Check the number and width of destination vector elements. */
+ \op v0.16b, v1.16b
+ \op v0.8b, v1.16b
+ \op v0.4h, v1.16b
+ \op v0.8h, v1.16b /* Valid. */
+ \op v0.2s, v1.16b
+ \op v0.4s, v1.16b
+ \op v0.2d, v1.16b
+ \op v0, v1.16b
+ /* Check the number and width of source vector elements. */
+ \op v0.8h, v1.8b
+ \op v0.8h, v1.4h
+ \op v0.8h, v1.2s
+ \op v0.8h, v1.16b /* Valid. */
+ \op v0.8h, v1.8h
+ \op v0.8h, v1.4s
+ \op v0.8h, v1.2d
+ \op v0.8h, v1
+ .endm
+
+ cvrt_upperhalf bf1cvtl2
+ cvrt_upperhalf bf2cvtl2
+ cvrt_upperhalf f1cvtl2
+ cvrt_upperhalf f2cvtl2
+
+ /* Floating-point adjust exponent by vector. */
+
+ /* Check invalid vector element number and width combinations. */
+ fscale v0.8b, v0.8b, v0.8b
+ fscale v0.16b, v0.16b, v0.16b
+
+ /* Half and single-precision to FP8 convert and narrow. */
+
+ /* Half-precision variant. */
+ /* Check the number and width of destination vector elements. */
+ fcvtn v0.8b, v1.4h, v2.4h /* Valid. */
+ fcvtn v0.4h, v1.4h, v2.4h
+ fcvtn v0.2s, v1.4h, v2.4h
+ fcvtn v0.16b, v1.4h, v2.4h
+ fcvtn v0.8h, v1.4h, v2.4h
+ fcvtn v0.4s, v1.4h, v2.4h
+ fcvtn v0.2d, v1.4h, v2.4h
+
+ fcvtn v0.8b, v1.8h, v2.8h
+ fcvtn v0.4h, v1.8h, v2.8h
+ fcvtn v0.2s, v1.8h, v2.8h
+ fcvtn v0.16b, v1.8h, v2.8h /* Valid. */
+ fcvtn v0.8h, v1.8h, v2.8h
+ fcvtn v0.4s, v1.8h, v2.8h
+ fcvtn v0.2d, v1.8h, v2.8h
+
+ /* Check the number and width of source vector elements. */
+ fcvtn v0.8b, v1.8b, v2.8b
+ fcvtn v0.8b, v1.4h, v2.4h /* Valid. */
+ fcvtn v0.8b, v1.2s, v2.2s
+ fcvtn v0.8b, v1.16b, v2.16b
+ fcvtn v0.8b, v1.8h, v2.8h
+ fcvtn v0.8b, v1.4s, v2.4s /* Valid. */
+ fcvtn v0.8b, v1.2d, v2.2d
+
+ fcvtn v0.16b, v1.8b, v2.8b
+ fcvtn v0.16b, v1.4h, v2.4h
+ fcvtn v0.16b, v1.2s, v2.2s
+ fcvtn v0.16b, v1.16b, v2.16b
+ fcvtn v0.16b, v1.8h, v2.8h /* Valid. */
+ fcvtn v0.16b, v1.4s, v2.4s
+ fcvtn v0.16b, v1.2d, v2.2d
+
+ /* Single-precision variant. */
+ /* Check the number and width of destination vector elements. */
+ fcvtn v0.8b, v1.4s, v2.4s /* Valid. */
+ fcvtn v0.4h, v1.4s, v2.4s
+ fcvtn v0.2s, v1.4s, v2.4s
+ fcvtn v0.16b, v1.4s, v2.4s
+ fcvtn v0.8h, v1.4s, v2.4s
+ fcvtn v0.4s, v1.4s, v2.4s
+ fcvtn v0.2d, v1.4s, v2.4s
+
+ /* Check the number and width of source vector elements. */
+ fcvtn2 v0.16b, v1.8b, v2.8b
+ fcvtn2 v0.16b, v1.4h, v2.4h
+ fcvtn2 v0.16b, v1.2d, v2.2d
+ fcvtn2 v0.16b, v1.16b, v2.16b
+ fcvtn2 v0.16b, v1.8h, v2.8h
+ fcvtn2 v0.16b, v1.4s, v2.4s /* Valid. */
+ fcvtn2 v0.16b, v1.2d, v2.2d
new file mode 100644
@@ -0,0 +1,120 @@
+#as: -march=armv8.5-a+fp8
+#objdump: -dr
+
+.*: file format .*
+
+Disassembly of section \.text:
+
+0+ <.*>:
+[ ]*[0-9a-f]+: 2ea17800 bf1cvtl v0.8h, v0.8b
+[ ]*[0-9a-f]+: 2ea17801 bf1cvtl v1.8h, v0.8b
+[ ]*[0-9a-f]+: 2ea17820 bf1cvtl v0.8h, v1.8b
+[ ]*[0-9a-f]+: 2ea17821 bf1cvtl v1.8h, v1.8b
+[ ]*[0-9a-f]+: 2ea17a30 bf1cvtl v16.8h, v17.8b
+[ ]*[0-9a-f]+: 2ee17800 bf2cvtl v0.8h, v0.8b
+[ ]*[0-9a-f]+: 2ee17801 bf2cvtl v1.8h, v0.8b
+[ ]*[0-9a-f]+: 2ee17820 bf2cvtl v0.8h, v1.8b
+[ ]*[0-9a-f]+: 2ee17821 bf2cvtl v1.8h, v1.8b
+[ ]*[0-9a-f]+: 2ee17a30 bf2cvtl v16.8h, v17.8b
+[ ]*[0-9a-f]+: 2e217800 f1cvtl v0.8h, v0.8b
+[ ]*[0-9a-f]+: 2e217801 f1cvtl v1.8h, v0.8b
+[ ]*[0-9a-f]+: 2e217820 f1cvtl v0.8h, v1.8b
+[ ]*[0-9a-f]+: 2e217821 f1cvtl v1.8h, v1.8b
+[ ]*[0-9a-f]+: 2e217a30 f1cvtl v16.8h, v17.8b
+[ ]*[0-9a-f]+: 2e617800 f2cvtl v0.8h, v0.8b
+[ ]*[0-9a-f]+: 2e617801 f2cvtl v1.8h, v0.8b
+[ ]*[0-9a-f]+: 2e617820 f2cvtl v0.8h, v1.8b
+[ ]*[0-9a-f]+: 2e617821 f2cvtl v1.8h, v1.8b
+[ ]*[0-9a-f]+: 2e617a30 f2cvtl v16.8h, v17.8b
+[ ]*[0-9a-f]+: 6ea17800 bf1cvtl2 v0.8h, v0.16b
+[ ]*[0-9a-f]+: 6ea17801 bf1cvtl2 v1.8h, v0.16b
+[ ]*[0-9a-f]+: 6ea17820 bf1cvtl2 v0.8h, v1.16b
+[ ]*[0-9a-f]+: 6ea17821 bf1cvtl2 v1.8h, v1.16b
+[ ]*[0-9a-f]+: 6ea17a30 bf1cvtl2 v16.8h, v17.16b
+[ ]*[0-9a-f]+: 6ee17800 bf2cvtl2 v0.8h, v0.16b
+[ ]*[0-9a-f]+: 6ee17801 bf2cvtl2 v1.8h, v0.16b
+[ ]*[0-9a-f]+: 6ee17820 bf2cvtl2 v0.8h, v1.16b
+[ ]*[0-9a-f]+: 6ee17821 bf2cvtl2 v1.8h, v1.16b
+[ ]*[0-9a-f]+: 6ee17a30 bf2cvtl2 v16.8h, v17.16b
+[ ]*[0-9a-f]+: 6e217800 f1cvtl2 v0.8h, v0.16b
+[ ]*[0-9a-f]+: 6e217801 f1cvtl2 v1.8h, v0.16b
+[ ]*[0-9a-f]+: 6e217820 f1cvtl2 v0.8h, v1.16b
+[ ]*[0-9a-f]+: 6e217821 f1cvtl2 v1.8h, v1.16b
+[ ]*[0-9a-f]+: 6e217a30 f1cvtl2 v16.8h, v17.16b
+[ ]*[0-9a-f]+: 6e617800 f2cvtl2 v0.8h, v0.16b
+[ ]*[0-9a-f]+: 6e617801 f2cvtl2 v1.8h, v0.16b
+[ ]*[0-9a-f]+: 6e617820 f2cvtl2 v0.8h, v1.16b
+[ ]*[0-9a-f]+: 6e617821 f2cvtl2 v1.8h, v1.16b
+[ ]*[0-9a-f]+: 6e617a30 f2cvtl2 v16.8h, v17.16b
+[ ]*[0-9a-f]+: 2ec03c00 fscale v0.4h, v0.4h, v0.4h
+[ ]*[0-9a-f]+: 2ec03c01 fscale v1.4h, v0.4h, v0.4h
+[ ]*[0-9a-f]+: 2ec03c20 fscale v0.4h, v1.4h, v0.4h
+[ ]*[0-9a-f]+: 2ec13c00 fscale v0.4h, v0.4h, v1.4h
+[ ]*[0-9a-f]+: 2ec03c21 fscale v1.4h, v1.4h, v0.4h
+[ ]*[0-9a-f]+: 2ec13c20 fscale v0.4h, v1.4h, v1.4h
+[ ]*[0-9a-f]+: 2ec13c21 fscale v1.4h, v1.4h, v1.4h
+[ ]*[0-9a-f]+: 2ed23e30 fscale v16.4h, v17.4h, v18.4h
+[ ]*[0-9a-f]+: 6ec03c00 fscale v0.8h, v0.8h, v0.8h
+[ ]*[0-9a-f]+: 6ec03c01 fscale v1.8h, v0.8h, v0.8h
+[ ]*[0-9a-f]+: 6ec03c20 fscale v0.8h, v1.8h, v0.8h
+[ ]*[0-9a-f]+: 6ec13c00 fscale v0.8h, v0.8h, v1.8h
+[ ]*[0-9a-f]+: 6ec03c21 fscale v1.8h, v1.8h, v0.8h
+[ ]*[0-9a-f]+: 6ec13c20 fscale v0.8h, v1.8h, v1.8h
+[ ]*[0-9a-f]+: 6ec13c21 fscale v1.8h, v1.8h, v1.8h
+[ ]*[0-9a-f]+: 6ed23e30 fscale v16.8h, v17.8h, v18.8h
+[ ]*[0-9a-f]+: 2ea0fc00 fscale v0.2s, v0.2s, v0.2s
+[ ]*[0-9a-f]+: 2ea0fc01 fscale v1.2s, v0.2s, v0.2s
+[ ]*[0-9a-f]+: 2ea0fc20 fscale v0.2s, v1.2s, v0.2s
+[ ]*[0-9a-f]+: 2ea1fc00 fscale v0.2s, v0.2s, v1.2s
+[ ]*[0-9a-f]+: 2ea0fc21 fscale v1.2s, v1.2s, v0.2s
+[ ]*[0-9a-f]+: 2ea1fc20 fscale v0.2s, v1.2s, v1.2s
+[ ]*[0-9a-f]+: 2ea1fc21 fscale v1.2s, v1.2s, v1.2s
+[ ]*[0-9a-f]+: 2eb2fe30 fscale v16.2s, v17.2s, v18.2s
+[ ]*[0-9a-f]+: 6ea0fc00 fscale v0.4s, v0.4s, v0.4s
+[ ]*[0-9a-f]+: 6ea0fc01 fscale v1.4s, v0.4s, v0.4s
+[ ]*[0-9a-f]+: 6ea0fc20 fscale v0.4s, v1.4s, v0.4s
+[ ]*[0-9a-f]+: 6ea1fc00 fscale v0.4s, v0.4s, v1.4s
+[ ]*[0-9a-f]+: 6ea0fc21 fscale v1.4s, v1.4s, v0.4s
+[ ]*[0-9a-f]+: 6ea1fc20 fscale v0.4s, v1.4s, v1.4s
+[ ]*[0-9a-f]+: 6ea1fc21 fscale v1.4s, v1.4s, v1.4s
+[ ]*[0-9a-f]+: 6eb2fe30 fscale v16.4s, v17.4s, v18.4s
+[ ]*[0-9a-f]+: 6ee0fc00 fscale v0.2d, v0.2d, v0.2d
+[ ]*[0-9a-f]+: 6ee0fc01 fscale v1.2d, v0.2d, v0.2d
+[ ]*[0-9a-f]+: 6ee0fc20 fscale v0.2d, v1.2d, v0.2d
+[ ]*[0-9a-f]+: 6ee1fc00 fscale v0.2d, v0.2d, v1.2d
+[ ]*[0-9a-f]+: 6ee0fc21 fscale v1.2d, v1.2d, v0.2d
+[ ]*[0-9a-f]+: 6ee1fc20 fscale v0.2d, v1.2d, v1.2d
+[ ]*[0-9a-f]+: 6ee1fc21 fscale v1.2d, v1.2d, v1.2d
+[ ]*[0-9a-f]+: 6ef2fe30 fscale v16.2d, v17.2d, v18.2d
+[ ]*[0-9a-f]+: 0e40f400 fcvtn v0.8b, v0.4h, v0.4h
+[ ]*[0-9a-f]+: 0e40f401 fcvtn v1.8b, v0.4h, v0.4h
+[ ]*[0-9a-f]+: 0e40f420 fcvtn v0.8b, v1.4h, v0.4h
+[ ]*[0-9a-f]+: 0e41f400 fcvtn v0.8b, v0.4h, v1.4h
+[ ]*[0-9a-f]+: 0e40f421 fcvtn v1.8b, v1.4h, v0.4h
+[ ]*[0-9a-f]+: 0e41f420 fcvtn v0.8b, v1.4h, v1.4h
+[ ]*[0-9a-f]+: 0e41f421 fcvtn v1.8b, v1.4h, v1.4h
+[ ]*[0-9a-f]+: 0e52f630 fcvtn v16.8b, v17.4h, v18.4h
+[ ]*[0-9a-f]+: 4e40f400 fcvtn v0.16b, v0.8h, v0.8h
+[ ]*[0-9a-f]+: 4e40f401 fcvtn v1.16b, v0.8h, v0.8h
+[ ]*[0-9a-f]+: 4e40f420 fcvtn v0.16b, v1.8h, v0.8h
+[ ]*[0-9a-f]+: 4e41f400 fcvtn v0.16b, v0.8h, v1.8h
+[ ]*[0-9a-f]+: 4e40f421 fcvtn v1.16b, v1.8h, v0.8h
+[ ]*[0-9a-f]+: 4e41f420 fcvtn v0.16b, v1.8h, v1.8h
+[ ]*[0-9a-f]+: 4e41f421 fcvtn v1.16b, v1.8h, v1.8h
+[ ]*[0-9a-f]+: 4e52f630 fcvtn v16.16b, v17.8h, v18.8h
+[ ]*[0-9a-f]+: 0e00f400 fcvtn v0.8b, v0.4s, v0.4s
+[ ]*[0-9a-f]+: 0e00f401 fcvtn v1.8b, v0.4s, v0.4s
+[ ]*[0-9a-f]+: 0e00f420 fcvtn v0.8b, v1.4s, v0.4s
+[ ]*[0-9a-f]+: 0e01f400 fcvtn v0.8b, v0.4s, v1.4s
+[ ]*[0-9a-f]+: 0e00f421 fcvtn v1.8b, v1.4s, v0.4s
+[ ]*[0-9a-f]+: 0e01f420 fcvtn v0.8b, v1.4s, v1.4s
+[ ]*[0-9a-f]+: 0e01f421 fcvtn v1.8b, v1.4s, v1.4s
+[ ]*[0-9a-f]+: 0e12f630 fcvtn v16.8b, v17.4s, v18.4s
+[ ]*[0-9a-f]+: 4e00f400 fcvtn2 v0.16b, v0.4s, v0.4s
+[ ]*[0-9a-f]+: 4e00f401 fcvtn2 v1.16b, v0.4s, v0.4s
+[ ]*[0-9a-f]+: 4e00f420 fcvtn2 v0.16b, v1.4s, v0.4s
+[ ]*[0-9a-f]+: 4e01f400 fcvtn2 v0.16b, v0.4s, v1.4s
+[ ]*[0-9a-f]+: 4e00f421 fcvtn2 v1.16b, v1.4s, v0.4s
+[ ]*[0-9a-f]+: 4e01f420 fcvtn2 v0.16b, v1.4s, v1.4s
+[ ]*[0-9a-f]+: 4e01f421 fcvtn2 v1.16b, v1.4s, v1.4s
+[ ]*[0-9a-f]+: 4e12f630 fcvtn2 v16.16b, v17.4s, v18.4s
new file mode 100644
@@ -0,0 +1,76 @@
+ /* advsimd-fp8.s Test file for AArch64 8-bit floating-point vector
+ instructions. */
+
+ /* Instructions convert the elements from the lower half of the source
+ vector while scaling the values by 2^-UInt(FPMR.LSCALE{2}[3:0]). */
+
+ .macro cvrt_lowerhalf, op
+ \op v0.8h, v0.8b
+ \op v1.8h, v0.8b
+ \op v0.8h, v1.8b
+ \op v1.8h, v1.8b
+ \op v16.8h, v17.8b
+ .endm
+
+ cvrt_lowerhalf bf1cvtl
+ cvrt_lowerhalf bf2cvtl
+ cvrt_lowerhalf f1cvtl
+ cvrt_lowerhalf f2cvtl
+
+ /* Instructions convert the elements from the upper half of the source
+ vector while scaling the values by 2^-UInt(FPMR.LSCALE{2}[3:0]). */
+
+ .macro cvrt_upperhalf, op
+ \op v0.8h, v0.16b
+ \op v1.8h, v0.16b
+ \op v0.8h, v1.16b
+ \op v1.8h, v1.16b
+ \op v16.8h, v17.16b
+ .endm
+
+ cvrt_upperhalf bf1cvtl2
+ cvrt_upperhalf bf2cvtl2
+ cvrt_upperhalf f1cvtl2
+ cvrt_upperhalf f2cvtl2
+
+ /* Floating-point adjust exponent by vector. */
+
+ .macro fscale_gen, op_var
+ fscale v0.\op_var, v0.\op_var, v0.\op_var
+ fscale v1.\op_var, v0.\op_var, v0.\op_var
+ fscale v0.\op_var, v1.\op_var, v0.\op_var
+ fscale v0.\op_var, v0.\op_var, v1.\op_var
+ fscale v1.\op_var, v1.\op_var, v0.\op_var
+ fscale v0.\op_var, v1.\op_var, v1.\op_var
+ fscale v1.\op_var, v1.\op_var, v1.\op_var
+ fscale v16.\op_var, v17.\op_var, v18.\op_var
+ .endm
+
+ /* Half-precision variant. */
+ fscale_gen 4h
+ fscale_gen 8h
+ /* Single and double-precision variants. */
+ fscale_gen 2s
+ fscale_gen 4s
+ fscale_gen 2d
+
+ /* Half and single-precision to FP8 convert and narrow. */
+
+ .macro fcvtn_to_fp8, op, sd, ss
+ \op v0.\sd, v0.\ss, v0.\ss
+ \op v1.\sd, v0.\ss, v0.\ss
+ \op v0.\sd, v1.\ss, v0.\ss
+ \op v0.\sd, v0.\ss, v1.\ss
+ \op v1.\sd, v1.\ss, v0.\ss
+ \op v0.\sd, v1.\ss, v1.\ss
+ \op v1.\sd, v1.\ss, v1.\ss
+ \op v16.\sd, v17.\ss, v18.\ss
+ .endm
+
+ /* Half-precision variant. */
+ fcvtn_to_fp8 fcvtn, 8b, 4h
+ fcvtn_to_fp8 fcvtn, 16b, 8h
+
+ /* Single-precision variant. */
+ fcvtn_to_fp8 fcvtn, 8b, 4s
+ fcvtn_to_fp8 fcvtn2, 16b, 4s
@@ -10334,7 +10334,7 @@ aarch64_opcode_lookup_1 (uint32_t word)
10987654321098765432109876543210
x0x11010000xxxxxxx1xxxxxxxxxxxxx
addpt. */
- return 3333;
+ return 3346;
}
else
{
@@ -10342,7 +10342,7 @@ aarch64_opcode_lookup_1 (uint32_t word)
10987654321098765432109876543210
x1x11010000xxxxxxx1xxxxxxxxxxxxx
subpt. */
- return 3334;
+ return 3347;
}
}
}
@@ -11260,7 +11260,7 @@ aarch64_opcode_lookup_1 (uint32_t word)
10987654321098765432109876543210
xxxx1011x11xxxxx0xxxxxxxxxxxxxxx
maddpt. */
- return 3335;
+ return 3348;
}
else
{
@@ -11268,7 +11268,7 @@ aarch64_opcode_lookup_1 (uint32_t word)
10987654321098765432109876543210
xxxx1011x11xxxxx1xxxxxxxxxxxxxxx
msubpt. */
- return 3336;
+ return 3349;
}
}
}
@@ -11353,7 +11353,7 @@ aarch64_opcode_lookup_1 (uint32_t word)
10987654321098765432109876543210
000001x0xx000100000xxxxxxxxxxxxx
addpt. */
- return 3337;
+ return 3350;
}
else
{
@@ -11460,7 +11460,7 @@ aarch64_opcode_lookup_1 (uint32_t word)
10987654321098765432109876543210
000001x0xx000101000xxxxxxxxxxxxx
subpt. */
- return 3339;
+ return 3352;
}
else
{
@@ -11665,7 +11665,7 @@ aarch64_opcode_lookup_1 (uint32_t word)
10987654321098765432109876543210
000001x0xx1xxxxx000010xxxxxxxxxx
addpt. */
- return 3338;
+ return 3351;
}
else
{
@@ -11706,7 +11706,7 @@ aarch64_opcode_lookup_1 (uint32_t word)
10987654321098765432109876543210
000001x0xx1xxxxx000011xxxxxxxxxx
subpt. */
- return 3340;
+ return 3353;
}
else
{
@@ -13364,7 +13364,7 @@ aarch64_opcode_lookup_1 (uint32_t word)
10987654321098765432109876543210
010001x0xx0xxxxx110100xxxxxxxxxx
mlapt. */
- return 3342;
+ return 3355;
}
}
else
@@ -13394,7 +13394,7 @@ aarch64_opcode_lookup_1 (uint32_t word)
10987654321098765432109876543210
010001x0xx0xxxxx110110xxxxxxxxxx
madpt. */
- return 3341;
+ return 3354;
}
}
}
@@ -25607,21 +25607,54 @@ aarch64_opcode_lookup_1 (uint32_t word)
}
else
{
- if (((word >> 23) & 0x1) == 0)
+ if (((word >> 14) & 0x1) == 0)
{
- /* 33222222222211111111110000000000
- 10987654321098765432109876543210
- 0x0011100x0xxxxxxx1101xxxxxxxxxx
- fmax. */
- return 302;
+ if (((word >> 23) & 0x1) == 0)
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ 0x0011100x0xxxxxx01101xxxxxxxxxx
+ fmax. */
+ return 302;
+ }
+ else
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ 0x0011101x0xxxxxx01101xxxxxxxxxx
+ fmin. */
+ return 314;
+ }
}
else
{
- /* 33222222222211111111110000000000
- 10987654321098765432109876543210
- 0x0011101x0xxxxxxx1101xxxxxxxxxx
- fmin. */
- return 314;
+ if (((word >> 22) & 0x1) == 0)
+ {
+ if (((word >> 30) & 0x1) == 0)
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ 00001110x00xxxxxx11101xxxxxxxxxx
+ fcvtn. */
+ return 3341;
+ }
+ else
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ 01001110x00xxxxxx11101xxxxxxxxxx
+ fcvtn2. */
+ return 3342;
+ }
+ }
+ else
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ 0x001110x10xxxxxx11101xxxxxxxxxx
+ fcvtn. */
+ return 3343;
+ }
}
}
}
@@ -26007,11 +26040,22 @@ aarch64_opcode_lookup_1 (uint32_t word)
}
else
{
- /* 33222222222211111111110000000000
- 10987654321098765432109876543210
- xx101110x10xxxxx0x1111xxxxxxxxxx
- fdiv. */
- return 355;
+ if (((word >> 23) & 0x1) == 0)
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ xx101110010xxxxx0x1111xxxxxxxxxx
+ fdiv. */
+ return 355;
+ }
+ else
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ xx101110110xxxxx0x1111xxxxxxxxxx
+ fscale. */
+ return 3344;
+ }
}
}
}
@@ -27382,11 +27426,99 @@ aarch64_opcode_lookup_1 (uint32_t word)
}
else
{
- /* 33222222222211111111110000000000
- 10987654321098765432109876543210
- xx101110xx1xxxxx011110xxxxxxxxxx
- sqneg. */
- return 218;
+ if (((word >> 16) & 0x1) == 0)
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ xx101110xx1xxxx0011110xxxxxxxxxx
+ sqneg. */
+ return 218;
+ }
+ else
+ {
+ if (((word >> 22) & 0x1) == 0)
+ {
+ if (((word >> 23) & 0x1) == 0)
+ {
+ if (((word >> 30) & 0x1) == 0)
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ x0101110001xxxx1011110xxxxxxxxxx
+ f1cvtl. */
+ return 3337;
+ }
+ else
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ x1101110001xxxx1011110xxxxxxxxxx
+ f1cvtl2. */
+ return 3338;
+ }
+ }
+ else
+ {
+ if (((word >> 30) & 0x1) == 0)
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ x0101110101xxxx1011110xxxxxxxxxx
+ bf1cvtl. */
+ return 3333;
+ }
+ else
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ x1101110101xxxx1011110xxxxxxxxxx
+ bf1cvtl2. */
+ return 3334;
+ }
+ }
+ }
+ else
+ {
+ if (((word >> 23) & 0x1) == 0)
+ {
+ if (((word >> 30) & 0x1) == 0)
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ x0101110011xxxx1011110xxxxxxxxxx
+ f2cvtl. */
+ return 3339;
+ }
+ else
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ x1101110011xxxx1011110xxxxxxxxxx
+ f2cvtl2. */
+ return 3340;
+ }
+ }
+ else
+ {
+ if (((word >> 30) & 0x1) == 0)
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ x0101110111xxxx1011110xxxxxxxxxx
+ bf2cvtl. */
+ return 3335;
+ }
+ else
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ x1101110111xxxx1011110xxxxxxxxxx
+ bf2cvtl2. */
+ return 3336;
+ }
+ }
+ }
+ }
}
}
}
@@ -29367,11 +29499,22 @@ aarch64_opcode_lookup_1 (uint32_t word)
}
else
{
- /* 33222222222211111111110000000000
- 10987654321098765432109876543210
- xxx011101x1xxxxx111111xxxxxxxxxx
- frsqrts. */
- return 315;
+ if (((word >> 29) & 0x1) == 0)
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ xx0011101x1xxxxx111111xxxxxxxxxx
+ frsqrts. */
+ return 315;
+ }
+ else
+ {
+ /* 33222222222211111111110000000000
+ 10987654321098765432109876543210
+ xx1011101x1xxxxx111111xxxxxxxxxx
+ fscale. */
+ return 3345;
+ }
}
}
}
@@ -2368,6 +2368,34 @@
QLF3(X,X,NIL), \
}
+#define QL_V3_BSS_LOWER \
+{ \
+ QLF3(V_8B, V_4S, V_4S), \
+}
+
+#define QL_V3_BSS_FULL \
+{ \
+ QLF3(V_16B, V_4S, V_4S), \
+}
+
+#define QL_V3_BHH \
+{ \
+ QLF3(V_8B, V_4H, V_4H), \
+ QLF3(V_16B, V_8H, V_8H), \
+}
+
+/* e.g. BF1CVTL <Vd>.8H, <Vn>.8B. */
+#define QL_V2FP8B8H \
+{ \
+ QLF2(V_8H, V_8B), \
+}
+
+/* e.g. BF1CVTL2 <Vd>.8H, <Vn>.16B. */
+#define QL_V28H16B \
+{ \
+ QLF2(V_8H, V_16B), \
+}
+
/* e.g. UDOT <Vd>.2S, <Vn>.8B, <Vm>.8B. */
#define QL_V3DOT \
{ \
@@ -6459,6 +6487,19 @@ const struct aarch64_opcode aarch64_opcode_table[] =
SVE2p1_INSNC("st2q",0xe4600000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt2, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st3q",0xe4a00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt3, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
SVE2p1_INSNC("st4q",0xe4e00000, 0xffe0e000, sve_misc, 0, OP3 (SME_Zt4, SVE_Pg3, SVE_ADDR_RR_LSL4), OP_SVE_QUU, 0, C_SCAN_MOVPRFX, 0),
+ FP8_INSN("bf1cvtl", 0x2ea17800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V2FP8B8H, 0),
+ FP8_INSN("bf1cvtl2", 0x6ea17800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V28H16B, 0),
+ FP8_INSN("bf2cvtl", 0x2ee17800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V2FP8B8H, 0),
+ FP8_INSN("bf2cvtl2", 0x6ee17800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V28H16B, 0),
+ FP8_INSN("f1cvtl", 0x2e217800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V2FP8B8H, 0),
+ FP8_INSN("f1cvtl2", 0x6e217800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V28H16B, 0),
+ FP8_INSN("f2cvtl", 0x2e617800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V2FP8B8H, 0),
+ FP8_INSN("f2cvtl2", 0x6e617800, 0xfffffc00, asimdmisc, OP2 (Vd, Vn), QL_V28H16B, 0),
+ FP8_INSN("fcvtn", 0x0e00f400, 0xffe0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_V3_BSS_LOWER, 0),
+ FP8_INSN("fcvtn2", 0x4e00f400, 0xffe0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_V3_BSS_FULL, 0),
+ FP8_INSN("fcvtn", 0x0e40f400, 0xbfe0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_V3_BHH, F_SIZEQ),
+ FP8_INSN("fscale", 0x2ec03c00, 0xbfe0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_VSHIFT_H, F_SIZEQ),
+ FP8_INSN("fscale", 0x2ea0fc00, 0xbfa0fc00, asimdmisc, OP3 (Vd, Vn, Vm), QL_V3SAMESD, F_SIZEQ),
/* Checked Pointer Arithmetic Instructions. */
CPA_INSN ("addpt", 0x9a002000, 0xffe0e000, aarch64_misc, OP3 (Rd_SP, Rn_SP, Rm_LSL), QL_I3SAMEX),