[v2,1/5] aarch64: Add support for FEAT_SVE_B16B16 feature.

Message ID 20240719121438.2929286-2-srinath.parvathaneni@arm.com
State Superseded
Headers
Series aarch64: Add support for sme2 and sve2 BFloat16 feature. |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Srinath Parvathaneni July 19, 2024, 12:14 p.m. UTC
  In the current code, SVE2 Bfloat16 instructions are implemented with tick
FEAT_B16B16 and command line flag "+b16b16" and this feature was suspended
due to incomplete support.

In the new spec available here[1], FEAT_B16B16 is replaced with
FEAT_SVE_B16B16 and command line flag "+b16b16" is replace with "sve-b16b16".

This patch supports the SVE Z-targeting non-widening BFloat16 instructions
with command line flag "+sve-b16b16+sve2".
[1]: https://developer.arm.com/documentation/ddi0602/2024-06/SVE-Instructions?lang=en
---
 gas/config/tc-aarch64.c                       |  5 +-
 .../gas/aarch64/bfloat16-1-invalid.d          |  2 +-
 gas/testsuite/gas/aarch64/bfloat16-1.d        |  3 +-
 .../gas/aarch64/bfloat16-2-invalid.d          |  3 +-
 include/opcode/aarch64.h                      |  6 ++-
 opcodes/aarch64-tbl.h                         | 48 +++++++++----------
 6 files changed, 33 insertions(+), 34 deletions(-)
  

Patch

diff --git a/gas/config/tc-aarch64.c b/gas/config/tc-aarch64.c
index e94a0cff406..4249374cf64 100644
--- a/gas/config/tc-aarch64.c
+++ b/gas/config/tc-aarch64.c
@@ -10721,9 +10721,7 @@  static const struct aarch64_option_cpu_value_table aarch64_features[] = {
   {"rasv2",		AARCH64_FEATURE (RASv2), AARCH64_FEATURE (RAS)},
   {"ite",		AARCH64_FEATURE (ITE), AARCH64_NO_FEATURES},
   {"d128",		AARCH64_FEATURE (D128), D128_FEATURE_DEPS},
-  // Feature b16b16 is currently incomplete.
-  // TODO: finish implementation and enable relevant flags.
-  //{"b16b16",		AARCH64_FEATURE (B16B16), AARCH64_FEATURE (SVE2)},
+  {"sve-b16b16",	AARCH64_FEATURE (SVE_B16B16), AARCH64_NO_FEATURES},
   {"sme2p1",		AARCH64_FEATURE (SME2p1), AARCH64_FEATURE (SME2)},
   {"sve2p1",		AARCH64_FEATURE (SVE2p1), AARCH64_FEATURE (SVE2)},
   {"rcpc3",		AARCH64_FEATURE (RCPC3), AARCH64_FEATURE (RCPC2)},
@@ -10766,6 +10764,7 @@  static const struct aarch64_virtual_dependency_table aarch64_dependencies[] = {
   {AARCH64_FEATURE (SSVE_FP8DOT2), AARCH64_FEATURE (FP8DOT2_SVE)},
   /* TODO: Add SME_F16F16->SME_F16F16_F8F16 when SME_F16F16 is added.  */
   {AARCH64_FEATURE (SME_F8F16), AARCH64_FEATURE (SME_F16F16_F8F16)},
+  {AARCH64_FEATURES (2, SVE_B16B16, SVE2), AARCH64_FEATURE (SVE_SVE2_B16B16)},
 };
 
 static aarch64_feature_set
diff --git a/gas/testsuite/gas/aarch64/bfloat16-1-invalid.d b/gas/testsuite/gas/aarch64/bfloat16-1-invalid.d
index 531a59717c8..a5cd4479b6e 100644
--- a/gas/testsuite/gas/aarch64/bfloat16-1-invalid.d
+++ b/gas/testsuite/gas/aarch64/bfloat16-1-invalid.d
@@ -1,4 +1,4 @@ 
-#name: Negative test with missing +b16b16 flag.
+#name: Negative test with missing +sve-b16b16 flag.
 #as: -march=armv9.4-a
 #source: bfloat16-1-invalid.s
 #error_output: bfloat16-1-invalid.l
diff --git a/gas/testsuite/gas/aarch64/bfloat16-1.d b/gas/testsuite/gas/aarch64/bfloat16-1.d
index 68a47f5afc5..5581b4ef11e 100644
--- a/gas/testsuite/gas/aarch64/bfloat16-1.d
+++ b/gas/testsuite/gas/aarch64/bfloat16-1.d
@@ -1,7 +1,6 @@ 
 #name: Test of SVE2.1 and SME2.1 non-widening BFloat16 instructions.
-#as: -march=armv9.4-a+b16b16
+#as: -march=armv9.4-a+sve-b16b16
 #objdump: -dr
-#xfail: *-*-*
 
 [^:]+:     file format .*
 
diff --git a/gas/testsuite/gas/aarch64/bfloat16-2-invalid.d b/gas/testsuite/gas/aarch64/bfloat16-2-invalid.d
index 1e1b701c0e3..e939878ccbe 100644
--- a/gas/testsuite/gas/aarch64/bfloat16-2-invalid.d
+++ b/gas/testsuite/gas/aarch64/bfloat16-2-invalid.d
@@ -1,5 +1,4 @@ 
 #name: Test Bfloat16 instructions with wrong operand combinations
-#as: -march=armv9.4-a+b16b16
+#as: -march=armv9.4-a+sve-b16b16
 #source: bfloat16-2-invalid.s
 #error_output: bfloat16-2-invalid.l
-#xfail: *-*-*
diff --git a/include/opcode/aarch64.h b/include/opcode/aarch64.h
index bc779c973d6..72b48183882 100644
--- a/include/opcode/aarch64.h
+++ b/include/opcode/aarch64.h
@@ -226,8 +226,6 @@  enum aarch64_feature_bit {
   AARCH64_FEATURE_SPMU2,
   /* Performance Monitors Synchronous-Exception-Based Event Extension.  */
   AARCH64_FEATURE_SEBEP,
-  /* SVE2.1 and SME2.1 non-widening BFloat16 instructions.  */
-  AARCH64_FEATURE_B16B16,
   /* SME2.1 instructions.  */
   AARCH64_FEATURE_SME2p1,
   /* SVE2.1 instructions.  */
@@ -264,6 +262,8 @@  enum aarch64_feature_bit {
   AARCH64_FEATURE_SME_F8F32,
   /* SME F8F16 instructions.  */
   AARCH64_FEATURE_SME_F8F16,
+  /* SVE2 non-widening BFloat16 instructions.  */
+  AARCH64_FEATURE_SVE_B16B16,
 
   /* Virtual features.  These are used to gate instructions that are enabled
      by either of two (or more) sets of command line flags.  */
@@ -277,6 +277,8 @@  enum aarch64_feature_bit {
   AARCH64_FEATURE_SME_F16F16_F8F16,
   /* Armv9.5-A processors.  */
   AARCH64_FEATURE_V9_5A,
+  /* +sve-b16b16+sve2.  */
+  AARCH64_FEATURE_SVE_SVE2_B16B16,
   AARCH64_NUM_FEATURES
 };
 
diff --git a/opcodes/aarch64-tbl.h b/opcodes/aarch64-tbl.h
index 172c9a44845..ea30fb31ba5 100644
--- a/opcodes/aarch64-tbl.h
+++ b/opcodes/aarch64-tbl.h
@@ -2811,8 +2811,8 @@  static const aarch64_feature_set aarch64_feature_the =
   AARCH64_FEATURE (THE);
 static const aarch64_feature_set aarch64_feature_d128_the =
   AARCH64_FEATURES (2, D128, THE);
-static const aarch64_feature_set aarch64_feature_b16b16_sve2 =
-  AARCH64_FEATURES (2, B16B16, SVE2);
+static const aarch64_feature_set aarch64_feature_sve_sve2_b16b16 =
+  AARCH64_FEATURES (3, SVE_B16B16, SVE2, SVE_SVE2_B16B16);
 static const aarch64_feature_set aarch64_feature_sme2p1 =
   AARCH64_FEATURE (SME2p1);
 static const aarch64_feature_set aarch64_feature_sve2p1 =
@@ -2923,7 +2923,7 @@  static const aarch64_feature_set aarch64_feature_sme_f16f16_f8f16 =
 #define D128	  &aarch64_feature_d128
 #define THE	  &aarch64_feature_the
 #define D128_THE  &aarch64_feature_d128_the
-#define B16B16_SVE2  &aarch64_feature_b16b16_sve2
+#define SVE_SVE2_B16B16  &aarch64_feature_sve_sve2_b16b16
 #define SME2p1  &aarch64_feature_sme2p1
 #define SVE2p1  &aarch64_feature_sve2p1
 #define RCPC3	  &aarch64_feature_rcpc3
@@ -3020,11 +3020,11 @@  static const aarch64_feature_set aarch64_feature_sme_f16f16_f8f16 =
 #define SVE2_INSNC(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,CONSTRAINTS,TIED) \
   { NAME, OPCODE, MASK, CLASS, OP, SVE2, OPS, QUALS, \
     FLAGS | F_STRICT, CONSTRAINTS, TIED, NULL }
-#define B16B16_SVE2_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,TIED) \
-  { NAME, OPCODE, MASK, CLASS, OP, B16B16_SVE2, OPS, QUALS, \
+#define SVE_SVE2_B16B16_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,TIED) \
+  { NAME, OPCODE, MASK, CLASS, OP, SVE_SVE2_B16B16, OPS, QUALS, \
     FLAGS | F_STRICT, 0, TIED, NULL }
-#define B16B16_SVE2_INSNC(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,CONSTRAINTS,TIED) \
-  { NAME, OPCODE, MASK, CLASS, OP, B16B16_SVE2, OPS, QUALS, \
+#define SVE_SVE2_B16B16_INSNC(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,CONSTRAINTS,TIED) \
+  { NAME, OPCODE, MASK, CLASS, OP, SVE_SVE2_B16B16, OPS, QUALS, \
     FLAGS | F_STRICT, CONSTRAINTS, TIED, NULL }
 #define SVE2p1_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS,TIED) \
   { NAME, OPCODE, MASK, CLASS, OP, SVE2p1, OPS, QUALS, \
@@ -6625,23 +6625,23 @@  const struct aarch64_opcode aarch64_opcode_table[] =
   D128_THE_INSN("rcwsswppal", 0x59e0a000, 0xffe0fc00, OP3 (Rt, Rs, ADDR_SIMPLE), QL_X2NIL, 0),
   D128_THE_INSN("rcwsswppl", 0x5960a000, 0xffe0fc00, OP3 (Rt, Rs, ADDR_SIMPLE), QL_X2NIL, 0),
 
-/* BFloat16 SVE Instructions.  */
-  B16B16_SVE2_INSNC("bfadd", 0x65008000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
-  B16B16_SVE2_INSNC("bfmax", 0x65068000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
-  B16B16_SVE2_INSNC("bfmaxnm", 0x65048000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
-  B16B16_SVE2_INSNC("bfmin", 0x65078000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
-  B16B16_SVE2_INSNC("bfminnm", 0x65058000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
-  B16B16_SVE2_INSNC("bfmla", 0x65200000, 0xffe0e000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zn, SVE_Zm_16), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
-  B16B16_SVE2_INSNC("bfmls", 0x65202000, 0xffe0e000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zn, SVE_Zm_16), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
-  B16B16_SVE2_INSNC("bfmul", 0x65028000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
-  B16B16_SVE2_INSNC("bfsub", 0x65018000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
-  B16B16_SVE2_INSNC("bfclamp", 0x64202400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, C_SCAN_MOVPRFX, 0),
-  B16B16_SVE2_INSNC("bfmla", 0x64200800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, C_SCAN_MOVPRFX, 0),
-  B16B16_SVE2_INSNC("bfmls", 0x64200c00, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, C_SCAN_MOVPRFX, 0),
-  B16B16_SVE2_INSN("bfadd", 0x65000000, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
-  B16B16_SVE2_INSN("bfmul", 0x65000800, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
-  B16B16_SVE2_INSN("bfsub", 0x65000400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
-  B16B16_SVE2_INSN("bfmul", 0x64202800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
+/* SVE Z-targeting non-widening BFloat16 instructions.  */
+  SVE_SVE2_B16B16_INSNC("bfadd", 0x65008000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
+  SVE_SVE2_B16B16_INSNC("bfmax", 0x65068000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
+  SVE_SVE2_B16B16_INSNC("bfmaxnm", 0x65048000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
+  SVE_SVE2_B16B16_INSNC("bfmin", 0x65078000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
+  SVE_SVE2_B16B16_INSNC("bfminnm", 0x65058000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
+  SVE_SVE2_B16B16_INSNC("bfmla", 0x65200000, 0xffe0e000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zn, SVE_Zm_16), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
+  SVE_SVE2_B16B16_INSNC("bfmls", 0x65202000, 0xffe0e000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zn, SVE_Zm_16), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 0),
+  SVE_SVE2_B16B16_INSNC("bfmul", 0x65028000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
+  SVE_SVE2_B16B16_INSNC("bfsub", 0x65018000, 0xffffe000, sve_misc, 0, OP4 (SVE_Zd, SVE_Pg3, SVE_Zd, SVE_Zm_5), OP_SVE_SMSS, 0, C_SCAN_MOVPRFX, 2),
+  SVE_SVE2_B16B16_INSNC("bfclamp", 0x64202400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, C_SCAN_MOVPRFX, 0),
+  SVE_SVE2_B16B16_INSNC("bfmla", 0x64200800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, C_SCAN_MOVPRFX, 0),
+  SVE_SVE2_B16B16_INSNC("bfmls", 0x64200c00, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, C_SCAN_MOVPRFX, 0),
+  SVE_SVE2_B16B16_INSN("bfadd", 0x65000000, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
+  SVE_SVE2_B16B16_INSN("bfmul", 0x65000800, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
+  SVE_SVE2_B16B16_INSN("bfsub", 0x65000400, 0xffe0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_HHH, 0, 0),
+  SVE_SVE2_B16B16_INSN("bfmul", 0x64202800, 0xffa0fc00, sve_misc, 0, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_22_INDEX), OP_SVE_VVV_H, 0, 0),
 
 /* SME2.1 movaz instructions.  */
   SME2p1_INSN ("movaz", 0xc0060600, 0xffff1f83, sme2_movaz, 0, OP2 (SME_Zdnx4, SME_ZA_array_vrsb_2), OP_SVE_BB, 0, 0),