There is inconsistency regarding whether or not +sme implies +sve2 and
whether +nosve2 implies +nosme. In particular, GCC 14 assumes the
dependency exists, and canonicalises target strings accordingly, whereas
LLVM treats the features as independent.
This patch retains the positive implication while removing the negative
implication. This is the more permissive choice in each case, and
allows us to support target strings written with either interpretation
in mind.
This reduces our ability to detect invalid instructions, but we already
can't rely on this detection because gas doesn't know whether functions
might be executed in streaming mode and/or non-streaming mode.
The aarch64_feature_enable_set change is functionally redundant within
this patch. It is included because the longer term intention is to
instead remove the workaround in aarch64_parse_features, once the
internal feature checks have been modified to support having both
AARCH64_FEATURE_SME set and AARCH64_FEATURE_SVE unset.
Similarly, the dependency from +sme to +fp16 is currently redundant, but
this redundancy relies upon an incorrect dependency from +fcma to +fp16.
This can be fixed in the future, but it might require modifying internal
feature checks for a few FCMA instructions, so it's left unchanged for
now.
@@ -10715,7 +10715,7 @@ static const struct aarch64_option_cpu_value_table aarch64_features[] = {
{"sve2-bitperm", AARCH64_FEATURE (SVE2_BITPERM),
AARCH64_FEATURE (SVE2)},
{"sme", AARCH64_FEATURE (SME),
- AARCH64_FEATURES (2, SVE2, BFLOAT16)},
+ AARCH64_FEATURES (3, BFLOAT16, F16, COMPNUM)},
{"sme-f64", AARCH64_FEATURE (SME_F64F64), AARCH64_FEATURE (SME)},
{"sme-f64f64", AARCH64_FEATURE (SME_F64F64), AARCH64_FEATURE (SME)},
{"sme-i64", AARCH64_FEATURE (SME_I16I64), AARCH64_FEATURE (SME)},
@@ -10837,6 +10837,13 @@ aarch64_feature_enable_set (aarch64_feature_set set)
for (opt = aarch64_features; opt->name != NULL; opt++)
if (AARCH64_CPU_HAS_ALL_FEATURES (set, opt->value))
AARCH64_MERGE_FEATURE_SETS (set, set, opt->require);
+ /* As a special case, we want +sme to imply +sve2, without letting
+ +nosve2 imply +nosme. This is to ensure maximum compatibility with
+ both toolchains that assume this dependency and those that don't. */
+ aarch64_feature_set sme = AARCH64_FEATURE (SME);
+ aarch64_feature_set sve2 = AARCH64_FEATURE (SVE2);
+ if (AARCH64_CPU_HAS_ALL_FEATURES (set, sme))
+ AARCH64_MERGE_FEATURE_SETS (set, set, sve2);
}
return set;
}
@@ -10941,6 +10948,20 @@ aarch64_parse_features (const char *str, const aarch64_feature_set **opt_p,
str = ext;
};
+ /* The special handling in aarch64_feature_enable_set ought to be sufficient
+ to accommodate uncertainty over whether or not +sme in a target string
+ implies +sve2. Unfortunately, many streaming SVE instructions are
+ currently marked as requiring SVE or SVE2, and some parsing and error
+ reporting decisions also depend on SVE or SVE2 being specified. So for
+ now we will reenable the SVE and SVE2 bits if SME is enabled. This allows
+ us to support, for example, a compiler passing the command line
+ `-march=armv9-a+sme+nosve` and expecting all SME instructions to remain
+ enabled. */
+ aarch64_feature_set sme = AARCH64_FEATURE (SME);
+ aarch64_feature_set sve_sve2 = AARCH64_FEATURES (2, SVE, SVE2);
+ if (AARCH64_CPU_HAS_ALL_FEATURES (*ext_set, sme))
+ AARCH64_MERGE_FEATURE_SETS (*ext_set, *ext_set, sve_sve2);
+
*ext_set = aarch64_update_virtual_dependencies (*ext_set);
return 1;
}
@@ -267,8 +267,8 @@ automatically cause those extensions to be disabled.
@tab Enable Advanced SIMD extensions.
@item @code{sm4} @tab @code{simd}
@tab Enable the SM3 and SM4 cryptographic extensions.
-@item @code{sme} @tab @code{sve2}, @code{bf16}
- @tab Enable the Scalable Matrix Extension.
+@item @code{sme} @tab @code{bf16}, @code{fp16}, @code{fcma}
+ @tab Enable the Scalable Matrix Extension. This will also enable @code{sve2}, but disabling @code{sve2} does not disable @code{sme}.
@item @code{sme-b16b16} @tab @code{sme2}, @code{sve-b16b16}
@tab Enable SME ZA-targeting non-widening BFloat16 instructions.
@item @code{sme-f8f16} @tab @code{sme2}, @code{fp8}
new file mode 100644
@@ -0,0 +1,15 @@
+#as: -march=armv8-a+sme
+#as: -march=armv8-a+sme+nosve
+#as: -march=armv8-a+sme+nosve2
+#as: -march=armv9-a+sme+nosve
+#objdump: -dr
+
+.*: file format .*
+
+
+Disassembly of section \.text:
+
+0+ <\.text>:
+ *[0-9a-f]+: 04d6a441 abs z1\.d, p1/m, z2\.d
+ *[0-9a-f]+: 4503d041 adclb z1\.s, z2\.s, z3\.s
+ *[0-9a-f]+: c0904460 addha za0\.s, p1/m, p2/m, z3\.s
new file mode 100644
@@ -0,0 +1,3 @@
+abs z1.d, p1/m, z2.d
+adclb z1.s, z2.s, z3.s
+addha za0.s, p1/m, p2/m, z3.s