[pushed] aarch64: Avoid using mismatched ZERO ZA sizes
Checks
Commit Message
The svzero_mask_za intrinsic tried to use the shortest combination
of .b, .h, .s and .d tiles, allowing mixtures of sizes where necessary.
However, Iain S pointed out that LLVM instead requires all of the tiles
in the list to have the same suffix. GAS supports both versions, so this
patch generates the LLVM-friendly form. For example, a mask of 0xab was
previously output as { za1.h, za0.d } and is now output as
{ za0.d, za1.d, za3.d, za5.d, za7.d }.
Tested on aarch64-linux-gnu & pushed.
Please revert the patch if it causes any problems.
Richard
gcc/
* config/aarch64/aarch64.cc (aarch64_output_sme_zero_za): Require
all tiles to have the same suffix.
gcc/testsuite/
* gcc.target/aarch64/sme/acle-asm/zero_mask_za.c (zero_mask_za_ab)
(zero_mask_za_d7, zero_mask_za_bf): Expect a list of .d tiles instead
of a mixture.
---
gcc/config/aarch64/aarch64.cc | 20 +++++++++++--------
.../aarch64/sme/acle-asm/zero_mask_za.c | 6 +++---
2 files changed, 15 insertions(+), 11 deletions(-)
@@ -13210,29 +13210,33 @@ aarch64_output_sme_zero_za (rtx mask)
/* The last entry in the list has the form "za7.d }", but that's the
same length as "za7.d, ". */
static char buffer[sizeof("zero\t{ ") + sizeof ("za7.d, ") * 8 + 1];
- unsigned int i = 0;
- i += snprintf (buffer + i, sizeof (buffer) - i, "zero\t");
- const char *prefix = "{ ";
for (auto &tile : tiles)
{
unsigned int tile_mask = tile.mask;
unsigned int tile_index = 0;
+ unsigned int i = snprintf (buffer, sizeof (buffer), "zero\t");
+ const char *prefix = "{ ";
+ auto remaining_mask = mask_val;
while (tile_mask < 0x100)
{
- if ((mask_val & tile_mask) == tile_mask)
+ if ((remaining_mask & tile_mask) == tile_mask)
{
i += snprintf (buffer + i, sizeof (buffer) - i, "%sza%d.%c",
prefix, tile_index, tile.letter);
prefix = ", ";
- mask_val &= ~tile_mask;
+ remaining_mask &= ~tile_mask;
}
tile_mask <<= 1;
tile_index += 1;
}
+ if (remaining_mask == 0)
+ {
+ gcc_assert (i + 3 <= sizeof (buffer));
+ snprintf (buffer + i, sizeof (buffer) - i, " }");
+ return buffer;
+ }
}
- gcc_assert (mask_val == 0 && i + 3 <= sizeof (buffer));
- snprintf (buffer + i, sizeof (buffer) - i, " }");
- return buffer;
+ gcc_unreachable ();
}
/* Return size in bits of an arithmetic operand which is shifted/scaled and
@@ -103,21 +103,21 @@ PROTO (zero_mask_za_aa, void, ()) { svzero_mask_za (0xaa); }
/*
** zero_mask_za_ab:
-** zero { za1\.h, za0\.d }
+** zero { za0\.d, za1\.d, za3\.d, za5\.d, za7\.d }
** ret
*/
PROTO (zero_mask_za_ab, void, ()) { svzero_mask_za (0xab); }
/*
** zero_mask_za_d7:
-** zero { za0\.h, za1\.d, za7\.d }
+** zero { za0\.d, za1\.d, za2\.d, za4\.d, za6\.d, za7\.d }
** ret
*/
PROTO (zero_mask_za_d7, void, ()) { svzero_mask_za (0xd7); }
/*
** zero_mask_za_bf:
-** zero { za1\.h, za0\.s, za2\.d }
+** zero { za0\.d, za1\.d, za2\.d, za3\.d, za4\.d, za5\.d, za7\.d }
** ret
*/
PROTO (zero_mask_za_bf, void, ()) { svzero_mask_za (0xbf); }