[v10,06/11] Handle variable-length vector types in store_constructor

Message ID 20260603131548.50668-7-chris.bazley@arm.com
State New
Headers
Series Extend BB SLP vectorization to use predicated tails |

Commit Message

Christopher Bazley June 3, 2026, 1:15 p.m. UTC
  When given a constructor of variable-length vector type, the
store_constructor function now builds an rtvec whose length is the
constant lower bound of the number of subparts in the vector type and
uses it to emit the vec_init insn selected by convert_optab_handler.
Previously, the function fell back to calling store_constructor_field
whenever the number of subparts in the vector type was not a constant
multiple of the number of subparts in the element type.

For example, this allows GCC to generate the following AArch64 assembly
language output for the tail of a reduction in the slp_6 test:

uaddv d31, p6, z31.b
uaddv d27, p6, z27.b
uaddv d26, p6, z26.b
movi  d30, #0
insr  z30.b, b26
insr  z30.b, b27
insr  z30.b, b31
add   z25.b, z25.b, z30.b

instead of the following output (with predicated tails for basic block
SLP vectorization but without this change):

addvl  x0, sp, #2
movi   d0, #0
st1b   z0.b, p6, [sp, #2, mul vl]
uaddv  d27, p6, z27.b
uaddv  d26, p6, z26.b
uaddv  d25, p6, z25.b
str    b27, [x0]
addvl  x0, sp, #1
add    x0, x0, 1
ptrue  p7.b, vl3
ld1b   z0.b, p6/z, [sp, #2, mul vl]
st1b   z0.b, p6, [sp, #1, mul vl]
str    b26, [x0]
ld1b   z0.b, p6/z, [sp, #1, mul vl]
st1b   z0.b, p6, [sp]
str    b25, [sp, 2]
ld1b   z0.b, p6/z, [sp]
add    z28.b, z28.b, z0.b
st1b   z28.b, p7, [x1]
addvl  sp, sp, #3

or the original assembly language output (with neither predicated tails
for basic block SLP vectorization nor this change):

uaddv  d31, p6, z31.b
fmov   x0, d31
uaddv  d31, p6, z26.b
add    w6, w6, w0
fmov   x0, d31
uaddv  d31, p6, z27.b
add    w5, w5, w0
fmov   x0, d31
add    w4, w4, w0

gcc/ChangeLog:

	* expr.cc (store_constructor): Add an else block to handle the
	case where TREE_CODE (TREE_TYPE (exp)) == VECTOR_TYPE but
	exact_div (n_elts, GET_MODE_NUNITS (eltmode)).is_constant
	(&const_n_elts) is false.  Treat it like the existing "element
	type is not a vector type" case, except that const_n_elts is
	taken from the lower bound of the number of subparts of the
	vector type.
---
 gcc/expr.cc | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)
  

Patch

diff --git a/gcc/expr.cc b/gcc/expr.cc
index de73215ccc6..f163ea16fe2 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7498,11 +7498,14 @@  fields_length (const_tree type)
   return count;
 }
 
-
 /* Store the value of constructor EXP into the rtx TARGET.
    TARGET is either a REG or a MEM; we know it cannot conflict, since
    safe_from_p has been called.
    CLEARED is true if TARGET is known to have been zero'd.
+   If the constructor EXP has a vector type then elements of TARGET for which
+   there is no corresponding element in EXP are zero'd.  For a variable-length
+   vector type, elements beyond the minimum number of subparts of the type are
+   not zero'd.
    SIZE is the number of bytes of TARGET we are allowed to modify: this
    may not be the same as the size of EXP if we are assigning to a field
    which has been packed to exclude padding bits.
@@ -8075,7 +8078,12 @@  store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
 		   similarly non-const type vectors. */
 		icode = convert_optab_handler (vec_init_optab, mode, eltmode);
 	      }
-
+	    else
+	      {
+		/* Handle variable-length vector types.  */
+		icode = convert_optab_handler (vec_init_optab, mode, eltmode);
+		const_n_elts = constant_lower_bound (n_elts);
+	      }
 	  if (const_n_elts && icode != CODE_FOR_nothing)
 	    {
 	      vector = rtvec_alloc (const_n_elts);