AArch64 Fix the AAPCS for new partial and full SIMD structure types [PR103094]

Message ID patch-15150-tamar@arm.com
State Committed
Series AArch64 Fix the AAPCS for new partial and full SIMD structure types [PR103094]

Commit Message

Tamar Christina Dec. 14, 2021, 9:40 a.m. UTC
  Hi All,

The new partial and full vector types added to AArch64, e.g. int8x8x2_t with
mode V2x8QI, are incorrectly being classified as short vectors rather than as
composite types.

This causes the layout code to incorrectly conclude that the registers are
packed, i.e. for V2x8QI it thinks those 16 bytes are in the same registers.

Because of this, the code under !aarch64_composite_type_p is unreachable, and
it also lacked any extra checks that nregs is what we expect it to be.
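
As a rough illustration of the expected AAPCS64 layout (just a sketch with a
made-up function name, not part of the patch): each 8-byte half of a
uint8x8x2_t argument should occupy its own D register, so a second such
argument starts at v2.

#include <arm_neon.h>

/* Expected assignment: a.val[0]/a.val[1] in v0/v1, b.val[0]/b.val[1] in
   v2/v3.  With the bug, the layout code behaved as if 'a' had used only a
   single register, so 'b' started one register too early.  */
uint8x8_t
take_second (uint8x8x2_t a, uint8x8x2_t b)
{
  return b.val[0];   /* should be read from v2 */
}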

I have also updated aarch64_advsimd_full_struct_mode_p and
aarch64_advsimd_partial_struct_mode_p to consider only vector modes as struct
modes.  Otherwise modes such as OImode and friends would qualify, leading to
incorrect results.

This patch fixes up the issues and we now generate correct code.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar



gcc/ChangeLog:

	PR target/103094
	* config/aarch64/aarch64.c (aarch64_function_value, aarch64_layout_arg):
	Fix unreachable code for partial vectors and re-order switch to perform
	the simplest test first.
	(aarch64_short_vector_p): Mark the new structure modes as not short vectors.
	(aarch64_composite_type_p): Mark them as composite types.
	(aarch64_advsimd_partial_struct_mode_p,
	aarch64_advsimd_full_struct_mode_p): Restrict to actual SIMD types.

gcc/testsuite/ChangeLog:

	PR target/103094
	* gcc.target/aarch64/pr103094.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fdf05505846721b02059df494d6395ae9423a8ef..d9104ddac3cdd44f7c2290b8725d05be4fd6468f 100644


--
  

Comments

Richard Sandiford Dec. 14, 2021, 12:37 p.m. UTC | #1
Tamar Christina <tamar.christina@arm.com> writes:
> Hi All,
>
> The new partial and full vector types added to AArch64, e.g.
>
> int8x8x2_t with mode V2x8QI are incorrectly being defined as being short
> vectors and not being composite types.
>
> This causes the layout code to incorrectly conclude that the registers are
> packed. i.e. for V2x8QI it thinks those 16-bytes are in the same registers.
>
> Because of this the code under !aarch64_composite_type_p is unreachable but also
> lacked any extra checks to see that nregs is what we expected it to be.
>
> I have also updated aarch64_advsimd_full_struct_mode_p and 
> aarch64_advsimd_partial_struct_mode_p to only consider vector types as struct
> modes.  Otherwise types such as OImode and friends would qualify leading to
> incorrect results.

How easy would it be to fix the bug without doing this last bit?
The idea was that OI, CI and XI should continue to be structure
modes until we remove them.  aarch64_advsimd_partial_struct_mode_p
and aarch64_advsimd_full_struct_mode_p are meant to be convenience
wrappers and so they shouldn't make different decisions from the
underlying aarch64_classify_vector_mode.

>
> This patch fixes up the issues and we now generate correct code.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
>
>
> gcc/ChangeLog:
>
> 	PR target/103094
> 	* config/aarch64/aarch64.c (aarch64_function_value, aarch64_layout_arg):
> 	Fix unreachable code for partial vectors and re-order switch to perform
> 	the simplest test first.
> 	(aarch64_short_vector_p): Mark as not short vectors.
> 	(aarch64_composite_type_p): Mark as composite types.
> 	(aarch64_advsimd_partial_struct_mode_p,
> 	aarch64_advsimd_full_struct_mode_p): Restrict to actual SIMD types.
>
> gcc/testsuite/ChangeLog:
>
> 	PR target/103094
> 	* gcc.target/aarch64/pr103094.c: New test.
>
> --- inline copy of patch -- 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index fdf05505846721b02059df494d6395ae9423a8ef..d9104ddac3cdd44f7c2290b8725d05be4fd6468f 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -3055,15 +3055,17 @@ aarch64_advsimd_struct_mode_p (machine_mode mode)
>  static bool
>  aarch64_advsimd_partial_struct_mode_p (machine_mode mode)
>  {
> -  return (aarch64_classify_vector_mode (mode)
> -	  == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
> +  return VECTOR_MODE_P (mode)
> +	 && (aarch64_classify_vector_mode (mode)
> +		== (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
>  }
>  
>  /* Return true if MODE is an Advanced SIMD Q-register structure mode.  */
>  static bool
>  aarch64_advsimd_full_struct_mode_p (machine_mode mode)
>  {
> -  return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
> +  return VECTOR_MODE_P (mode)
> +	 && (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
>  }
>  
>  /* Return true if MODE is any of the data vector modes, including
> @@ -6468,17 +6470,21 @@ aarch64_function_value (const_tree type, const_tree func,
>  					       NULL, false))
>      {
>        gcc_assert (!sve_p);
> -      if (!aarch64_composite_type_p (type, mode))
> +      if (aarch64_advsimd_full_struct_mode_p (mode))
> +	{
> +	  gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 16), count));
> +	  return gen_rtx_REG (mode, V0_REGNUM);
> +	}
> +      else if (aarch64_advsimd_partial_struct_mode_p (mode))
> +	{
> +	  gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 8), count));
> +	  return gen_rtx_REG (mode, V0_REGNUM);
> +	}
> +      else if (!aarch64_composite_type_p (type, mode))
>  	{
>  	  gcc_assert (count == 1 && mode == ag_mode);
>  	  return gen_rtx_REG (mode, V0_REGNUM);
>  	}
> -      else if (aarch64_advsimd_full_struct_mode_p (mode)
> -	       && known_eq (GET_MODE_SIZE (ag_mode), 16))
> -	return gen_rtx_REG (mode, V0_REGNUM);
> -      else if (aarch64_advsimd_partial_struct_mode_p (mode)
> -	       && known_eq (GET_MODE_SIZE (ag_mode), 8))
> -	return gen_rtx_REG (mode, V0_REGNUM);
>        else
>  	{
>  	  int i;
> @@ -6745,6 +6751,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
>      /* No frontends can create types with variable-sized modes, so we
>         shouldn't be asked to pass or return them.  */
>      size = GET_MODE_SIZE (mode).to_constant ();
> +
>    size = ROUND_UP (size, UNITS_PER_WORD);
>  
>    allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
> @@ -6769,17 +6776,21 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
>        if (nvrn + nregs <= NUM_FP_ARG_REGS)
>  	{
>  	  pcum->aapcs_nextnvrn = nvrn + nregs;
> -	  if (!aarch64_composite_type_p (type, mode))
> +	  if (aarch64_advsimd_full_struct_mode_p (mode))
> +	    {
> +	      gcc_assert (nregs == size / 16);
> +	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
> +	    }
> +	  else if (aarch64_advsimd_partial_struct_mode_p (mode))
> +	    {
> +	      gcc_assert (nregs == size / 8);
> +	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
> +	    }
> +	  else if (!aarch64_composite_type_p (type, mode))
>  	    {
>  	      gcc_assert (nregs == 1);
>  	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>  	    }
> -	  else if (aarch64_advsimd_full_struct_mode_p (mode)
> -		   && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 16))
> -	    pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
> -	  else if (aarch64_advsimd_partial_struct_mode_p (mode)
> -		   && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 8))
> -	    pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>  	  else
>  	    {
>  	      rtx par;
> @@ -19285,6 +19296,13 @@ aarch64_short_vector_p (const_tree type,
>        else
>  	size = GET_MODE_SIZE (mode);
>      }
> +
> +  /* If a Advanced SIMD partial or full aggregate vector type we aren't a short
> +     type.  */
> +  if (aarch64_advsimd_partial_struct_mode_p (mode)
> +      || aarch64_advsimd_full_struct_mode_p (mode))
> +    return false;
> +
>    if (known_eq (size, 8) || known_eq (size, 16))
>      {
>        /* 64-bit and 128-bit vectors should only acquire an SVE mode if

I think the bug here is that we trust the mode even if we're
given a conflicting type.  In principle it would be OK to use,
say, V4SI for an array of 4 ints, but that shouldn't suddenly
make aarch64_short_vector_p true.

Unfortunately that ship has sailed, so we e.g. treat:

  struct wrapper { int32x4_t x; int :0; };

as a short vector too.

So it feels like this is a case of limiting the contagion and
that the check should go in here:

  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      /* Rely only on the type, not the mode, when processing SVE types.  */
      if (type && aarch64_some_values_include_pst_objects_p (type))
	/* Leave later code to report an error if SVE is disabled.  */
	gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode));
      else
	size = GET_MODE_SIZE (mode);
    }

where we needed similar protection for SVE.  E.g. we could change the
inner else to:

      else if (!aarch64_advsimd_struct_mode_p (mode))

or keep it as an early-out (but within the outer “else if”)
if that seems clearer.

> @@ -19316,6 +19334,12 @@ static bool
>  aarch64_composite_type_p (const_tree type,
>  			  machine_mode mode)
>  {
> +  /* If a Advanced SIMD partial or full aggregate vector type we are a
> +     composite type.  */
> +  if (aarch64_advsimd_partial_struct_mode_p (mode)
> +      || aarch64_advsimd_full_struct_mode_p (mode))
> +    return true;
> +

Isn't this naturally true after the fix to aarch64_short_vector_p?
It would be good to avoid adding new “mode only” tests if we can
help it.

Also, the old code didn't handle OI, CI or XI specially here,
so doing something different now might be dangerous.

Thanks,
Richard

>    if (aarch64_short_vector_p (type, mode))
>      return false;
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr103094.c b/gcc/testsuite/gcc.target/aarch64/pr103094.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..441e602928ce8ac4e9890a1376acbc25671e284d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr103094.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-fdump-rtl-expand -w" } */
> +
> +#include <arm_neon.h>
> +
> +void foo (uint8x8x2_t cols_01_23, uint8x8x2_t cols_45_67, uint16_t* outptr0)
> +{
> +  uint16x4x4_t cols_01_23_45_67 = { {
> +    vreinterpret_u16_u8(cols_01_23.val[0]),
> +    vreinterpret_u16_u8(cols_01_23.val[1]),
> +    vreinterpret_u16_u8(cols_45_67.val[0]),
> +    vreinterpret_u16_u8(cols_45_67.val[1])
> +  } };
> +
> +  vst4_lane_u16(outptr0, cols_01_23_45_67, 0);
> +}
> +
> +/* Check that we expand to v0 and v2 from the function arguments.  */
> +/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v0 \[ cols_01_23 \]\)} expand } } */
> +/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v2 \[ cols_45_67 \]\)} expand } } */
> +
  
Tamar Christina Dec. 14, 2021, 12:56 p.m. UTC | #2
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, December 14, 2021 12:38 PM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
> Subject: Re: [PATCH]AArch64 Fix the AAPCs for new partial and full SIMD
> structure types [PR103094]
> 
> Tamar Christina <tamar.christina@arm.com> writes:
> > Hi All,
> >
> > The new partial and full vector types added to AArch64, e.g.
> >
> > int8x8x2_t with mode V2x8QI are incorrectly being defined as being
> > short vectors and not being composite types.
> >
> > This causes the layout code to incorrectly conclude that the registers
> > are packed. i.e. for V2x8QI it thinks those 16-bytes are in the same registers.
> >
> > Because of this the code under !aarch64_composite_type_p is
> > unreachable but also lacked any extra checks to see that nregs is what we
> expected it to be.
> >
> > I have also updated aarch64_advsimd_full_struct_mode_p and
> > aarch64_advsimd_partial_struct_mode_p to only consider vector types as
> > struct modes.  Otherwise types such as OImode and friends would
> > qualify leading to incorrect results.
> 
> How easy would it be to fix the bug without doing this last bit?
> The idea was that OI, CI and XI should continue to be structure modes until
> we remove them.  aarch64_advsimd_partial_struct_mode_p
> and aarch64_advsimd_full_struct_mode_p are meant to be convenience
> wrappers and so they shouldn't make different decisions from the
> underlying aarch64_classify_vector_mode.

It can be done by moving the check higher up in the callers of these functions, but the problem is that
with e.g. an OImode there's no real indication of how many registers are used to create the
OImode.  It could be 4, 6, or 8, as it's just a bag of bits.

My concern is that these functions are misleading without this: with any of these opaque
modes returning true for both of these functions it becomes harder to make decisions between
the two, in particular because we still expand to these modes for certain structures.
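
To make the distinction concrete, a small illustrative sketch (not something
in the patch itself):

/* With the VECTOR_MODE_P guard added by the patch:
     aarch64_advsimd_partial_struct_mode_p (V2x8QImode) -> true
     aarch64_advsimd_full_struct_mode_p (OImode)        -> false
   Without the guard, OImode would also count as a full structure mode,
   because both wrappers just compare the result of
   aarch64_classify_vector_mode, which still classifies OI/CI/XI as
   Advanced SIMD structure modes.  */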

> 
> >
> > This patch fixes up the issues and we now generate correct code.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> >
> >
> > gcc/ChangeLog:
> >
> > 	PR target/103094
> > 	* config/aarch64/aarch64.c (aarch64_function_value,
> aarch64_layout_arg):
> > 	Fix unreachable code for partial vectors and re-order switch to
> perform
> > 	the simplest test first.
> > 	(aarch64_short_vector_p): Mark as not short vectors.
> > 	(aarch64_composite_type_p): Mark as composite types.
> > 	(aarch64_advsimd_partial_struct_mode_p,
> > 	aarch64_advsimd_full_struct_mode_p): Restrict to actual SIMD types.
> >
> > gcc/testsuite/ChangeLog:
> >
> > 	PR target/103094
> > 	* gcc.target/aarch64/pr103094.c: New test.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/config/aarch64/aarch64.c
> > b/gcc/config/aarch64/aarch64.c index
> >
> fdf05505846721b02059df494d6395ae9423a8ef..d9104ddac3cdd44f7c2290b872
> 5d
> > 05be4fd6468f 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -3055,15 +3055,17 @@ aarch64_advsimd_struct_mode_p
> (machine_mode
> > mode)  static bool  aarch64_advsimd_partial_struct_mode_p
> > (machine_mode mode)  {
> > -  return (aarch64_classify_vector_mode (mode)
> > -	  == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
> > +  return VECTOR_MODE_P (mode)
> > +	 && (aarch64_classify_vector_mode (mode)
> > +		== (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
> >  }
> >
> >  /* Return true if MODE is an Advanced SIMD Q-register structure mode.
> > */  static bool  aarch64_advsimd_full_struct_mode_p (machine_mode
> > mode)  {
> > -  return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD |
> > VEC_STRUCT));
> > +  return VECTOR_MODE_P (mode)
> > +	 && (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD |
> > +VEC_STRUCT));
> >  }
> >
> >  /* Return true if MODE is any of the data vector modes, including @@
> > -6468,17 +6470,21 @@ aarch64_function_value (const_tree type,
> const_tree func,
> >  					       NULL, false))
> >      {
> >        gcc_assert (!sve_p);
> > -      if (!aarch64_composite_type_p (type, mode))
> > +      if (aarch64_advsimd_full_struct_mode_p (mode))
> > +	{
> > +	  gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 16),
> count));
> > +	  return gen_rtx_REG (mode, V0_REGNUM);
> > +	}
> > +      else if (aarch64_advsimd_partial_struct_mode_p (mode))
> > +	{
> > +	  gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 8),
> count));
> > +	  return gen_rtx_REG (mode, V0_REGNUM);
> > +	}
> > +      else if (!aarch64_composite_type_p (type, mode))
> >  	{
> >  	  gcc_assert (count == 1 && mode == ag_mode);
> >  	  return gen_rtx_REG (mode, V0_REGNUM);
> >  	}
> > -      else if (aarch64_advsimd_full_struct_mode_p (mode)
> > -	       && known_eq (GET_MODE_SIZE (ag_mode), 16))
> > -	return gen_rtx_REG (mode, V0_REGNUM);
> > -      else if (aarch64_advsimd_partial_struct_mode_p (mode)
> > -	       && known_eq (GET_MODE_SIZE (ag_mode), 8))
> > -	return gen_rtx_REG (mode, V0_REGNUM);
> >        else
> >  	{
> >  	  int i;
> > @@ -6745,6 +6751,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v,
> const function_arg_info &arg)
> >      /* No frontends can create types with variable-sized modes, so we
> >         shouldn't be asked to pass or return them.  */
> >      size = GET_MODE_SIZE (mode).to_constant ();
> > +
> >    size = ROUND_UP (size, UNITS_PER_WORD);
> >
> >    allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P
> > (mode); @@ -6769,17 +6776,21 @@ aarch64_layout_arg
> (cumulative_args_t pcum_v, const function_arg_info &arg)
> >        if (nvrn + nregs <= NUM_FP_ARG_REGS)
> >  	{
> >  	  pcum->aapcs_nextnvrn = nvrn + nregs;
> > -	  if (!aarch64_composite_type_p (type, mode))
> > +	  if (aarch64_advsimd_full_struct_mode_p (mode))
> > +	    {
> > +	      gcc_assert (nregs == size / 16);
> > +	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
> > +	    }
> > +	  else if (aarch64_advsimd_partial_struct_mode_p (mode))
> > +	    {
> > +	      gcc_assert (nregs == size / 8);
> > +	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
> > +	    }
> > +	  else if (!aarch64_composite_type_p (type, mode))
> >  	    {
> >  	      gcc_assert (nregs == 1);
> >  	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
> >  	    }
> > -	  else if (aarch64_advsimd_full_struct_mode_p (mode)
> > -		   && known_eq (GET_MODE_SIZE (pcum-
> >aapcs_vfp_rmode), 16))
> > -	    pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
> > -	  else if (aarch64_advsimd_partial_struct_mode_p (mode)
> > -		   && known_eq (GET_MODE_SIZE (pcum-
> >aapcs_vfp_rmode), 8))
> > -	    pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
> >  	  else
> >  	    {
> >  	      rtx par;
> > @@ -19285,6 +19296,13 @@ aarch64_short_vector_p (const_tree type,
> >        else
> >  	size = GET_MODE_SIZE (mode);
> >      }
> > +
> > +  /* If a Advanced SIMD partial or full aggregate vector type we aren't a
> short
> > +     type.  */
> > +  if (aarch64_advsimd_partial_struct_mode_p (mode)
> > +      || aarch64_advsimd_full_struct_mode_p (mode))
> > +    return false;
> > +
> >    if (known_eq (size, 8) || known_eq (size, 16))
> >      {
> >        /* 64-bit and 128-bit vectors should only acquire an SVE mode
> > if
> 
> I think the bug here is that we trust the mode even if we're given a
> conflicting type.  In principle it would be OK to use, say, V4SI for an array of 4
> ints, but that shouldn't suddenly make aarch64_short_vector_p true.
> 
> Unfortunately that ship has sailed, so we e.g. treat:
> 
>   struct wrapper { int32x4_t x; int :0; };
> 
> as a short vector too.
> 
> So it feels like this a case of limiting the contagion and that the check should
> go in here:
> 
>   else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
> 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
>     {
>       /* Rely only on the type, not the mode, when processing SVE types.  */
>       if (type && aarch64_some_values_include_pst_objects_p (type))
> 	/* Leave later code to report an error if SVE is disabled.  */
> 	gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode));
>       else
> 	size = GET_MODE_SIZE (mode);
>     }
> 
> where we needed similar protection for SVE.  E.g. we could change the inner
> else to:

Indeed, I did see that for SVE we use the types instead of the modes, but the
types are not passed to all functions, so these functions would return a different
nregs from what e.g. aarch64_layout_arg calculates itself.  Of course I can remove
the asserts, but I think they're useful in catching issues like these.

I can also just change all that code to use type instead.

> 
>       else if (!aarch64_advsimd_struct_mode_p (mode))
> 
> or keep it is an early-out (but within the outer “else if”) if that seems clearer.
> 
> > @@ -19316,6 +19334,12 @@ static bool
> >  aarch64_composite_type_p (const_tree type,
> >  			  machine_mode mode)
> >  {
> > +  /* If a Advanced SIMD partial or full aggregate vector type we are a
> > +     composite type.  */
> > +  if (aarch64_advsimd_partial_struct_mode_p (mode)
> > +      || aarch64_advsimd_full_struct_mode_p (mode))
> > +    return true;
> > +
> 
> Isn't this naturally true after the fix to aarch64_short_vector_p?
> It would be good to avoid adding new “mode only” tests if we can help it.

Yes, but you can call this function directly, and it should still return the right
value for the new struct modes.

> 
> Also, the old code didn't handle OI, CI or XI specially here, so doing
> something different now might be dangerous.

This shouldn't change the handling of OImode and friends though, since they would
all return false here and fall through to the old code.  It's only problematic if these new
convenience functions don't exclude OI and other non-vector modes.

So this should only change the behaviour for actual structure modes.  But as you say,
I can look at the types, though my concern is that there's technically nothing stopping
an expand pattern from expanding to OImode with a structure "type", in which case
inspecting the type would change the behaviour, whereas the mode is a bit safer until we
remove the other modes entirely.

But I'm happy to rewrite it to use the type instead if that's preferred.

Cheers,
Tamar

> 
> Thanks,
> Richard
> 
> >    if (aarch64_short_vector_p (type, mode))
> >      return false;
> >
> > diff --git a/gcc/testsuite/gcc.target/aarch64/pr103094.c
> > b/gcc/testsuite/gcc.target/aarch64/pr103094.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..441e602928ce8ac4e9890a137
> 6ac
> > bc25671e284d
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/pr103094.c
> > @@ -0,0 +1,21 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-fdump-rtl-expand -w" } */
> > +
> > +#include <arm_neon.h>
> > +
> > +void foo (uint8x8x2_t cols_01_23, uint8x8x2_t cols_45_67, uint16_t*
> > +outptr0) {
> > +  uint16x4x4_t cols_01_23_45_67 = { {
> > +    vreinterpret_u16_u8(cols_01_23.val[0]),
> > +    vreinterpret_u16_u8(cols_01_23.val[1]),
> > +    vreinterpret_u16_u8(cols_45_67.val[0]),
> > +    vreinterpret_u16_u8(cols_45_67.val[1])
> > +  } };
> > +
> > +  vst4_lane_u16(outptr0, cols_01_23_45_67, 0); }
> > +
> > +/* Check that we expand to v0 and v2 from the function arguments.  */
> > +/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v0 \[ cols_01_23
> > +\]\)} expand } } */
> > +/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v2 \[ cols_45_67
> > +\]\)} expand } } */
> > +
  
Richard Sandiford Dec. 14, 2021, 1:31 p.m. UTC | #3
Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Tuesday, December 14, 2021 12:38 PM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw
>> <Richard.Earnshaw@arm.com>; Marcus Shawcroft
>> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
>> Subject: Re: [PATCH]AArch64 Fix the AAPCs for new partial and full SIMD
>> structure types [PR103094]
>> 
>> Tamar Christina <tamar.christina@arm.com> writes:
>> > Hi All,
>> >
>> > The new partial and full vector types added to AArch64, e.g.
>> >
>> > int8x8x2_t with mode V2x8QI are incorrectly being defined as being
>> > short vectors and not being composite types.
>> >
>> > This causes the layout code to incorrectly conclude that the registers
>> > are packed. i.e. for V2x8QI it thinks those 16-bytes are in the same registers.
>> >
>> > Because of this the code under !aarch64_composite_type_p is
>> > unreachable but also lacked any extra checks to see that nregs is what we
>> expected it to be.
>> >
>> > I have also updated aarch64_advsimd_full_struct_mode_p and
>> > aarch64_advsimd_partial_struct_mode_p to only consider vector types as
>> > struct modes.  Otherwise types such as OImode and friends would
>> > qualify leading to incorrect results.
>> 
>> How easy would it be to fix the bug without doing this last bit?
>> The idea was that OI, CI and XI should continue to be structure modes until
>> we remove them.  aarch64_advsimd_partial_struct_mode_p
>> and aarch64_advsimd_full_struct_mode_p are meant to be convenience
>> wrappers and so they shouldn't make different decisions from the
>> underlying aarch64_classify_vector_mode.
>
> It can be done by moving the check higher in callers of these functions, but the problem is that
> With an e.g. an OImode there's no real indication of how many registers are used to create the
> IOmode. It could be 4, 6, 8 as it's just a bag of bits.

OImode is always 2 Q registers, etc.
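(For reference, OImode is 256 bits, i.e. two 128-bit Q registers; CImode and
XImode cover three and four Q registers respectively.)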

Which bit of code are you concerned about?  Is it the parts where
we generate gen_rtx_REG?  If so, it was the case even before the
new modes that an OImode structure could have safely been classified
as (reg:OI V0) (say) rather than as a less efficient parallel.

Thanks,
Richard


>
> My concern is that these functions are misleading without this, with any of these opaque
> types returning true for both of these functions it becomes harder to make decisions between
> the two, in particular because we still expand to these modes for certain structures.
>
>> 
>> >
>> > This patch fixes up the issues and we now generate correct code.
>> >
>> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>> >
>> > Ok for master?
>> >
>> > Thanks,
>> > Tamar
>> >
>> >
>> >
>> > gcc/ChangeLog:
>> >
>> > 	PR target/103094
>> > 	* config/aarch64/aarch64.c (aarch64_function_value,
>> aarch64_layout_arg):
>> > 	Fix unreachable code for partial vectors and re-order switch to
>> perform
>> > 	the simplest test first.
>> > 	(aarch64_short_vector_p): Mark as not short vectors.
>> > 	(aarch64_composite_type_p): Mark as composite types.
>> > 	(aarch64_advsimd_partial_struct_mode_p,
>> > 	aarch64_advsimd_full_struct_mode_p): Restrict to actual SIMD types.
>> >
>> > gcc/testsuite/ChangeLog:
>> >
>> > 	PR target/103094
>> > 	* gcc.target/aarch64/pr103094.c: New test.
>> >
>> > --- inline copy of patch --
>> > diff --git a/gcc/config/aarch64/aarch64.c
>> > b/gcc/config/aarch64/aarch64.c index
>> >
>> fdf05505846721b02059df494d6395ae9423a8ef..d9104ddac3cdd44f7c2290b872
>> 5d
>> > 05be4fd6468f 100644
>> > --- a/gcc/config/aarch64/aarch64.c
>> > +++ b/gcc/config/aarch64/aarch64.c
>> > @@ -3055,15 +3055,17 @@ aarch64_advsimd_struct_mode_p
>> (machine_mode
>> > mode)  static bool  aarch64_advsimd_partial_struct_mode_p
>> > (machine_mode mode)  {
>> > -  return (aarch64_classify_vector_mode (mode)
>> > -	  == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
>> > +  return VECTOR_MODE_P (mode)
>> > +	 && (aarch64_classify_vector_mode (mode)
>> > +		== (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
>> >  }
>> >
>> >  /* Return true if MODE is an Advanced SIMD Q-register structure mode.
>> > */  static bool  aarch64_advsimd_full_struct_mode_p (machine_mode
>> > mode)  {
>> > -  return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD |
>> > VEC_STRUCT));
>> > +  return VECTOR_MODE_P (mode)
>> > +	 && (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD |
>> > +VEC_STRUCT));
>> >  }
>> >
>> >  /* Return true if MODE is any of the data vector modes, including @@
>> > -6468,17 +6470,21 @@ aarch64_function_value (const_tree type,
>> const_tree func,
>> >  					       NULL, false))
>> >      {
>> >        gcc_assert (!sve_p);
>> > -      if (!aarch64_composite_type_p (type, mode))
>> > +      if (aarch64_advsimd_full_struct_mode_p (mode))
>> > +	{
>> > +	  gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 16),
>> count));
>> > +	  return gen_rtx_REG (mode, V0_REGNUM);
>> > +	}
>> > +      else if (aarch64_advsimd_partial_struct_mode_p (mode))
>> > +	{
>> > +	  gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 8),
>> count));
>> > +	  return gen_rtx_REG (mode, V0_REGNUM);
>> > +	}
>> > +      else if (!aarch64_composite_type_p (type, mode))
>> >  	{
>> >  	  gcc_assert (count == 1 && mode == ag_mode);
>> >  	  return gen_rtx_REG (mode, V0_REGNUM);
>> >  	}
>> > -      else if (aarch64_advsimd_full_struct_mode_p (mode)
>> > -	       && known_eq (GET_MODE_SIZE (ag_mode), 16))
>> > -	return gen_rtx_REG (mode, V0_REGNUM);
>> > -      else if (aarch64_advsimd_partial_struct_mode_p (mode)
>> > -	       && known_eq (GET_MODE_SIZE (ag_mode), 8))
>> > -	return gen_rtx_REG (mode, V0_REGNUM);
>> >        else
>> >  	{
>> >  	  int i;
>> > @@ -6745,6 +6751,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v,
>> const function_arg_info &arg)
>> >      /* No frontends can create types with variable-sized modes, so we
>> >         shouldn't be asked to pass or return them.  */
>> >      size = GET_MODE_SIZE (mode).to_constant ();
>> > +
>> >    size = ROUND_UP (size, UNITS_PER_WORD);
>> >
>> >    allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P
>> > (mode); @@ -6769,17 +6776,21 @@ aarch64_layout_arg
>> (cumulative_args_t pcum_v, const function_arg_info &arg)
>> >        if (nvrn + nregs <= NUM_FP_ARG_REGS)
>> >  	{
>> >  	  pcum->aapcs_nextnvrn = nvrn + nregs;
>> > -	  if (!aarch64_composite_type_p (type, mode))
>> > +	  if (aarch64_advsimd_full_struct_mode_p (mode))
>> > +	    {
>> > +	      gcc_assert (nregs == size / 16);
>> > +	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> > +	    }
>> > +	  else if (aarch64_advsimd_partial_struct_mode_p (mode))
>> > +	    {
>> > +	      gcc_assert (nregs == size / 8);
>> > +	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> > +	    }
>> > +	  else if (!aarch64_composite_type_p (type, mode))
>> >  	    {
>> >  	      gcc_assert (nregs == 1);
>> >  	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> >  	    }
>> > -	  else if (aarch64_advsimd_full_struct_mode_p (mode)
>> > -		   && known_eq (GET_MODE_SIZE (pcum-
>> >aapcs_vfp_rmode), 16))
>> > -	    pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> > -	  else if (aarch64_advsimd_partial_struct_mode_p (mode)
>> > -		   && known_eq (GET_MODE_SIZE (pcum-
>> >aapcs_vfp_rmode), 8))
>> > -	    pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> >  	  else
>> >  	    {
>> >  	      rtx par;
>> > @@ -19285,6 +19296,13 @@ aarch64_short_vector_p (const_tree type,
>> >        else
>> >  	size = GET_MODE_SIZE (mode);
>> >      }
>> > +
>> > +  /* If a Advanced SIMD partial or full aggregate vector type we aren't a
>> short
>> > +     type.  */
>> > +  if (aarch64_advsimd_partial_struct_mode_p (mode)
>> > +      || aarch64_advsimd_full_struct_mode_p (mode))
>> > +    return false;
>> > +
>> >    if (known_eq (size, 8) || known_eq (size, 16))
>> >      {
>> >        /* 64-bit and 128-bit vectors should only acquire an SVE mode
>> > if
>> 
>> I think the bug here is that we trust the mode even if we're given a
>> conflicting type.  In principle it would be OK to use, say, V4SI for an array of 4
>> ints, but that shouldn't suddenly make aarch64_short_vector_p true.
>> 
>> Unfortunately that ship has sailed, so we e.g. treat:
>> 
>>   struct wrapper { int32x4_t x; int :0; };
>> 
>> as a short vector too.
>> 
>> So it feels like this a case of limiting the contagion and that the check should
>> go in here:
>> 
>>   else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
>> 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
>>     {
>>       /* Rely only on the type, not the mode, when processing SVE types.  */
>>       if (type && aarch64_some_values_include_pst_objects_p (type))
>> 	/* Leave later code to report an error if SVE is disabled.  */
>> 	gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode));
>>       else
>> 	size = GET_MODE_SIZE (mode);
>>     }
>> 
>> where we needed similar protection for SVE.  E.g. we could change the inner
>> else to:
>
> Indeed, I did see that for SVE we use the types instead of the modes, but the
> types are not passed to all functions. So this would get these to return a different
> nregs than what e.g. aarch64_layout_arg calculates itself.  Of course I can remove
> the asserts but I think they're useful in catching issues like these.
>
> I can also just change all that code to use type instead.
>
>> 
>>       else if (!aarch64_advsimd_struct_mode_p (mode))
>> 
>> or keep it is an early-out (but within the outer “else if”) if that seems clearer.
>> 
>> > @@ -19316,6 +19334,12 @@ static bool
>> >  aarch64_composite_type_p (const_tree type,
>> >  			  machine_mode mode)
>> >  {
>> > +  /* If a Advanced SIMD partial or full aggregate vector type we are a
>> > +     composite type.  */
>> > +  if (aarch64_advsimd_partial_struct_mode_p (mode)
>> > +      || aarch64_advsimd_full_struct_mode_p (mode))
>> > +    return true;
>> > +
>> 
>> Isn't this naturally true after the fix to aarch64_short_vector_p?
>> It would be good to avoid adding new “mode only” tests if we can help it.
>
> Yes but you can call this function directly and it should still return the right
> value for the new struct modes. 
>
>> 
>> Also, the old code didn't handle OI, CI or XI specially here, so doing
>> something different now might be dangerous.
>
> This shouldn't change the handling of OI mode and friends though. Since they would
> all return false here and fall through to the old code.  It's only problematic if these new
> convenience functions don't exclude OI and other non-vector modes.
>
> So this should only change the behaviour for actual structure modes.  But as you say,
>  I can look at the types, though my concern is that there's technically nothing stopping
> an expand pattern from expanding to OImode with a structure "type", in which case
> inspecting the type will change the behavior whereas the mode is a bit safer until we
> remove the other modes entirely.
>
> But happy to rewrite it to use the type instead if that's preferred. 
>
> Cheers,
> Tamar
>
>> 
>> Thanks,
>> Richard
>> 
>> >    if (aarch64_short_vector_p (type, mode))
>> >      return false;
>> >
>> > diff --git a/gcc/testsuite/gcc.target/aarch64/pr103094.c
>> > b/gcc/testsuite/gcc.target/aarch64/pr103094.c
>> > new file mode 100644
>> > index
>> >
>> 0000000000000000000000000000000000000000..441e602928ce8ac4e9890a137
>> 6ac
>> > bc25671e284d
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.target/aarch64/pr103094.c
>> > @@ -0,0 +1,21 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-additional-options "-fdump-rtl-expand -w" } */
>> > +
>> > +#include <arm_neon.h>
>> > +
>> > +void foo (uint8x8x2_t cols_01_23, uint8x8x2_t cols_45_67, uint16_t*
>> > +outptr0) {
>> > +  uint16x4x4_t cols_01_23_45_67 = { {
>> > +    vreinterpret_u16_u8(cols_01_23.val[0]),
>> > +    vreinterpret_u16_u8(cols_01_23.val[1]),
>> > +    vreinterpret_u16_u8(cols_45_67.val[0]),
>> > +    vreinterpret_u16_u8(cols_45_67.val[1])
>> > +  } };
>> > +
>> > +  vst4_lane_u16(outptr0, cols_01_23_45_67, 0); }
>> > +
>> > +/* Check that we expand to v0 and v2 from the function arguments.  */
>> > +/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v0 \[ cols_01_23
>> > +\]\)} expand } } */
>> > +/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v2 \[ cols_45_67
>> > +\]\)} expand } } */
>> > +
  
Richard Sandiford Dec. 15, 2021, 12:23 p.m. UTC | #4
Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Tamar Christina <tamar.christina@arm.com> writes:
>> Hi All,
>>
>> The new partial and full vector types added to AArch64, e.g.
>>
>> int8x8x2_t with mode V2x8QI are incorrectly being defined as being short
>> vectors and not being composite types.
>>
>> This causes the layout code to incorrectly conclude that the registers are
>> packed. i.e. for V2x8QI it thinks those 16-bytes are in the same registers.
>>
>> Because of this the code under !aarch64_composite_type_p is unreachable but also
>> lacked any extra checks to see that nregs is what we expected it to be.
>>
>> I have also updated aarch64_advsimd_full_struct_mode_p and 
>> aarch64_advsimd_partial_struct_mode_p to only consider vector types as struct
>> modes.  Otherwise types such as OImode and friends would qualify leading to
>> incorrect results.
>
> How easy would it be to fix the bug without doing this last bit?
> The idea was that OI, CI and XI should continue to be structure
> modes until we remove them.  aarch64_advsimd_partial_struct_mode_p
> and aarch64_advsimd_full_struct_mode_p are meant to be convenience
> wrappers and so they shouldn't make different decisions from the
> underlying aarch64_classify_vector_mode.
>
>>
>> This patch fixes up the issues and we now generate correct code.
>>
>> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>>
>> Ok for master?
>>
>> Thanks,
>> Tamar
>>
>>
>>
>> gcc/ChangeLog:
>>
>> 	PR target/103094
>> 	* config/aarch64/aarch64.c (aarch64_function_value, aarch64_layout_arg):
>> 	Fix unreachable code for partial vectors and re-order switch to perform
>> 	the simplest test first.
>> 	(aarch64_short_vector_p): Mark as not short vectors.
>> 	(aarch64_composite_type_p): Mark as composite types.
>> 	(aarch64_advsimd_partial_struct_mode_p,
>> 	aarch64_advsimd_full_struct_mode_p): Restrict to actual SIMD types.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	PR target/103094
>> 	* gcc.target/aarch64/pr103094.c: New test.
>>
>> --- inline copy of patch -- 
>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> index fdf05505846721b02059df494d6395ae9423a8ef..d9104ddac3cdd44f7c2290b8725d05be4fd6468f 100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -3055,15 +3055,17 @@ aarch64_advsimd_struct_mode_p (machine_mode mode)
>>  static bool
>>  aarch64_advsimd_partial_struct_mode_p (machine_mode mode)
>>  {
>> -  return (aarch64_classify_vector_mode (mode)
>> -	  == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
>> +  return VECTOR_MODE_P (mode)
>> +	 && (aarch64_classify_vector_mode (mode)
>> +		== (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
>>  }
>>  
>>  /* Return true if MODE is an Advanced SIMD Q-register structure mode.  */
>>  static bool
>>  aarch64_advsimd_full_struct_mode_p (machine_mode mode)
>>  {
>> -  return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
>> +  return VECTOR_MODE_P (mode)
>> +	 && (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
>>  }
>>  
>>  /* Return true if MODE is any of the data vector modes, including
>> @@ -6468,17 +6470,21 @@ aarch64_function_value (const_tree type, const_tree func,
>>  					       NULL, false))
>>      {
>>        gcc_assert (!sve_p);
>> -      if (!aarch64_composite_type_p (type, mode))
>> +      if (aarch64_advsimd_full_struct_mode_p (mode))
>> +	{
>> +	  gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 16), count));
>> +	  return gen_rtx_REG (mode, V0_REGNUM);
>> +	}
>> +      else if (aarch64_advsimd_partial_struct_mode_p (mode))
>> +	{
>> +	  gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 8), count));
>> +	  return gen_rtx_REG (mode, V0_REGNUM);
>> +	}
>> +      else if (!aarch64_composite_type_p (type, mode))
>>  	{
>>  	  gcc_assert (count == 1 && mode == ag_mode);
>>  	  return gen_rtx_REG (mode, V0_REGNUM);
>>  	}
>> -      else if (aarch64_advsimd_full_struct_mode_p (mode)
>> -	       && known_eq (GET_MODE_SIZE (ag_mode), 16))
>> -	return gen_rtx_REG (mode, V0_REGNUM);
>> -      else if (aarch64_advsimd_partial_struct_mode_p (mode)
>> -	       && known_eq (GET_MODE_SIZE (ag_mode), 8))
>> -	return gen_rtx_REG (mode, V0_REGNUM);
>>        else
>>  	{
>>  	  int i;
>> @@ -6745,6 +6751,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
>>      /* No frontends can create types with variable-sized modes, so we
>>         shouldn't be asked to pass or return them.  */
>>      size = GET_MODE_SIZE (mode).to_constant ();
>> +
>>    size = ROUND_UP (size, UNITS_PER_WORD);
>>  
>>    allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
>> @@ -6769,17 +6776,21 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
>>        if (nvrn + nregs <= NUM_FP_ARG_REGS)
>>  	{
>>  	  pcum->aapcs_nextnvrn = nvrn + nregs;
>> -	  if (!aarch64_composite_type_p (type, mode))
>> +	  if (aarch64_advsimd_full_struct_mode_p (mode))
>> +	    {
>> +	      gcc_assert (nregs == size / 16);
>> +	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> +	    }
>> +	  else if (aarch64_advsimd_partial_struct_mode_p (mode))
>> +	    {
>> +	      gcc_assert (nregs == size / 8);
>> +	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> +	    }
>> +	  else if (!aarch64_composite_type_p (type, mode))
>>  	    {
>>  	      gcc_assert (nregs == 1);
>>  	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>>  	    }
>> -	  else if (aarch64_advsimd_full_struct_mode_p (mode)
>> -		   && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 16))
>> -	    pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>> -	  else if (aarch64_advsimd_partial_struct_mode_p (mode)
>> -		   && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 8))
>> -	    pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
>>  	  else
>>  	    {
>>  	      rtx par;
>> @@ -19285,6 +19296,13 @@ aarch64_short_vector_p (const_tree type,
>>        else
>>  	size = GET_MODE_SIZE (mode);
>>      }
>> +
>> +  /* If a Advanced SIMD partial or full aggregate vector type we aren't a short
>> +     type.  */
>> +  if (aarch64_advsimd_partial_struct_mode_p (mode)
>> +      || aarch64_advsimd_full_struct_mode_p (mode))
>> +    return false;
>> +
>>    if (known_eq (size, 8) || known_eq (size, 16))
>>      {
>>        /* 64-bit and 128-bit vectors should only acquire an SVE mode if
>
> I think the bug here is that we trust the mode even if we're
> given a conflicting type.  In principle it would be OK to use,
> say, V4SI for an array of 4 ints, but that shouldn't suddenly
> make aarch64_short_vector_p true.
>
> Unfortunately that ship has sailed, so we e.g. treat:
>
>   struct wrapper { int32x4_t x; int :0; };
>
> as a short vector too.
>
> So it feels like this a case of limiting the contagion and
> that the check should go in here:
>
>   else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
> 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
>     {
>       /* Rely only on the type, not the mode, when processing SVE types.  */
>       if (type && aarch64_some_values_include_pst_objects_p (type))
> 	/* Leave later code to report an error if SVE is disabled.  */
> 	gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode));
>       else
> 	size = GET_MODE_SIZE (mode);
>     }
>
> where we needed similar protection for SVE.  E.g. we could change the
> inner else to:
>
>       else if (!aarch64_advsimd_struct_mode_p (mode))
>
> or keep it is an early-out (but within the outer “else if”)
> if that seems clearer.

Following some off-line discussion, I've committed the following
combined patch after testing on aarch64-linux-gnu.

Thanks,
Richard


In this PR we were wrongly classifying a pair of 8-byte vectors
as a 16-byte “short vector” (in the AAPCS64 sense).  As the
comment in the patch says, this stems from an old condition
in aarch64_short_vector_p that is too loose, but that would
be difficult to tighten now.

We can still do the right thing for the newly-added modes though,
since there are no backwards compatibility concerns there.
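
With aarch64_short_vector_p now returning false for the structure modes,
aarch64_composite_type_p naturally treats the corresponding aggregate types as
composite again, and the existing Advanced SIMD structure handling in
aarch64_function_value and aarch64_layout_arg assigns one vector register per
constituent vector; the new test below checks that the two uint8x8x2_t
arguments land in v0 and v2.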

Co-authored-by: Tamar Christina <tamar.christina@arm.com>

gcc/
	PR target/103094
	* config/aarch64/aarch64.c (aarch64_short_vector_p): Return false
	for structure modes, rather than ignoring the type in that case.

gcc/testsuite/
	PR target/103094
	* gcc.target/aarch64/pr103094.c: New test.
---
 gcc/config/aarch64/aarch64.c                | 19 ++++++++++++++++--
 gcc/testsuite/gcc.target/aarch64/pr103094.c | 22 +++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr103094.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f07330cff4f..ff4a808629b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -19299,7 +19299,21 @@ aarch64_short_vector_p (const_tree type,
   else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
 	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
     {
-      /* Rely only on the type, not the mode, when processing SVE types.  */
+      /* The containing "else if" is too loose: it means that we look at TYPE
+	 if the type is a vector type (good), but that we otherwise ignore TYPE
+	 and look only at the mode.  This is wrong because the type describes
+	 the language-level information whereas the mode is purely an internal
+	 GCC concept.  We can therefore reach here for types that are not
+	 vectors in the AAPCS64 sense.
+
+	 We can't "fix" that for the traditional Advanced SIMD vector modes
+	 without breaking backwards compatibility.  However, there's no such
+	 baggage for the structure modes, which were introduced in GCC 12.  */
+      if (aarch64_advsimd_struct_mode_p (mode))
+	return false;
+
+      /* For similar reasons, rely only on the type, not the mode, when
+	 processing SVE types.  */
       if (type && aarch64_some_values_include_pst_objects_p (type))
 	/* Leave later code to report an error if SVE is disabled.  */
 	gcc_assert (!TARGET_SVE || aarch64_sve_mode_p (mode));
@@ -19310,7 +19324,8 @@ aarch64_short_vector_p (const_tree type,
     {
       /* 64-bit and 128-bit vectors should only acquire an SVE mode if
 	 they are being treated as scalable AAPCS64 types.  */
-      gcc_assert (!aarch64_sve_mode_p (mode));
+      gcc_assert (!aarch64_sve_mode_p (mode)
+		  && !aarch64_advsimd_struct_mode_p (mode));
       return true;
     }
   return false;
diff --git a/gcc/testsuite/gcc.target/aarch64/pr103094.c b/gcc/testsuite/gcc.target/aarch64/pr103094.c
new file mode 100644
index 00000000000..beda99dc1f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr103094.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-rtl-expand -w" } */
+
+#include <arm_neon.h>
+
+void foo (uint8x8x2_t cols_01_23, uint8x8x2_t cols_45_67, uint16_t* outptr0)
+{
+  uint16x4x4_t cols_01_23_45_67 = { {
+    vreinterpret_u16_u8(cols_01_23.val[0]),
+    vreinterpret_u16_u8(cols_01_23.val[1]),
+    vreinterpret_u16_u8(cols_45_67.val[0]),
+    vreinterpret_u16_u8(cols_45_67.val[1])
+  } };
+
+  vst4_lane_u16(outptr0, cols_01_23_45_67, 0);
+}
+
+/* Check that we expand to v0 and v2 from the function arguments.  */
+/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v0 \[ cols_01_23 \]\)} expand } } */
+/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v2 \[ cols_45_67 \]\)} expand } } */
+
  

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fdf05505846721b02059df494d6395ae9423a8ef..d9104ddac3cdd44f7c2290b8725d05be4fd6468f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3055,15 +3055,17 @@  aarch64_advsimd_struct_mode_p (machine_mode mode)
 static bool
 aarch64_advsimd_partial_struct_mode_p (machine_mode mode)
 {
-  return (aarch64_classify_vector_mode (mode)
-	  == (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
+  return VECTOR_MODE_P (mode)
+	 && (aarch64_classify_vector_mode (mode)
+		== (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL));
 }
 
 /* Return true if MODE is an Advanced SIMD Q-register structure mode.  */
 static bool
 aarch64_advsimd_full_struct_mode_p (machine_mode mode)
 {
-  return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
+  return VECTOR_MODE_P (mode)
+	 && (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
 }
 
 /* Return true if MODE is any of the data vector modes, including
@@ -6468,17 +6470,21 @@  aarch64_function_value (const_tree type, const_tree func,
 					       NULL, false))
     {
       gcc_assert (!sve_p);
-      if (!aarch64_composite_type_p (type, mode))
+      if (aarch64_advsimd_full_struct_mode_p (mode))
+	{
+	  gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 16), count));
+	  return gen_rtx_REG (mode, V0_REGNUM);
+	}
+      else if (aarch64_advsimd_partial_struct_mode_p (mode))
+	{
+	  gcc_assert (known_eq (exact_div (GET_MODE_SIZE (mode), 8), count));
+	  return gen_rtx_REG (mode, V0_REGNUM);
+	}
+      else if (!aarch64_composite_type_p (type, mode))
 	{
 	  gcc_assert (count == 1 && mode == ag_mode);
 	  return gen_rtx_REG (mode, V0_REGNUM);
 	}
-      else if (aarch64_advsimd_full_struct_mode_p (mode)
-	       && known_eq (GET_MODE_SIZE (ag_mode), 16))
-	return gen_rtx_REG (mode, V0_REGNUM);
-      else if (aarch64_advsimd_partial_struct_mode_p (mode)
-	       && known_eq (GET_MODE_SIZE (ag_mode), 8))
-	return gen_rtx_REG (mode, V0_REGNUM);
       else
 	{
 	  int i;
@@ -6745,6 +6751,7 @@  aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
     /* No frontends can create types with variable-sized modes, so we
        shouldn't be asked to pass or return them.  */
     size = GET_MODE_SIZE (mode).to_constant ();
+
   size = ROUND_UP (size, UNITS_PER_WORD);
 
   allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
@@ -6769,17 +6776,21 @@  aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
       if (nvrn + nregs <= NUM_FP_ARG_REGS)
 	{
 	  pcum->aapcs_nextnvrn = nvrn + nregs;
-	  if (!aarch64_composite_type_p (type, mode))
+	  if (aarch64_advsimd_full_struct_mode_p (mode))
+	    {
+	      gcc_assert (nregs == size / 16);
+	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
+	    }
+	  else if (aarch64_advsimd_partial_struct_mode_p (mode))
+	    {
+	      gcc_assert (nregs == size / 8);
+	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
+	    }
+	  else if (!aarch64_composite_type_p (type, mode))
 	    {
 	      gcc_assert (nregs == 1);
 	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
 	    }
-	  else if (aarch64_advsimd_full_struct_mode_p (mode)
-		   && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 16))
-	    pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
-	  else if (aarch64_advsimd_partial_struct_mode_p (mode)
-		   && known_eq (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), 8))
-	    pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
 	  else
 	    {
 	      rtx par;
@@ -19285,6 +19296,13 @@  aarch64_short_vector_p (const_tree type,
       else
 	size = GET_MODE_SIZE (mode);
     }
+
+  /* If a Advanced SIMD partial or full aggregate vector type we aren't a short
+     type.  */
+  if (aarch64_advsimd_partial_struct_mode_p (mode)
+      || aarch64_advsimd_full_struct_mode_p (mode))
+    return false;
+
   if (known_eq (size, 8) || known_eq (size, 16))
     {
       /* 64-bit and 128-bit vectors should only acquire an SVE mode if
@@ -19316,6 +19334,12 @@  static bool
 aarch64_composite_type_p (const_tree type,
 			  machine_mode mode)
 {
+  /* If a Advanced SIMD partial or full aggregate vector type we are a
+     composite type.  */
+  if (aarch64_advsimd_partial_struct_mode_p (mode)
+      || aarch64_advsimd_full_struct_mode_p (mode))
+    return true;
+
   if (aarch64_short_vector_p (type, mode))
     return false;
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr103094.c b/gcc/testsuite/gcc.target/aarch64/pr103094.c
new file mode 100644
index 0000000000000000000000000000000000000000..441e602928ce8ac4e9890a1376acbc25671e284d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr103094.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-rtl-expand -w" } */
+
+#include <arm_neon.h>
+
+void foo (uint8x8x2_t cols_01_23, uint8x8x2_t cols_45_67, uint16_t* outptr0)
+{
+  uint16x4x4_t cols_01_23_45_67 = { {
+    vreinterpret_u16_u8(cols_01_23.val[0]),
+    vreinterpret_u16_u8(cols_01_23.val[1]),
+    vreinterpret_u16_u8(cols_45_67.val[0]),
+    vreinterpret_u16_u8(cols_45_67.val[1])
+  } };
+
+  vst4_lane_u16(outptr0, cols_01_23_45_67, 0);
+}
+
+/* Check that we expand to v0 and v2 from the function arguments.  */
+/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v0 \[ cols_01_23 \]\)} expand } } */
+/* { dg-final { scan-rtl-dump {\(reg:V2x8QI \d+ v2 \[ cols_45_67 \]\)} expand } } */
+