aarch64: Fix bitfield alignment in param passing [PR105549]

Message ID 20220607162431.243236-1-christophe.lyon@arm.com
State New
Headers
Series aarch64: Fix bitfield alignment in param passing [PR105549] |

Commit Message

Christophe Lyon June 7, 2022, 4:24 p.m. UTC
  While working on enabling DFP for AArch64, I noticed new failures in
gcc.dg/compat/struct-layout-1.exp (t028) which were not actually
caused by DFP types handling. These tests are generated during 'make
check' and enabling DFP made generation different (not sure if new
non-DFP tests are generated, or if existing ones are generated
differently, the tests in question are huge and difficult to compare).

Anyway, I reduced the problem to what I attach at the end of the new
gcc.target/aarch64/aapcs64/va_arg-17.c test and rewrote it in the same
scheme as other va_arg* AArch64 tests.  Richard Sandiford further
reduced this to a non-vararg function, added as a second testcase.

This is a tough case mixing bitfields and alignment, where
aarch64_function_arg_alignment did not follow what its descriptive
comment says: we want to use the natural alignment of the bitfield
type only if the user didn't override the alignment for the bitfield
itself.

The fix is thus very small, and this patch adds two new tests
(va_arg-17.c and pr105549.c). va_arg-17.c contains the reduced
offending testcase from struct-layout-1.exp for reference.

We also take the opportunity to fix the comment above
aarch64_function_arg_alignment since the value of the abi_break
parameter was changed in a previous commit, no longer match the
description.

2022-06-02  Christophe Lyon  <christophe.lyon@arm.com>

	gcc/
	PR target/105549
	* config/aarch64/aarch64.cc (aarch64_function_arg_alignment):
	Check DECL_USER_ALIGN for bitfield.

	gcc/testsuite/
	PR target/105549
	* gcc.target/aarch64/aapcs64/va_arg-17.c: New.
	* gcc.target/aarch64/pr105549.c: New.


###############     Attachment also inlined for ease of reply    ###############
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 40fc5e633992036a2c06867857a681792178ef00..2c6ccce7cb5dc32097d24514ee525729efb6b7ff 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -7262,9 +7262,9 @@ aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
 /* Given MODE and TYPE of a function argument, return the alignment in
    bits.  The idea is to suppress any stronger alignment requested by
    the user and opt for the natural alignment (specified in AAPCS64 \S
-   4.1).  ABI_BREAK is set to true if the alignment was incorrectly
-   calculated in versions of GCC prior to GCC-9.  This is a helper
-   function for local use only.  */
+   4.1).  ABI_BREAK is set to the old alignment if the alignment was
+   incorrectly calculated in versions of GCC prior to GCC-9.  This is
+   a helper function for local use only.  */
 
 static unsigned int
 aarch64_function_arg_alignment (machine_mode mode, const_tree type,
@@ -7304,7 +7304,10 @@ aarch64_function_arg_alignment (machine_mode mode, const_tree type,
 	   "s" contains only one Fundamental Data Type (the int field)
 	   but gains 8-byte alignment and size thanks to "e".  */
 	alignment = std::max (alignment, DECL_ALIGN (field));
-	if (DECL_BIT_FIELD_TYPE (field))
+
+	/* Take bit-field type's alignment into account only if the
+	   user didn't override this field's alignment.  */
+	if (DECL_BIT_FIELD_TYPE (field) && !DECL_USER_ALIGN (field))
 	  bitfield_alignment
 	    = std::max (bitfield_alignment,
 			TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)));
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
new file mode 100644
index 0000000000000000000000000000000000000000..24895c3ab48309b601f6f22c176f1e52350c2257
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
@@ -0,0 +1,105 @@
+/* Test AAPCS64 layout and __builtin_va_arg.
+
+   This test covers a corner case where a composite type parameter fits in one
+   register: we do not need a double-word alignment when accessing it in the
+   va_arg stack area.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+
+#ifndef IN_FRAMEWORK
+#define AAPCS64_TEST_STDARG
+#define TESTFILE "va_arg-17.c"
+#include "type-def.h"
+
+enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
+typedef enum E6 Tal16E6 __attribute__((aligned (16)));
+typedef unsigned int Tuint;
+
+int fails;
+
+union S2844 {
+  Tuint a:((((10) - 1) & 31) + 1);
+  Tal16E6 __attribute__((aligned (2), packed)) b:31;
+  struct{}c[0];
+} ;
+union S2844 s2844;
+union S2844 a2844[5];
+
+#define HAS_DATA_INIT_FUNC
+void init_data ()
+{
+  memset (&s2844, '\0', sizeof (s2844));
+  memset (a2844, '\0', sizeof (a2844));
+  s2844.a = 799U;
+  a2844[2].a = 586U;
+}
+
+#include "abitest.h"
+#else
+  ARG       (int          , 1        , W0 , LAST_NAMED_ARG_ID)
+  DOTS
+  ANON_PROMOTED  (float   , 1.0f, double, 1.0, D0, 1)
+  ANON      (union S2844  , s2844    , X1 , 2)
+  ANON      (long long    , 2LL      , X2 , 3)
+  ANON      (union  S2844 , a2844[2] , X3 , 4)
+  LAST_ANON (union  S2844 , a2844[2] , X4 , 5)
+#endif
+
+#if 0
+  /* This test is derived from a case generated by struct-layout-1.exp:  */
+
+enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
+typedef enum E6 Tal16E6 __attribute__((aligned (16)));
+typedef unsigned int Tuint;
+
+int fails;
+
+union S2844 {
+  Tuint a:((((10) - 1) & 31) + 1);
+  Tal16E6 __attribute__((aligned (2), packed)) b:31;
+  struct{}c[0];
+} ;
+union S2844 s2844;
+union S2844 a2844[5];
+
+typedef __builtin_va_list __gnuc_va_list;
+typedef __gnuc_va_list va_list;
+
+void check2844va (int z, ...) {
+  union S2844 arg, *p;
+  va_list ap;
+
+  __builtin_va_start(ap,z);
+  if (__builtin_va_arg(ap,double) != 1.0)
+    printf ("fail %d.%d\n", 2844, 0), ++fails;
+
+  p = &s2844;
+  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
+  if (p->a != arg.a)
+    printf ("fail %d.%d\n", 2844, 1), ++fails;
+
+  if (__builtin_va_arg(ap,long long) != 3LL)
+    printf ("fail %d.%d\n", 2844, 2), ++fails;
+
+  p = &a2844[2];
+  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
+  if (p->a != arg.a)
+    printf ("fail %d.%d\n", 2844, 3), ++fails;
+
+  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
+  if (p->a != arg.a)
+    printf ("fail %d.%d\n", 2844, 4), ++fails;
+
+  __builtin_va_end(ap);
+}
+
+int main (void) {
+  int i, j;
+  memset (&s2844, '\0', sizeof (s2844));
+  memset (a2844, '\0', sizeof (a2844));
+  s2844.a = 799U;
+  a2844[2].a = 586U;
+  check2844va (1, 1.0, s2844, 2LL, a2844[2], a2844[2]);
+  exit (fails != 0);
+}
+#endif /* 0 */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr105549.c b/gcc/testsuite/gcc.target/aarch64/pr105549.c
new file mode 100644
index 0000000000000000000000000000000000000000..55a91ed6bc48b56eed61d668971e890dbd3250ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr105549.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-save-temps" } */
+
+enum e { E1 };
+typedef enum e e __attribute__((aligned(16)));
+union u {
+    __attribute__((aligned(2), packed)) e a : 1;
+    int x[4];
+};
+union u g(int a, union u u2) { return u2; }
+
+/* { dg-final { scan-assembler "stp\tx1, x2, \\\[sp, 8\\\]" } } */
  

Comments

Richard Sandiford June 7, 2022, 5:44 p.m. UTC | #1
Christophe Lyon via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> While working on enabling DFP for AArch64, I noticed new failures in
> gcc.dg/compat/struct-layout-1.exp (t028) which were not actually
> caused by DFP types handling. These tests are generated during 'make
> check' and enabling DFP made generation different (not sure if new
> non-DFP tests are generated, or if existing ones are generated
> differently, the tests in question are huge and difficult to compare).
>
> Anyway, I reduced the problem to what I attach at the end of the new
> gcc.target/aarch64/aapcs64/va_arg-17.c test and rewrote it in the same
> scheme as other va_arg* AArch64 tests.  Richard Sandiford further
> reduced this to a non-vararg function, added as a second testcase.
>
> This is a tough case mixing bitfields and alignment, where
> aarch64_function_arg_alignment did not follow what its descriptive
> comment says: we want to use the natural alignment of the bitfield
> type only if the user didn't override the alignment for the bitfield
> itself.
>
> The fix is thus very small, and this patch adds two new tests
> (va_arg-17.c and pr105549.c). va_arg-17.c contains the reduced
> offending testcase from struct-layout-1.exp for reference.
>
> We also take the opportunity to fix the comment above
> aarch64_function_arg_alignment since the value of the abi_break
> parameter was changed in a previous commit, no longer match the
> description.
>
> 2022-06-02  Christophe Lyon  <christophe.lyon@arm.com>
>
> 	gcc/
> 	PR target/105549
> 	* config/aarch64/aarch64.cc (aarch64_function_arg_alignment):
> 	Check DECL_USER_ALIGN for bitfield.
>
> 	gcc/testsuite/
> 	PR target/105549
> 	* gcc.target/aarch64/aapcs64/va_arg-17.c: New.
> 	* gcc.target/aarch64/pr105549.c: New.
>
>
> ###############     Attachment also inlined for ease of reply    ###############
>
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 40fc5e633992036a2c06867857a681792178ef00..2c6ccce7cb5dc32097d24514ee525729efb6b7ff 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -7262,9 +7262,9 @@ aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
>  /* Given MODE and TYPE of a function argument, return the alignment in
>     bits.  The idea is to suppress any stronger alignment requested by
>     the user and opt for the natural alignment (specified in AAPCS64 \S
> -   4.1).  ABI_BREAK is set to true if the alignment was incorrectly
> -   calculated in versions of GCC prior to GCC-9.  This is a helper
> -   function for local use only.  */
> +   4.1).  ABI_BREAK is set to the old alignment if the alignment was
> +   incorrectly calculated in versions of GCC prior to GCC-9.  This is
> +   a helper function for local use only.  */
>  
>  static unsigned int
>  aarch64_function_arg_alignment (machine_mode mode, const_tree type,
> @@ -7304,7 +7304,10 @@ aarch64_function_arg_alignment (machine_mode mode, const_tree type,
>  	   "s" contains only one Fundamental Data Type (the int field)
>  	   but gains 8-byte alignment and size thanks to "e".  */
>  	alignment = std::max (alignment, DECL_ALIGN (field));
> -	if (DECL_BIT_FIELD_TYPE (field))
> +
> +	/* Take bit-field type's alignment into account only if the
> +	   user didn't override this field's alignment.  */
> +	if (DECL_BIT_FIELD_TYPE (field) && !DECL_USER_ALIGN (field))

I think we need to check DECL_PACKED instead.  On its own, an alignment
attribute on the field can only increase alignment, not decrease it.
In constrast, the packed attribute effectively forces the alignment to
1 byte, so has an effect even without an alignment attribute.  Adding an
explicit alignment on top can then increase the alignment from 1 to any
value (bigger or smaller than the original underlying type).

E.g. for:

---------------------------------------------------------------------
typedef unsigned long long ull __attribute__((aligned(ALIGN)));

#ifndef EXTRA
#define EXTRA unsigned long long x;
#endif

struct S1 { __attribute__((aligned(1))) ull i : 1; EXTRA };
struct S2 { __attribute__((aligned(2))) ull i : 1; EXTRA };
struct S4 { __attribute__((aligned(4))) ull i : 1; EXTRA };
struct S8 { __attribute__((aligned(8))) ull i : 1; EXTRA };
struct S16 { __attribute__((aligned(16))) ull i : 1; EXTRA };

struct Sp { ull i : 1; EXTRA }__attribute__((packed));
struct S1p { __attribute__((packed, aligned(1))) ull i : 1; EXTRA };
struct S2p { __attribute__((packed, aligned(2))) ull i : 1; EXTRA };
struct S4p { __attribute__((packed, aligned(4))) ull i : 1; EXTRA };
struct S8p { __attribute__((packed, aligned(8))) ull i : 1; EXTRA };
struct S16p { __attribute__((packed, aligned(16))) ull i : 1; EXTRA };

int f1(int a, struct S1 s) { return s.i; }
int f2(int a, struct S2 s) { return s.i; }
int f4(int a, struct S4 s) { return s.i; }
int f8(int a, struct S8 s) { return s.i; }
int f16(int a, struct S16 s) { return s.i; }

int fp(int a, struct Sp s) { return s.i; }
int f1p(int a, struct S1p s) { return s.i; }
int f2p(int a, struct S2p s) { return s.i; }
int f4p(int a, struct S4p s) { return s.i; }
int f8p(int a, struct S8p s) { return s.i; }
int f16p(int a, struct S16p s) { return s.i; }
---------------------------------------------------------------------

there are at least 4 interesting cases:

  {-DALIGN=8,-DALIGN=16} x {-DEXTRA=,}

From my reading of the ABI, clang gets all of them right.
The case GCC currently gets wrong is:

  fp f1p f2p f4p f8p for -DALIGN=16   [A]

f1 to f16 are equivalent for -DALIGN=16: all the structures have 16-byte
alignment, despite the user alignments on the fields.  We currently
handle those correctly.

Unfortunately, fixing [A] means that this is an ABI break from GCC 12,
so we'll need (yet another) -Wpsabi warning.  In this case I guess we're
actually restoring the pre-GCC 9.1 behaviour.

An interesting oddity with these rules is that for:

---------------------------------------------------------------------
struct S1 { unsigned long long a, b; } __attribute__((aligned(16)));
struct S2 { struct S1 s; };

int f1(int a, struct S1 s1) { return s1.a; }
int f2(int a, struct S2 s2) { return s2.s.a; }
---------------------------------------------------------------------

S1 is not treated as 16-byte aligned, but S2 is (because the analysis
isn't recursive).  Clang and GCC seem to be consistent about that though.

Thanks,
Richard

>  	  bitfield_alignment
>  	    = std::max (bitfield_alignment,
>  			TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)));
> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..24895c3ab48309b601f6f22c176f1e52350c2257
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
> @@ -0,0 +1,105 @@
> +/* Test AAPCS64 layout and __builtin_va_arg.
> +
> +   This test covers a corner case where a composite type parameter fits in one
> +   register: we do not need a double-word alignment when accessing it in the
> +   va_arg stack area.  */
> +
> +/* { dg-do run { target aarch64*-*-* } } */
> +
> +#ifndef IN_FRAMEWORK
> +#define AAPCS64_TEST_STDARG
> +#define TESTFILE "va_arg-17.c"
> +#include "type-def.h"
> +
> +enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
> +typedef enum E6 Tal16E6 __attribute__((aligned (16)));
> +typedef unsigned int Tuint;
> +
> +int fails;
> +
> +union S2844 {
> +  Tuint a:((((10) - 1) & 31) + 1);
> +  Tal16E6 __attribute__((aligned (2), packed)) b:31;
> +  struct{}c[0];
> +} ;
> +union S2844 s2844;
> +union S2844 a2844[5];
> +
> +#define HAS_DATA_INIT_FUNC
> +void init_data ()
> +{
> +  memset (&s2844, '\0', sizeof (s2844));
> +  memset (a2844, '\0', sizeof (a2844));
> +  s2844.a = 799U;
> +  a2844[2].a = 586U;
> +}
> +
> +#include "abitest.h"
> +#else
> +  ARG       (int          , 1        , W0 , LAST_NAMED_ARG_ID)
> +  DOTS
> +  ANON_PROMOTED  (float   , 1.0f, double, 1.0, D0, 1)
> +  ANON      (union S2844  , s2844    , X1 , 2)
> +  ANON      (long long    , 2LL      , X2 , 3)
> +  ANON      (union  S2844 , a2844[2] , X3 , 4)
> +  LAST_ANON (union  S2844 , a2844[2] , X4 , 5)
> +#endif
> +
> +#if 0
> +  /* This test is derived from a case generated by struct-layout-1.exp:  */
> +
> +enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
> +typedef enum E6 Tal16E6 __attribute__((aligned (16)));
> +typedef unsigned int Tuint;
> +
> +int fails;
> +
> +union S2844 {
> +  Tuint a:((((10) - 1) & 31) + 1);
> +  Tal16E6 __attribute__((aligned (2), packed)) b:31;
> +  struct{}c[0];
> +} ;
> +union S2844 s2844;
> +union S2844 a2844[5];
> +
> +typedef __builtin_va_list __gnuc_va_list;
> +typedef __gnuc_va_list va_list;
> +
> +void check2844va (int z, ...) {
> +  union S2844 arg, *p;
> +  va_list ap;
> +
> +  __builtin_va_start(ap,z);
> +  if (__builtin_va_arg(ap,double) != 1.0)
> +    printf ("fail %d.%d\n", 2844, 0), ++fails;
> +
> +  p = &s2844;
> +  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
> +  if (p->a != arg.a)
> +    printf ("fail %d.%d\n", 2844, 1), ++fails;
> +
> +  if (__builtin_va_arg(ap,long long) != 3LL)
> +    printf ("fail %d.%d\n", 2844, 2), ++fails;
> +
> +  p = &a2844[2];
> +  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
> +  if (p->a != arg.a)
> +    printf ("fail %d.%d\n", 2844, 3), ++fails;
> +
> +  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
> +  if (p->a != arg.a)
> +    printf ("fail %d.%d\n", 2844, 4), ++fails;
> +
> +  __builtin_va_end(ap);
> +}
> +
> +int main (void) {
> +  int i, j;
> +  memset (&s2844, '\0', sizeof (s2844));
> +  memset (a2844, '\0', sizeof (a2844));
> +  s2844.a = 799U;
> +  a2844[2].a = 586U;
> +  check2844va (1, 1.0, s2844, 2LL, a2844[2], a2844[2]);
> +  exit (fails != 0);
> +}
> +#endif /* 0 */
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr105549.c b/gcc/testsuite/gcc.target/aarch64/pr105549.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..55a91ed6bc48b56eed61d668971e890dbd3250ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr105549.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-save-temps" } */
> +
> +enum e { E1 };
> +typedef enum e e __attribute__((aligned(16)));
> +union u {
> +    __attribute__((aligned(2), packed)) e a : 1;
> +    int x[4];
> +};
> +union u g(int a, union u u2) { return u2; }
> +
> +/* { dg-final { scan-assembler "stp\tx1, x2, \\\[sp, 8\\\]" } } */
  
Christophe Lyon June 8, 2022, 12:25 p.m. UTC | #2
On 6/7/22 19:44, Richard Sandiford wrote:
> Christophe Lyon via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>> While working on enabling DFP for AArch64, I noticed new failures in
>> gcc.dg/compat/struct-layout-1.exp (t028) which were not actually
>> caused by DFP types handling. These tests are generated during 'make
>> check' and enabling DFP made generation different (not sure if new
>> non-DFP tests are generated, or if existing ones are generated
>> differently, the tests in question are huge and difficult to compare).
>>
>> Anyway, I reduced the problem to what I attach at the end of the new
>> gcc.target/aarch64/aapcs64/va_arg-17.c test and rewrote it in the same
>> scheme as other va_arg* AArch64 tests.  Richard Sandiford further
>> reduced this to a non-vararg function, added as a second testcase.
>>
>> This is a tough case mixing bitfields and alignment, where
>> aarch64_function_arg_alignment did not follow what its descriptive
>> comment says: we want to use the natural alignment of the bitfield
>> type only if the user didn't override the alignment for the bitfield
>> itself.
>>
>> The fix is thus very small, and this patch adds two new tests
>> (va_arg-17.c and pr105549.c). va_arg-17.c contains the reduced
>> offending testcase from struct-layout-1.exp for reference.
>>
>> We also take the opportunity to fix the comment above
>> aarch64_function_arg_alignment since the value of the abi_break
>> parameter was changed in a previous commit, no longer match the
>> description.
>>
>> 2022-06-02  Christophe Lyon  <christophe.lyon@arm.com>
>>
>> 	gcc/
>> 	PR target/105549
>> 	* config/aarch64/aarch64.cc (aarch64_function_arg_alignment):
>> 	Check DECL_USER_ALIGN for bitfield.
>>
>> 	gcc/testsuite/
>> 	PR target/105549
>> 	* gcc.target/aarch64/aapcs64/va_arg-17.c: New.
>> 	* gcc.target/aarch64/pr105549.c: New.
>>
>>
>> ###############     Attachment also inlined for ease of reply    ###############
>>
>>
>> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
>> index 40fc5e633992036a2c06867857a681792178ef00..2c6ccce7cb5dc32097d24514ee525729efb6b7ff 100644
>> --- a/gcc/config/aarch64/aarch64.cc
>> +++ b/gcc/config/aarch64/aarch64.cc
>> @@ -7262,9 +7262,9 @@ aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
>>   /* Given MODE and TYPE of a function argument, return the alignment in
>>      bits.  The idea is to suppress any stronger alignment requested by
>>      the user and opt for the natural alignment (specified in AAPCS64 \S
>> -   4.1).  ABI_BREAK is set to true if the alignment was incorrectly
>> -   calculated in versions of GCC prior to GCC-9.  This is a helper
>> -   function for local use only.  */
>> +   4.1).  ABI_BREAK is set to the old alignment if the alignment was
>> +   incorrectly calculated in versions of GCC prior to GCC-9.  This is
>> +   a helper function for local use only.  */
>>   
>>   static unsigned int
>>   aarch64_function_arg_alignment (machine_mode mode, const_tree type,
>> @@ -7304,7 +7304,10 @@ aarch64_function_arg_alignment (machine_mode mode, const_tree type,
>>   	   "s" contains only one Fundamental Data Type (the int field)
>>   	   but gains 8-byte alignment and size thanks to "e".  */
>>   	alignment = std::max (alignment, DECL_ALIGN (field));
>> -	if (DECL_BIT_FIELD_TYPE (field))
>> +
>> +	/* Take bit-field type's alignment into account only if the
>> +	   user didn't override this field's alignment.  */
>> +	if (DECL_BIT_FIELD_TYPE (field) && !DECL_USER_ALIGN (field))
> 
> I think we need to check DECL_PACKED instead.  On its own, an alignment
> attribute on the field can only increase alignment, not decrease it.
> In constrast, the packed attribute effectively forces the alignment to
> 1 byte, so has an effect even without an alignment attribute.  Adding an
> explicit alignment on top can then increase the alignment from 1 to any
> value (bigger or smaller than the original underlying type).

Right, but the comment before aarch64_function_arg_alignment says:

"The idea is to suppress any stronger alignment requested by the user 
and opt for the natural alignment (specified in AAPCS64 \S 4.1)"

When using DECL_PACKED, wouldn't we check the opposite of this (ie. that 
the user requested a smaller alignment)?   I mean we'd not "suppress 
stronger alignment" since such cases do not have DECL_PACKED?


However I'm not sure which part of the ABI is mentioned in the comment, 
in my copy 4.1 is "Design Goals" and does not elaborate on bitfields and 
parameters.


> 
> E.g. for:
> 
> ---------------------------------------------------------------------
> typedef unsigned long long ull __attribute__((aligned(ALIGN)));
> 
> #ifndef EXTRA
> #define EXTRA unsigned long long x;
> #endif
> 
> struct S1 { __attribute__((aligned(1))) ull i : 1; EXTRA };
> struct S2 { __attribute__((aligned(2))) ull i : 1; EXTRA };
> struct S4 { __attribute__((aligned(4))) ull i : 1; EXTRA };
> struct S8 { __attribute__((aligned(8))) ull i : 1; EXTRA };
> struct S16 { __attribute__((aligned(16))) ull i : 1; EXTRA };
> 
> struct Sp { ull i : 1; EXTRA }__attribute__((packed));
> struct S1p { __attribute__((packed, aligned(1))) ull i : 1; EXTRA };
> struct S2p { __attribute__((packed, aligned(2))) ull i : 1; EXTRA };
> struct S4p { __attribute__((packed, aligned(4))) ull i : 1; EXTRA };
> struct S8p { __attribute__((packed, aligned(8))) ull i : 1; EXTRA };
> struct S16p { __attribute__((packed, aligned(16))) ull i : 1; EXTRA };
> 
> int f1(int a, struct S1 s) { return s.i; }
> int f2(int a, struct S2 s) { return s.i; }
> int f4(int a, struct S4 s) { return s.i; }
> int f8(int a, struct S8 s) { return s.i; }
> int f16(int a, struct S16 s) { return s.i; }
> 
> int fp(int a, struct Sp s) { return s.i; }
> int f1p(int a, struct S1p s) { return s.i; }
> int f2p(int a, struct S2p s) { return s.i; }
> int f4p(int a, struct S4p s) { return s.i; }
> int f8p(int a, struct S8p s) { return s.i; }
> int f16p(int a, struct S16p s) { return s.i; }
> ---------------------------------------------------------------------
> 
> there are at least 4 interesting cases:
> 
>    {-DALIGN=8,-DALIGN=16} x {-DEXTRA=,}
> 
>  From my reading of the ABI, clang gets all of them right.
Which sections of the ABI? I've re-read AAPCS64 (5.9.4 bit-fields 
subdivision, 8.1.8 bit-fields), and these specific cases are not clear 
to me :-(


> The case GCC currently gets wrong is:
> 
>    fp f1p f2p f4p f8p for -DALIGN=16   [A]
> 
> f1 to f16 are equivalent for -DALIGN=16: all the structures have 16-byte
> alignment, despite the user alignments on the fields.  We currently
> handle those correctly.
> 
> Unfortunately, fixing [A] means that this is an ABI break from GCC 12,
> so we'll need (yet another) -Wpsabi warning.  In this case I guess we're
> actually restoring the pre-GCC 9.1 behaviour.
gasp! I hoped we'd be safe as this is a bug fix ;-)

Do you mean that commit r9-5650-gc590597c45948c was in fact a mistake?

I've been looking at it and at r11-8226-g49813aad3292f7, it's a bit 
convoluted :-)


> 
> An interesting oddity with these rules is that for:
> 
> ---------------------------------------------------------------------
> struct S1 { unsigned long long a, b; } __attribute__((aligned(16)));
> struct S2 { struct S1 s; };
> 
> int f1(int a, struct S1 s1) { return s1.a; }
> int f2(int a, struct S2 s2) { return s2.s.a; }
> ---------------------------------------------------------------------
> 
> S1 is not treated as 16-byte aligned, but S2 is (because the analysis
> isn't recursive).  Clang and GCC seem to be consistent about that though.
> 
> Thanks,
> Richard
> 
>>   	  bitfield_alignment
>>   	    = std::max (bitfield_alignment,
>>   			TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)));
>> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..24895c3ab48309b601f6f22c176f1e52350c2257
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
>> @@ -0,0 +1,105 @@
>> +/* Test AAPCS64 layout and __builtin_va_arg.
>> +
>> +   This test covers a corner case where a composite type parameter fits in one
>> +   register: we do not need a double-word alignment when accessing it in the
>> +   va_arg stack area.  */
>> +
>> +/* { dg-do run { target aarch64*-*-* } } */
>> +
>> +#ifndef IN_FRAMEWORK
>> +#define AAPCS64_TEST_STDARG
>> +#define TESTFILE "va_arg-17.c"
>> +#include "type-def.h"
>> +
>> +enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
>> +typedef enum E6 Tal16E6 __attribute__((aligned (16)));
>> +typedef unsigned int Tuint;
>> +
>> +int fails;
>> +
>> +union S2844 {
>> +  Tuint a:((((10) - 1) & 31) + 1);
>> +  Tal16E6 __attribute__((aligned (2), packed)) b:31;
>> +  struct{}c[0];
>> +} ;
>> +union S2844 s2844;
>> +union S2844 a2844[5];
>> +
>> +#define HAS_DATA_INIT_FUNC
>> +void init_data ()
>> +{
>> +  memset (&s2844, '\0', sizeof (s2844));
>> +  memset (a2844, '\0', sizeof (a2844));
>> +  s2844.a = 799U;
>> +  a2844[2].a = 586U;
>> +}
>> +
>> +#include "abitest.h"
>> +#else
>> +  ARG       (int          , 1        , W0 , LAST_NAMED_ARG_ID)
>> +  DOTS
>> +  ANON_PROMOTED  (float   , 1.0f, double, 1.0, D0, 1)
>> +  ANON      (union S2844  , s2844    , X1 , 2)
>> +  ANON      (long long    , 2LL      , X2 , 3)
>> +  ANON      (union  S2844 , a2844[2] , X3 , 4)
>> +  LAST_ANON (union  S2844 , a2844[2] , X4 , 5)
>> +#endif
>> +
>> +#if 0
>> +  /* This test is derived from a case generated by struct-layout-1.exp:  */
>> +
>> +enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
>> +typedef enum E6 Tal16E6 __attribute__((aligned (16)));
>> +typedef unsigned int Tuint;
>> +
>> +int fails;
>> +
>> +union S2844 {
>> +  Tuint a:((((10) - 1) & 31) + 1);
>> +  Tal16E6 __attribute__((aligned (2), packed)) b:31;
>> +  struct{}c[0];
>> +} ;
>> +union S2844 s2844;
>> +union S2844 a2844[5];
>> +
>> +typedef __builtin_va_list __gnuc_va_list;
>> +typedef __gnuc_va_list va_list;
>> +
>> +void check2844va (int z, ...) {
>> +  union S2844 arg, *p;
>> +  va_list ap;
>> +
>> +  __builtin_va_start(ap,z);
>> +  if (__builtin_va_arg(ap,double) != 1.0)
>> +    printf ("fail %d.%d\n", 2844, 0), ++fails;
>> +
>> +  p = &s2844;
>> +  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
>> +  if (p->a != arg.a)
>> +    printf ("fail %d.%d\n", 2844, 1), ++fails;
>> +
>> +  if (__builtin_va_arg(ap,long long) != 3LL)
>> +    printf ("fail %d.%d\n", 2844, 2), ++fails;
>> +
>> +  p = &a2844[2];
>> +  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
>> +  if (p->a != arg.a)
>> +    printf ("fail %d.%d\n", 2844, 3), ++fails;
>> +
>> +  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
>> +  if (p->a != arg.a)
>> +    printf ("fail %d.%d\n", 2844, 4), ++fails;
>> +
>> +  __builtin_va_end(ap);
>> +}
>> +
>> +int main (void) {
>> +  int i, j;
>> +  memset (&s2844, '\0', sizeof (s2844));
>> +  memset (a2844, '\0', sizeof (a2844));
>> +  s2844.a = 799U;
>> +  a2844[2].a = 586U;
>> +  check2844va (1, 1.0, s2844, 2LL, a2844[2], a2844[2]);
>> +  exit (fails != 0);
>> +}
>> +#endif /* 0 */
>> diff --git a/gcc/testsuite/gcc.target/aarch64/pr105549.c b/gcc/testsuite/gcc.target/aarch64/pr105549.c
>> new file mode 100644
>> index 0000000000000000000000000000000000000000..55a91ed6bc48b56eed61d668971e890dbd3250ef
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/pr105549.c
>> @@ -0,0 +1,12 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-save-temps" } */
>> +
>> +enum e { E1 };
>> +typedef enum e e __attribute__((aligned(16)));
>> +union u {
>> +    __attribute__((aligned(2), packed)) e a : 1;
>> +    int x[4];
>> +};
>> +union u g(int a, union u u2) { return u2; }
>> +
>> +/* { dg-final { scan-assembler "stp\tx1, x2, \\\[sp, 8\\\]" } } */
  
Richard Sandiford June 8, 2022, 1:19 p.m. UTC | #3
Christophe Lyon <christophe.lyon@arm.com> writes:
> On 6/7/22 19:44, Richard Sandiford wrote:
>> Christophe Lyon via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>>> While working on enabling DFP for AArch64, I noticed new failures in
>>> gcc.dg/compat/struct-layout-1.exp (t028) which were not actually
>>> caused by DFP types handling. These tests are generated during 'make
>>> check' and enabling DFP made generation different (not sure if new
>>> non-DFP tests are generated, or if existing ones are generated
>>> differently, the tests in question are huge and difficult to compare).
>>>
>>> Anyway, I reduced the problem to what I attach at the end of the new
>>> gcc.target/aarch64/aapcs64/va_arg-17.c test and rewrote it in the same
>>> scheme as other va_arg* AArch64 tests.  Richard Sandiford further
>>> reduced this to a non-vararg function, added as a second testcase.
>>>
>>> This is a tough case mixing bitfields and alignment, where
>>> aarch64_function_arg_alignment did not follow what its descriptive
>>> comment says: we want to use the natural alignment of the bitfield
>>> type only if the user didn't override the alignment for the bitfield
>>> itself.
>>>
>>> The fix is thus very small, and this patch adds two new tests
>>> (va_arg-17.c and pr105549.c). va_arg-17.c contains the reduced
>>> offending testcase from struct-layout-1.exp for reference.
>>>
>>> We also take the opportunity to fix the comment above
>>> aarch64_function_arg_alignment since the value of the abi_break
>>> parameter was changed in a previous commit, no longer match the
>>> description.
>>>
>>> 2022-06-02  Christophe Lyon  <christophe.lyon@arm.com>
>>>
>>> 	gcc/
>>> 	PR target/105549
>>> 	* config/aarch64/aarch64.cc (aarch64_function_arg_alignment):
>>> 	Check DECL_USER_ALIGN for bitfield.
>>>
>>> 	gcc/testsuite/
>>> 	PR target/105549
>>> 	* gcc.target/aarch64/aapcs64/va_arg-17.c: New.
>>> 	* gcc.target/aarch64/pr105549.c: New.
>>>
>>>
>>> ###############     Attachment also inlined for ease of reply    ###############
>>>
>>>
>>> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
>>> index 40fc5e633992036a2c06867857a681792178ef00..2c6ccce7cb5dc32097d24514ee525729efb6b7ff 100644
>>> --- a/gcc/config/aarch64/aarch64.cc
>>> +++ b/gcc/config/aarch64/aarch64.cc
>>> @@ -7262,9 +7262,9 @@ aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
>>>   /* Given MODE and TYPE of a function argument, return the alignment in
>>>      bits.  The idea is to suppress any stronger alignment requested by
>>>      the user and opt for the natural alignment (specified in AAPCS64 \S
>>> -   4.1).  ABI_BREAK is set to true if the alignment was incorrectly
>>> -   calculated in versions of GCC prior to GCC-9.  This is a helper
>>> -   function for local use only.  */
>>> +   4.1).  ABI_BREAK is set to the old alignment if the alignment was
>>> +   incorrectly calculated in versions of GCC prior to GCC-9.  This is
>>> +   a helper function for local use only.  */
>>>   
>>>   static unsigned int
>>>   aarch64_function_arg_alignment (machine_mode mode, const_tree type,
>>> @@ -7304,7 +7304,10 @@ aarch64_function_arg_alignment (machine_mode mode, const_tree type,
>>>   	   "s" contains only one Fundamental Data Type (the int field)
>>>   	   but gains 8-byte alignment and size thanks to "e".  */
>>>   	alignment = std::max (alignment, DECL_ALIGN (field));
>>> -	if (DECL_BIT_FIELD_TYPE (field))
>>> +
>>> +	/* Take bit-field type's alignment into account only if the
>>> +	   user didn't override this field's alignment.  */
>>> +	if (DECL_BIT_FIELD_TYPE (field) && !DECL_USER_ALIGN (field))
>> 
>> I think we need to check DECL_PACKED instead.  On its own, an alignment
>> attribute on the field can only increase alignment, not decrease it.
>> In constrast, the packed attribute effectively forces the alignment to
>> 1 byte, so has an effect even without an alignment attribute.  Adding an
>> explicit alignment on top can then increase the alignment from 1 to any
>> value (bigger or smaller than the original underlying type).
>
> Right, but the comment before aarch64_function_arg_alignment says:
>
> "The idea is to suppress any stronger alignment requested by the user 
> and opt for the natural alignment (specified in AAPCS64 \S 4.1)"
>
> When using DECL_PACKED, wouldn't we check the opposite of this (ie. that 
> the user requested a smaller alignment)?   I mean we'd not "suppress 
> stronger alignment" since such cases do not have DECL_PACKED?

I think "stronger alignment" here means "greater alignment" rather
than "less alignment".  But in these examples we're dealing with
alignments of the fields.  I think that part is OK, and that the
intention is to ignore any greater alignment specified at the structure
level, independently of the fields.

In other words, if field list X occupies 16 bytes, then S1 and S2
below should be handled in the same way as far as register assignment
is concerned:

  struct S1 { X };
  struct S2 { X } __attribute__((aligned(16)));

The idea is that structures are just a sequence of fields/members
and don't have any "magic" properties beyond that.

> However I'm not sure which part of the ABI is mentioned in the comment, 
> in my copy 4.1 is "Design Goals" and does not elaborate on bitfields and 
> parameters.

Yeah, the section numbers change as new stuff is added, and probably
also changed in the move to rst.  I think the comment is referring
to the following, and in particular to the first bullet point under
"Aggregates":

--------------------------------------------------------------------------
Composite Types
---------------

A Composite Type is a collection of one or more Fundamental Data Types that are handled as a single entity at the procedure call level. A Composite Type can be any of:

- An aggregate, where the members are laid out sequentially in memory (possibly with inter-member padding)

- A union, where each of the members has the same address

- An array, which is a repeated sequence of some other type (its base type).

The definitions are recursive; that is, each of the types may contain a Composite Type as a member.

*  The *member alignment* of an element of a composite type is the
   alignment of that member after the application of any language alignment
   modifiers to that member

*  The *natural alignment* of a composite type is the maximum of
   each of the member alignments of the 'top-level' members of the composite
   type i.e. before any alignment adjustment of the entire composite is
   applied

Aggregates
^^^^^^^^^^

- The alignment of an aggregate shall be the alignment of its most-aligned member.

- The size of an aggregate shall be the smallest multiple of its alignment that is sufficient to hold all of its members.
--------------------------------------------------------------------------

So:

- Types start out with a "natural alignment".  For built-in types this
  is defined directly in the AAPCS64.  For composite types it is worked
  out as above (plus similar rules for unions and arrays, snipped above).

- Non-bitfield members have an alignment that is by default the same as
  the natural alignment of their type.  However, for C/C++ this can be
  modified by things like:

  - an extra alignment applied via a typedef
  - a packed attribute on the field/member
  - a packed attribute on the containing struct (which propagates to
    all fields/members)
  - a pack pragma
  - an alignment attribute on the field/member
  - probably others too

- Bitfield members are handled as described separately in the AAPCS64.

>> E.g. for:
>> 
>> ---------------------------------------------------------------------
>> typedef unsigned long long ull __attribute__((aligned(ALIGN)));
>> 
>> #ifndef EXTRA
>> #define EXTRA unsigned long long x;
>> #endif
>> 
>> struct S1 { __attribute__((aligned(1))) ull i : 1; EXTRA };
>> struct S2 { __attribute__((aligned(2))) ull i : 1; EXTRA };
>> struct S4 { __attribute__((aligned(4))) ull i : 1; EXTRA };
>> struct S8 { __attribute__((aligned(8))) ull i : 1; EXTRA };
>> struct S16 { __attribute__((aligned(16))) ull i : 1; EXTRA };
>> 
>> struct Sp { ull i : 1; EXTRA }__attribute__((packed));
>> struct S1p { __attribute__((packed, aligned(1))) ull i : 1; EXTRA };
>> struct S2p { __attribute__((packed, aligned(2))) ull i : 1; EXTRA };
>> struct S4p { __attribute__((packed, aligned(4))) ull i : 1; EXTRA };
>> struct S8p { __attribute__((packed, aligned(8))) ull i : 1; EXTRA };
>> struct S16p { __attribute__((packed, aligned(16))) ull i : 1; EXTRA };
>> 
>> int f1(int a, struct S1 s) { return s.i; }
>> int f2(int a, struct S2 s) { return s.i; }
>> int f4(int a, struct S4 s) { return s.i; }
>> int f8(int a, struct S8 s) { return s.i; }
>> int f16(int a, struct S16 s) { return s.i; }
>> 
>> int fp(int a, struct Sp s) { return s.i; }
>> int f1p(int a, struct S1p s) { return s.i; }
>> int f2p(int a, struct S2p s) { return s.i; }
>> int f4p(int a, struct S4p s) { return s.i; }
>> int f8p(int a, struct S8p s) { return s.i; }
>> int f16p(int a, struct S16p s) { return s.i; }
>> ---------------------------------------------------------------------
>> 
>> there are at least 4 interesting cases:
>> 
>>    {-DALIGN=8,-DALIGN=16} x {-DEXTRA=,}
>> 
>>  From my reading of the ABI, clang gets all of them right.
> Which sections of the ABI? I've re-read AAPCS64 (5.9.4 bit-fields 
> subdivision, 8.1.8 bit-fields), and these specific cases are not clear 
> to me :-(

Hope the above answers this.

>> The case GCC currently gets wrong is:
>> 
>>    fp f1p f2p f4p f8p for -DALIGN=16   [A]
>> 
>> f1 to f16 are equivalent for -DALIGN=16: all the structures have 16-byte
>> alignment, despite the user alignments on the fields.  We currently
>> handle those correctly.
>> 
>> Unfortunately, fixing [A] means that this is an ABI break from GCC 12,
>> so we'll need (yet another) -Wpsabi warning.  In this case I guess we're
>> actually restoring the pre-GCC 9.1 behaviour.
> gasp! I hoped we'd be safe as this is a bug fix ;-)
>
> Do you mean that commit r9-5650-gc590597c45948c was in fact a mistake?

No, it fixed many cases.  I just think [A] were caught by accident,
due to the AST representation being difficult to use.

> I've been looking at it and at r11-8226-g49813aad3292f7, it's a bit 
> convoluted :-)

Yeah, it's not going to be pretty.  The current code warns about
cases where the alignment has been bumped from 64 bits to 128 bits.
Here we'll be warning about the opposite direction (if taking GCC 12
as a baseline).

Thanks,
Richard

>
>
>> 
>> An interesting oddity with these rules is that for:
>> 
>> ---------------------------------------------------------------------
>> struct S1 { unsigned long long a, b; } __attribute__((aligned(16)));
>> struct S2 { struct S1 s; };
>> 
>> int f1(int a, struct S1 s1) { return s1.a; }
>> int f2(int a, struct S2 s2) { return s2.s.a; }
>> ---------------------------------------------------------------------
>> 
>> S1 is not treated as 16-byte aligned, but S2 is (because the analysis
>> isn't recursive).  Clang and GCC seem to be consistent about that though.
>> 
>> Thanks,
>> Richard
>> 
>>>   	  bitfield_alignment
>>>   	    = std::max (bitfield_alignment,
>>>   			TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)));
>>> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
>>> new file mode 100644
>>> index 0000000000000000000000000000000000000000..24895c3ab48309b601f6f22c176f1e52350c2257
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
>>> @@ -0,0 +1,105 @@
>>> +/* Test AAPCS64 layout and __builtin_va_arg.
>>> +
>>> +   This test covers a corner case where a composite type parameter fits in one
>>> +   register: we do not need a double-word alignment when accessing it in the
>>> +   va_arg stack area.  */
>>> +
>>> +/* { dg-do run { target aarch64*-*-* } } */
>>> +
>>> +#ifndef IN_FRAMEWORK
>>> +#define AAPCS64_TEST_STDARG
>>> +#define TESTFILE "va_arg-17.c"
>>> +#include "type-def.h"
>>> +
>>> +enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
>>> +typedef enum E6 Tal16E6 __attribute__((aligned (16)));
>>> +typedef unsigned int Tuint;
>>> +
>>> +int fails;
>>> +
>>> +union S2844 {
>>> +  Tuint a:((((10) - 1) & 31) + 1);
>>> +  Tal16E6 __attribute__((aligned (2), packed)) b:31;
>>> +  struct{}c[0];
>>> +} ;
>>> +union S2844 s2844;
>>> +union S2844 a2844[5];
>>> +
>>> +#define HAS_DATA_INIT_FUNC
>>> +void init_data ()
>>> +{
>>> +  memset (&s2844, '\0', sizeof (s2844));
>>> +  memset (a2844, '\0', sizeof (a2844));
>>> +  s2844.a = 799U;
>>> +  a2844[2].a = 586U;
>>> +}
>>> +
>>> +#include "abitest.h"
>>> +#else
>>> +  ARG       (int          , 1        , W0 , LAST_NAMED_ARG_ID)
>>> +  DOTS
>>> +  ANON_PROMOTED  (float   , 1.0f, double, 1.0, D0, 1)
>>> +  ANON      (union S2844  , s2844    , X1 , 2)
>>> +  ANON      (long long    , 2LL      , X2 , 3)
>>> +  ANON      (union  S2844 , a2844[2] , X3 , 4)
>>> +  LAST_ANON (union  S2844 , a2844[2] , X4 , 5)
>>> +#endif
>>> +
>>> +#if 0
>>> +  /* This test is derived from a case generated by struct-layout-1.exp:  */
>>> +
>>> +enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
>>> +typedef enum E6 Tal16E6 __attribute__((aligned (16)));
>>> +typedef unsigned int Tuint;
>>> +
>>> +int fails;
>>> +
>>> +union S2844 {
>>> +  Tuint a:((((10) - 1) & 31) + 1);
>>> +  Tal16E6 __attribute__((aligned (2), packed)) b:31;
>>> +  struct{}c[0];
>>> +} ;
>>> +union S2844 s2844;
>>> +union S2844 a2844[5];
>>> +
>>> +typedef __builtin_va_list __gnuc_va_list;
>>> +typedef __gnuc_va_list va_list;
>>> +
>>> +void check2844va (int z, ...) {
>>> +  union S2844 arg, *p;
>>> +  va_list ap;
>>> +
>>> +  __builtin_va_start(ap,z);
>>> +  if (__builtin_va_arg(ap,double) != 1.0)
>>> +    printf ("fail %d.%d\n", 2844, 0), ++fails;
>>> +
>>> +  p = &s2844;
>>> +  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
>>> +  if (p->a != arg.a)
>>> +    printf ("fail %d.%d\n", 2844, 1), ++fails;
>>> +
>>> +  if (__builtin_va_arg(ap,long long) != 3LL)
>>> +    printf ("fail %d.%d\n", 2844, 2), ++fails;
>>> +
>>> +  p = &a2844[2];
>>> +  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
>>> +  if (p->a != arg.a)
>>> +    printf ("fail %d.%d\n", 2844, 3), ++fails;
>>> +
>>> +  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
>>> +  if (p->a != arg.a)
>>> +    printf ("fail %d.%d\n", 2844, 4), ++fails;
>>> +
>>> +  __builtin_va_end(ap);
>>> +}
>>> +
>>> +int main (void) {
>>> +  int i, j;
>>> +  memset (&s2844, '\0', sizeof (s2844));
>>> +  memset (a2844, '\0', sizeof (a2844));
>>> +  s2844.a = 799U;
>>> +  a2844[2].a = 586U;
>>> +  check2844va (1, 1.0, s2844, 2LL, a2844[2], a2844[2]);
>>> +  exit (fails != 0);
>>> +}
>>> +#endif /* 0 */
>>> diff --git a/gcc/testsuite/gcc.target/aarch64/pr105549.c b/gcc/testsuite/gcc.target/aarch64/pr105549.c
>>> new file mode 100644
>>> index 0000000000000000000000000000000000000000..55a91ed6bc48b56eed61d668971e890dbd3250ef
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/aarch64/pr105549.c
>>> @@ -0,0 +1,12 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-save-temps" } */
>>> +
>>> +enum e { E1 };
>>> +typedef enum e e __attribute__((aligned(16)));
>>> +union u {
>>> +    __attribute__((aligned(2), packed)) e a : 1;
>>> +    int x[4];
>>> +};
>>> +union u g(int a, union u u2) { return u2; }
>>> +
>>> +/* { dg-final { scan-assembler "stp\tx1, x2, \\\[sp, 8\\\]" } } */
  
Christophe Lyon June 9, 2022, 9:40 a.m. UTC | #4
On 6/8/22 15:19, Richard Sandiford wrote:
> Christophe Lyon <christophe.lyon@arm.com> writes:
>> On 6/7/22 19:44, Richard Sandiford wrote:
>>> Christophe Lyon via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>>>> While working on enabling DFP for AArch64, I noticed new failures in
>>>> gcc.dg/compat/struct-layout-1.exp (t028) which were not actually
>>>> caused by DFP types handling. These tests are generated during 'make
>>>> check' and enabling DFP made generation different (not sure if new
>>>> non-DFP tests are generated, or if existing ones are generated
>>>> differently, the tests in question are huge and difficult to compare).
>>>>
>>>> Anyway, I reduced the problem to what I attach at the end of the new
>>>> gcc.target/aarch64/aapcs64/va_arg-17.c test and rewrote it in the same
>>>> scheme as other va_arg* AArch64 tests.  Richard Sandiford further
>>>> reduced this to a non-vararg function, added as a second testcase.
>>>>
>>>> This is a tough case mixing bitfields and alignment, where
>>>> aarch64_function_arg_alignment did not follow what its descriptive
>>>> comment says: we want to use the natural alignment of the bitfield
>>>> type only if the user didn't override the alignment for the bitfield
>>>> itself.
>>>>
>>>> The fix is thus very small, and this patch adds two new tests
>>>> (va_arg-17.c and pr105549.c). va_arg-17.c contains the reduced
>>>> offending testcase from struct-layout-1.exp for reference.
>>>>
>>>> We also take the opportunity to fix the comment above
>>>> aarch64_function_arg_alignment since the value of the abi_break
>>>> parameter was changed in a previous commit, no longer match the
>>>> description.
>>>>
>>>> 2022-06-02  Christophe Lyon  <christophe.lyon@arm.com>
>>>>
>>>> 	gcc/
>>>> 	PR target/105549
>>>> 	* config/aarch64/aarch64.cc (aarch64_function_arg_alignment):
>>>> 	Check DECL_USER_ALIGN for bitfield.
>>>>
>>>> 	gcc/testsuite/
>>>> 	PR target/105549
>>>> 	* gcc.target/aarch64/aapcs64/va_arg-17.c: New.
>>>> 	* gcc.target/aarch64/pr105549.c: New.
>>>>
>>>>
>>>> ###############     Attachment also inlined for ease of reply    ###############
>>>>
>>>>
>>>> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
>>>> index 40fc5e633992036a2c06867857a681792178ef00..2c6ccce7cb5dc32097d24514ee525729efb6b7ff 100644
>>>> --- a/gcc/config/aarch64/aarch64.cc
>>>> +++ b/gcc/config/aarch64/aarch64.cc
>>>> @@ -7262,9 +7262,9 @@ aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
>>>>    /* Given MODE and TYPE of a function argument, return the alignment in
>>>>       bits.  The idea is to suppress any stronger alignment requested by
>>>>       the user and opt for the natural alignment (specified in AAPCS64 \S
>>>> -   4.1).  ABI_BREAK is set to true if the alignment was incorrectly
>>>> -   calculated in versions of GCC prior to GCC-9.  This is a helper
>>>> -   function for local use only.  */
>>>> +   4.1).  ABI_BREAK is set to the old alignment if the alignment was
>>>> +   incorrectly calculated in versions of GCC prior to GCC-9.  This is
>>>> +   a helper function for local use only.  */
>>>>    
>>>>    static unsigned int
>>>>    aarch64_function_arg_alignment (machine_mode mode, const_tree type,
>>>> @@ -7304,7 +7304,10 @@ aarch64_function_arg_alignment (machine_mode mode, const_tree type,
>>>>    	   "s" contains only one Fundamental Data Type (the int field)
>>>>    	   but gains 8-byte alignment and size thanks to "e".  */
>>>>    	alignment = std::max (alignment, DECL_ALIGN (field));
>>>> -	if (DECL_BIT_FIELD_TYPE (field))
>>>> +
>>>> +	/* Take bit-field type's alignment into account only if the
>>>> +	   user didn't override this field's alignment.  */
>>>> +	if (DECL_BIT_FIELD_TYPE (field) && !DECL_USER_ALIGN (field))
>>>
>>> I think we need to check DECL_PACKED instead.  On its own, an alignment
>>> attribute on the field can only increase alignment, not decrease it.
>>> In constrast, the packed attribute effectively forces the alignment to
>>> 1 byte, so has an effect even without an alignment attribute.  Adding an
>>> explicit alignment on top can then increase the alignment from 1 to any
>>> value (bigger or smaller than the original underlying type).
>>
>> Right, but the comment before aarch64_function_arg_alignment says:
>>
>> "The idea is to suppress any stronger alignment requested by the user
>> and opt for the natural alignment (specified in AAPCS64 \S 4.1)"
>>
>> When using DECL_PACKED, wouldn't we check the opposite of this (ie. that
>> the user requested a smaller alignment)?   I mean we'd not "suppress
>> stronger alignment" since such cases do not have DECL_PACKED?
> 
> I think "stronger alignment" here means "greater alignment" rather
> than "less alignment".  But in these examples we're dealing with
> alignments of the fields.  I think that part is OK, and that the
> intention is to ignore any greater alignment specified at the structure
> level, independently of the fields.
> 
> In other words, if field list X occupies 16 bytes, then S1 and S2
> below should be handled in the same way as far as register assignment
> is concerned:
> 
>    struct S1 { X };
>    struct S2 { X } __attribute__((aligned(16)));
> 
> The idea is that structures are just a sequence of fields/members
> and don't have any "magic" properties beyond that.
> 
>> However I'm not sure which part of the ABI is mentioned in the comment,
>> in my copy 4.1 is "Design Goals" and does not elaborate on bitfields and
>> parameters.
> 
> Yeah, the section numbers change as new stuff is added, and probably
> also changed in the move to rst.  I think the comment is referring
> to the following, and in particular to the first bullet point under
> "Aggregates":
> 
> --------------------------------------------------------------------------
> Composite Types
> ---------------
> 
> A Composite Type is a collection of one or more Fundamental Data Types that are handled as a single entity at the procedure call level. A Composite Type can be any of:
> 
> - An aggregate, where the members are laid out sequentially in memory (possibly with inter-member padding)
> 
> - A union, where each of the members has the same address
> 
> - An array, which is a repeated sequence of some other type (its base type).
> 
> The definitions are recursive; that is, each of the types may contain a Composite Type as a member.
> 
> *  The *member alignment* of an element of a composite type is the
>     alignment of that member after the application of any language alignment
>     modifiers to that member
> 
> *  The *natural alignment* of a composite type is the maximum of
>     each of the member alignments of the 'top-level' members of the composite
>     type i.e. before any alignment adjustment of the entire composite is
>     applied
> 
> Aggregates
> ^^^^^^^^^^
> 
> - The alignment of an aggregate shall be the alignment of its most-aligned member.
> 
> - The size of an aggregate shall be the smallest multiple of its alignment that is sufficient to hold all of its members.
> --------------------------------------------------------------------------
> 
> So:
> 
> - Types start out with a "natural alignment".  For built-in types this
>    is defined directly in the AAPCS64.  For composite types it is worked
>    out as above (plus similar rules for unions and arrays, snipped above).
> 
> - Non-bitfield members have an alignment that is by default the same as
>    the natural alignment of their type.  However, for C/C++ this can be
>    modified by things like:
> 
>    - an extra alignment applied via a typedef
>    - a packed attribute on the field/member
>    - a packed attribute on the containing struct (which propagates to
>      all fields/members)
>    - a pack pragma
>    - an alignment attribute on the field/member
>    - probably others too
> 
> - Bitfield members are handled as described separately in the AAPCS64.
> 
>>> E.g. for:
>>>
>>> ---------------------------------------------------------------------
>>> typedef unsigned long long ull __attribute__((aligned(ALIGN)));
>>>
>>> #ifndef EXTRA
>>> #define EXTRA unsigned long long x;
>>> #endif
>>>
>>> struct S1 { __attribute__((aligned(1))) ull i : 1; EXTRA };
>>> struct S2 { __attribute__((aligned(2))) ull i : 1; EXTRA };
>>> struct S4 { __attribute__((aligned(4))) ull i : 1; EXTRA };
>>> struct S8 { __attribute__((aligned(8))) ull i : 1; EXTRA };
>>> struct S16 { __attribute__((aligned(16))) ull i : 1; EXTRA };
>>>
>>> struct Sp { ull i : 1; EXTRA }__attribute__((packed));
>>> struct S1p { __attribute__((packed, aligned(1))) ull i : 1; EXTRA };
>>> struct S2p { __attribute__((packed, aligned(2))) ull i : 1; EXTRA };
>>> struct S4p { __attribute__((packed, aligned(4))) ull i : 1; EXTRA };
>>> struct S8p { __attribute__((packed, aligned(8))) ull i : 1; EXTRA };
>>> struct S16p { __attribute__((packed, aligned(16))) ull i : 1; EXTRA };
>>>
>>> int f1(int a, struct S1 s) { return s.i; }
>>> int f2(int a, struct S2 s) { return s.i; }
>>> int f4(int a, struct S4 s) { return s.i; }
>>> int f8(int a, struct S8 s) { return s.i; }
>>> int f16(int a, struct S16 s) { return s.i; }
>>>
>>> int fp(int a, struct Sp s) { return s.i; }
>>> int f1p(int a, struct S1p s) { return s.i; }
>>> int f2p(int a, struct S2p s) { return s.i; }
>>> int f4p(int a, struct S4p s) { return s.i; }
>>> int f8p(int a, struct S8p s) { return s.i; }
>>> int f16p(int a, struct S16p s) { return s.i; }
>>> ---------------------------------------------------------------------
>>>
>>> there are at least 4 interesting cases:
>>>
>>>     {-DALIGN=8,-DALIGN=16} x {-DEXTRA=,}
>>>
>>>   From my reading of the ABI, clang gets all of them right.
>> Which sections of the ABI? I've re-read AAPCS64 (5.9.4 bit-fields
>> subdivision, 8.1.8 bit-fields), and these specific cases are not clear
>> to me :-(
> 
> Hope the above answers this.

It helps, thanks. However....

> 
>>> The case GCC currently gets wrong is:
>>>
>>>     fp f1p f2p f4p f8p for -DALIGN=16   [A]
>>>
>>> f1 to f16 are equivalent for -DALIGN=16: all the structures have 16-byte
>>> alignment, despite the user alignments on the fields.  We currently
>>> handle those correctly.

.... I replaced !DECL_USER_ALIGN (field) with DECL_PACKED (field) and I 
noticed no change with the testcase you provided.

In more details, with ALIGN=8, we currently generate:
str     x1, [sp] for f1, f2, f4, f8, f8p
stp     x2, x3, [sp] for f16, f16p
strb    w1, [sp, 8] for fp, f1p
strh    w1, [sp, 8] for f2p
str     w1, [sp, 8] for f4p

with ALIGN=16:
stp     x2, x3, [sp] for f1, f2, f4, f8, f16, f16p
strb    w1, [sp, 8] for fp, f1p
strh    w1, [sp, 8] for f2p
str     w1, [sp, 8] for f4p
str     x1, [sp]  for f8p

My patch has no impact on the ALIGN=8 case (as expected), and with 
ALIGN=16 it replaces
stp     x2, x3, [sp]  with
stp     x1, x2, [sp]
in f1, f2, f4, f8

Isn't this what we want?

>>> Unfortunately, fixing [A] means that this is an ABI break from GCC 12,
>>> so we'll need (yet another) -Wpsabi warning.  In this case I guess we're
>>> actually restoring the pre-GCC 9.1 behaviour.
>> gasp! I hoped we'd be safe as this is a bug fix ;-)
>>
>> Do you mean that commit r9-5650-gc590597c45948c was in fact a mistake?
> 
> No, it fixed many cases.  I just think [A] were caught by accident,
> due to the AST representation being difficult to use.
> 
>> I've been looking at it and at r11-8226-g49813aad3292f7, it's a bit
>> convoluted :-)
> 
> Yeah, it's not going to be pretty.  The current code warns about
> cases where the alignment has been bumped from 64 bits to 128 bits.
> Here we'll be warning about the opposite direction (if taking GCC 12
> as a baseline).
> 
> Thanks,
> Richard
> 
>>
>>
>>>
>>> An interesting oddity with these rules is that for:
>>>
>>> ---------------------------------------------------------------------
>>> struct S1 { unsigned long long a, b; } __attribute__((aligned(16)));
>>> struct S2 { struct S1 s; };
>>>
>>> int f1(int a, struct S1 s1) { return s1.a; }
>>> int f2(int a, struct S2 s2) { return s2.s.a; }
>>> ---------------------------------------------------------------------
>>>
>>> S1 is not treated as 16-byte aligned, but S2 is (because the analysis
>>> isn't recursive).  Clang and GCC seem to be consistent about that though.
>>>
>>> Thanks,
>>> Richard
>>>
>>>>    	  bitfield_alignment
>>>>    	    = std::max (bitfield_alignment,
>>>>    			TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)));
>>>> diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
>>>> new file mode 100644
>>>> index 0000000000000000000000000000000000000000..24895c3ab48309b601f6f22c176f1e52350c2257
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
>>>> @@ -0,0 +1,105 @@
>>>> +/* Test AAPCS64 layout and __builtin_va_arg.
>>>> +
>>>> +   This test covers a corner case where a composite type parameter fits in one
>>>> +   register: we do not need a double-word alignment when accessing it in the
>>>> +   va_arg stack area.  */
>>>> +
>>>> +/* { dg-do run { target aarch64*-*-* } } */
>>>> +
>>>> +#ifndef IN_FRAMEWORK
>>>> +#define AAPCS64_TEST_STDARG
>>>> +#define TESTFILE "va_arg-17.c"
>>>> +#include "type-def.h"
>>>> +
>>>> +enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
>>>> +typedef enum E6 Tal16E6 __attribute__((aligned (16)));
>>>> +typedef unsigned int Tuint;
>>>> +
>>>> +int fails;
>>>> +
>>>> +union S2844 {
>>>> +  Tuint a:((((10) - 1) & 31) + 1);
>>>> +  Tal16E6 __attribute__((aligned (2), packed)) b:31;
>>>> +  struct{}c[0];
>>>> +} ;
>>>> +union S2844 s2844;
>>>> +union S2844 a2844[5];
>>>> +
>>>> +#define HAS_DATA_INIT_FUNC
>>>> +void init_data ()
>>>> +{
>>>> +  memset (&s2844, '\0', sizeof (s2844));
>>>> +  memset (a2844, '\0', sizeof (a2844));
>>>> +  s2844.a = 799U;
>>>> +  a2844[2].a = 586U;
>>>> +}
>>>> +
>>>> +#include "abitest.h"
>>>> +#else
>>>> +  ARG       (int          , 1        , W0 , LAST_NAMED_ARG_ID)
>>>> +  DOTS
>>>> +  ANON_PROMOTED  (float   , 1.0f, double, 1.0, D0, 1)
>>>> +  ANON      (union S2844  , s2844    , X1 , 2)
>>>> +  ANON      (long long    , 2LL      , X2 , 3)
>>>> +  ANON      (union  S2844 , a2844[2] , X3 , 4)
>>>> +  LAST_ANON (union  S2844 , a2844[2] , X4 , 5)
>>>> +#endif
>>>> +
>>>> +#if 0
>>>> +  /* This test is derived from a case generated by struct-layout-1.exp:  */
>>>> +
>>>> +enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
>>>> +typedef enum E6 Tal16E6 __attribute__((aligned (16)));
>>>> +typedef unsigned int Tuint;
>>>> +
>>>> +int fails;
>>>> +
>>>> +union S2844 {
>>>> +  Tuint a:((((10) - 1) & 31) + 1);
>>>> +  Tal16E6 __attribute__((aligned (2), packed)) b:31;
>>>> +  struct{}c[0];
>>>> +} ;
>>>> +union S2844 s2844;
>>>> +union S2844 a2844[5];
>>>> +
>>>> +typedef __builtin_va_list __gnuc_va_list;
>>>> +typedef __gnuc_va_list va_list;
>>>> +
>>>> +void check2844va (int z, ...) {
>>>> +  union S2844 arg, *p;
>>>> +  va_list ap;
>>>> +
>>>> +  __builtin_va_start(ap,z);
>>>> +  if (__builtin_va_arg(ap,double) != 1.0)
>>>> +    printf ("fail %d.%d\n", 2844, 0), ++fails;
>>>> +
>>>> +  p = &s2844;
>>>> +  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
>>>> +  if (p->a != arg.a)
>>>> +    printf ("fail %d.%d\n", 2844, 1), ++fails;
>>>> +
>>>> +  if (__builtin_va_arg(ap,long long) != 3LL)
>>>> +    printf ("fail %d.%d\n", 2844, 2), ++fails;
>>>> +
>>>> +  p = &a2844[2];
>>>> +  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
>>>> +  if (p->a != arg.a)
>>>> +    printf ("fail %d.%d\n", 2844, 3), ++fails;
>>>> +
>>>> +  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
>>>> +  if (p->a != arg.a)
>>>> +    printf ("fail %d.%d\n", 2844, 4), ++fails;
>>>> +
>>>> +  __builtin_va_end(ap);
>>>> +}
>>>> +
>>>> +int main (void) {
>>>> +  int i, j;
>>>> +  memset (&s2844, '\0', sizeof (s2844));
>>>> +  memset (a2844, '\0', sizeof (a2844));
>>>> +  s2844.a = 799U;
>>>> +  a2844[2].a = 586U;
>>>> +  check2844va (1, 1.0, s2844, 2LL, a2844[2], a2844[2]);
>>>> +  exit (fails != 0);
>>>> +}
>>>> +#endif /* 0 */
>>>> diff --git a/gcc/testsuite/gcc.target/aarch64/pr105549.c b/gcc/testsuite/gcc.target/aarch64/pr105549.c
>>>> new file mode 100644
>>>> index 0000000000000000000000000000000000000000..55a91ed6bc48b56eed61d668971e890dbd3250ef
>>>> --- /dev/null
>>>> +++ b/gcc/testsuite/gcc.target/aarch64/pr105549.c
>>>> @@ -0,0 +1,12 @@
>>>> +/* { dg-do compile } */
>>>> +/* { dg-options "-save-temps" } */
>>>> +
>>>> +enum e { E1 };
>>>> +typedef enum e e __attribute__((aligned(16)));
>>>> +union u {
>>>> +    __attribute__((aligned(2), packed)) e a : 1;
>>>> +    int x[4];
>>>> +};
>>>> +union u g(int a, union u u2) { return u2; }
>>>> +
>>>> +/* { dg-final { scan-assembler "stp\tx1, x2, \\\[sp, 8\\\]" } } */
  

Patch

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 40fc5e633992036a2c06867857a681792178ef00..2c6ccce7cb5dc32097d24514ee525729efb6b7ff 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -7262,9 +7262,9 @@  aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
 /* Given MODE and TYPE of a function argument, return the alignment in
    bits.  The idea is to suppress any stronger alignment requested by
    the user and opt for the natural alignment (specified in AAPCS64 \S
-   4.1).  ABI_BREAK is set to true if the alignment was incorrectly
-   calculated in versions of GCC prior to GCC-9.  This is a helper
-   function for local use only.  */
+   4.1).  ABI_BREAK is set to the old alignment if the alignment was
+   incorrectly calculated in versions of GCC prior to GCC-9.  This is
+   a helper function for local use only.  */
 
 static unsigned int
 aarch64_function_arg_alignment (machine_mode mode, const_tree type,
@@ -7304,7 +7304,10 @@  aarch64_function_arg_alignment (machine_mode mode, const_tree type,
 	   "s" contains only one Fundamental Data Type (the int field)
 	   but gains 8-byte alignment and size thanks to "e".  */
 	alignment = std::max (alignment, DECL_ALIGN (field));
-	if (DECL_BIT_FIELD_TYPE (field))
+
+	/* Take bit-field type's alignment into account only if the
+	   user didn't override this field's alignment.  */
+	if (DECL_BIT_FIELD_TYPE (field) && !DECL_USER_ALIGN (field))
 	  bitfield_alignment
 	    = std::max (bitfield_alignment,
 			TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)));
diff --git a/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
new file mode 100644
index 0000000000000000000000000000000000000000..24895c3ab48309b601f6f22c176f1e52350c2257
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/aapcs64/va_arg-17.c
@@ -0,0 +1,105 @@ 
+/* Test AAPCS64 layout and __builtin_va_arg.
+
+   This test covers a corner case where a composite type parameter fits in one
+   register: we do not need a double-word alignment when accessing it in the
+   va_arg stack area.  */
+
+/* { dg-do run { target aarch64*-*-* } } */
+
+#ifndef IN_FRAMEWORK
+#define AAPCS64_TEST_STDARG
+#define TESTFILE "va_arg-17.c"
+#include "type-def.h"
+
+enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
+typedef enum E6 Tal16E6 __attribute__((aligned (16)));
+typedef unsigned int Tuint;
+
+int fails;
+
+union S2844 {
+  Tuint a:((((10) - 1) & 31) + 1);
+  Tal16E6 __attribute__((aligned (2), packed)) b:31;
+  struct{}c[0];
+} ;
+union S2844 s2844;
+union S2844 a2844[5];
+
+#define HAS_DATA_INIT_FUNC
+void init_data ()
+{
+  memset (&s2844, '\0', sizeof (s2844));
+  memset (a2844, '\0', sizeof (a2844));
+  s2844.a = 799U;
+  a2844[2].a = 586U;
+}
+
+#include "abitest.h"
+#else
+  ARG       (int          , 1        , W0 , LAST_NAMED_ARG_ID)
+  DOTS
+  ANON_PROMOTED  (float   , 1.0f, double, 1.0, D0, 1)
+  ANON      (union S2844  , s2844    , X1 , 2)
+  ANON      (long long    , 2LL      , X2 , 3)
+  ANON      (union  S2844 , a2844[2] , X3 , 4)
+  LAST_ANON (union  S2844 , a2844[2] , X4 , 5)
+#endif
+
+#if 0
+  /* This test is derived from a case generated by struct-layout-1.exp:  */
+
+enum E6 { e6_0, e6_1, e6_2, e6_3, e6_65533 = 65533, e6_65534, e6_65535 };
+typedef enum E6 Tal16E6 __attribute__((aligned (16)));
+typedef unsigned int Tuint;
+
+int fails;
+
+union S2844 {
+  Tuint a:((((10) - 1) & 31) + 1);
+  Tal16E6 __attribute__((aligned (2), packed)) b:31;
+  struct{}c[0];
+} ;
+union S2844 s2844;
+union S2844 a2844[5];
+
+typedef __builtin_va_list __gnuc_va_list;
+typedef __gnuc_va_list va_list;
+
+void check2844va (int z, ...) {
+  union S2844 arg, *p;
+  va_list ap;
+
+  __builtin_va_start(ap,z);
+  if (__builtin_va_arg(ap,double) != 1.0)
+    printf ("fail %d.%d\n", 2844, 0), ++fails;
+
+  p = &s2844;
+  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
+  if (p->a != arg.a)
+    printf ("fail %d.%d\n", 2844, 1), ++fails;
+
+  if (__builtin_va_arg(ap,long long) != 3LL)
+    printf ("fail %d.%d\n", 2844, 2), ++fails;
+
+  p = &a2844[2];
+  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
+  if (p->a != arg.a)
+    printf ("fail %d.%d\n", 2844, 3), ++fails;
+
+  arg = __builtin_va_arg(ap,union S2844);  /* This would fail.  */
+  if (p->a != arg.a)
+    printf ("fail %d.%d\n", 2844, 4), ++fails;
+
+  __builtin_va_end(ap);
+}
+
+int main (void) {
+  int i, j;
+  memset (&s2844, '\0', sizeof (s2844));
+  memset (a2844, '\0', sizeof (a2844));
+  s2844.a = 799U;
+  a2844[2].a = 586U;
+  check2844va (1, 1.0, s2844, 2LL, a2844[2], a2844[2]);
+  exit (fails != 0);
+}
+#endif /* 0 */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr105549.c b/gcc/testsuite/gcc.target/aarch64/pr105549.c
new file mode 100644
index 0000000000000000000000000000000000000000..55a91ed6bc48b56eed61d668971e890dbd3250ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr105549.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-options "-save-temps" } */
+
+enum e { E1 };
+typedef enum e e __attribute__((aligned(16)));
+union u {
+    __attribute__((aligned(2), packed)) e a : 1;
+    int x[4];
+};
+union u g(int a, union u u2) { return u2; }
+
+/* { dg-final { scan-assembler "stp\tx1, x2, \\\[sp, 8\\\]" } } */