[v2] tree-optimization/119155 - wrong aligned access for vectorized packed access

Message ID 20250312144735.1A419132CB@imap1.dmz-prg2.suse.org
State Committed
Commit adb14c7625178b501389c2d7d7c2feec37da7a19
Headers
Series [v2] tree-optimization/119155 - wrong aligned access for vectorized packed access |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_simplebootstrap_build--master-aarch64-bootstrap success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_simplebootstrap_build--master-arm-bootstrap success Build passed
rivoscibot/toolchain-ci-rivos-lint success Lint passed
rivoscibot/toolchain-ci-rivos-apply-patch success Patch applied
rivoscibot/toolchain-ci-rivos-build--newlib-rv64gcv-lp64d-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gc_zba_zbb_zbc_zbs-lp64d-multilib success Build passed
rivoscibot/toolchain-ci-rivos-build--linux-rv64gcv-lp64d-multilib success Build passed
rivoscibot/toolchain-ci-rivos-test success Testing passed

Commit Message

Richard Biener March 12, 2025, 2:47 p.m. UTC
  When doing strided SLP vectorization we use the wrong alignment for
the possibly piecewise access of the vector elements for loads and
stores.  While we are careful to use element-aligned loads and
stores, that isn't enough when the original scalar accesses are
packed, since those may be aligned to less than the element type.
The following instead honors larger alignment when present but
correctly falls back to the alignment of the original scalar access.

v2 fixes an alignment issue with vector composition from larger
scalar loads, noted by the Arm CI.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

	PR tree-optimization/119155
	* tree-vect-stmts.cc (vectorizable_store): Do not always
	use vector element alignment for VMAT_STRIDED_SLP but
	honor larger known alignment and otherwise fall back to
	the alignment of the original scalar access.
	(vectorizable_load): Likewise.

	* gcc.dg/vect/pr119155.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr119155.c | 26 ++++++++++++++++++++++++++
 gcc/tree-vect-stmts.cc               | 25 +++++++++++++++++++++----
 2 files changed, 47 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr119155.c
  

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/pr119155.c b/gcc/testsuite/gcc.dg/vect/pr119155.c
new file mode 100644
index 00000000000..b860cf24b0f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr119155.c
@@ -0,0 +1,26 @@ 
+#include <stdlib.h>
+#include "tree-vect.h"
+
+struct s { int x; } __attribute__((packed));
+
+void __attribute__((noipa))
+f (char *xc, char *yc, int z)
+{
+  for (int i = 0; i < 100; ++i)
+    {
+      struct s *x = (struct s *) xc;
+      struct s *y = (struct s *) yc;
+      x->x += y->x;
+      xc += z;
+      yc += z;
+    }
+}
+
+int main ()
+{
+  check_vect ();
+  char *x = malloc (100 * sizeof (struct s) + 1);
+  char *y = malloc (100 * sizeof (struct s) + 1);
+  f (x + 1, y + 1, sizeof (struct s));
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f894787f7bf..17e3b1db894 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8904,7 +8904,15 @@  vectorizable_store (vec_info *vinfo,
 		    }
 		}
 	    }
-	  ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
+	  unsigned align;
+	  if (alignment_support_scheme == dr_aligned)
+	    align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+	  else
+	    align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+	  /* Alignment is at most the access size if we do multiple stores.  */
+	  if (nstores > 1)
+	    align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
+	  ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
 	  ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
 	}
 
@@ -10851,7 +10859,7 @@  vectorizable_load (vec_info *vinfo,
 						  &ptype);
 	      if (vtype != NULL_TREE)
 		{
-		  dr_alignment_support dr_align = dr_aligned;
+		  dr_alignment_support dr_align;
 		  int mis_align = 0;
 		  if (VECTOR_TYPE_P (ptype))
 		    {
@@ -10860,6 +10868,8 @@  vectorizable_load (vec_info *vinfo,
 			= vect_supportable_dr_alignment (vinfo, dr_info, ptype,
 							 mis_align);
 		    }
+		  else
+		    dr_align = dr_unaligned_supported;
 		  if (dr_align == dr_aligned
 		      || dr_align == dr_unaligned_supported)
 		    {
@@ -10872,8 +10882,15 @@  vectorizable_load (vec_info *vinfo,
 		    }
 		}
 	    }
-	  /* Else fall back to the default element-wise access.  */
-	  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
+	  unsigned align;
+	  if (alignment_support_scheme == dr_aligned)
+	    align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+	  else
+	    align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+	  /* Alignment is at most the access size if we do multiple loads.  */
+	  if (nloads > 1)
+	    align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
+	  ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
 	}
 
       if (slp)