Fall back to elementwise access for too spaced SLP single element interleaving

Message ID 20240920112404.6D47013AE1@imap1.dmz-prg2.suse.org
State New
Series  Fall back to elementwise access for too spaced SLP single element interleaving

Checks

Context                                         Check    Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm      warning  Patch is already merged
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64  warning  Patch is already merged

Commit Message

Richard Biener Sept. 20, 2024, 11:23 a.m. UTC
gcc.dg/vect/vect-pr111779.c is a case where non-SLP manages to vectorize
using VMAT_ELEMENTWISE but SLP currently refuses to, because doing a
regular access with permutes would cause excess vector loads with at most
one element of each used.  The following makes us fall back to elementwise
accesses for that case, too.
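
For illustration, a loop of roughly the following shape runs into this
situation (a minimal sketch of the access pattern only, not the actual
contents of vect-pr111779.c): the load forms a single-element
interleaving group whose size exceeds the number of vector lanes, so a
contiguous vector load would fetch mostly unused elements.  Falling back
to VMAT_ELEMENTWISE instead emits scalar loads and assembles the vector
with a constructor.

  double a[512];
  float b[64];

  void
  foo (void)
  {
    for (int i = 0; i < 64; ++i)
      /* Group size 8, only one element used per group; 8 exceeds
	 TYPE_VECTOR_SUBPARTS of common vector modes.  */
      b[i] = a[i * 8];
  }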

Bootstrapped and tested on x86_64-unknown-linux-gnu.

	* tree-vect-stmts.cc (get_group_load_store_type): Fall back
	to VMAT_ELEMENTWISE for single-element interleaving of
	a too large group.
	(vectorizable_load): Do not try to verify load permutations
	when using VMAT_ELEMENTWISE for single-lane SLP and fix code
	generation for this case.
---
 gcc/tree-vect-stmts.cc | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

Patch

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 33cdccae784..45003f762dd 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2190,11 +2190,12 @@  get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
 	      && single_element_p
 	      && maybe_gt (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
 	    {
+	      *memory_access_type = VMAT_ELEMENTWISE;
 	      if (dump_enabled_p ())
 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 				 "single-element interleaving not supported "
-				 "for not adjacent vector loads\n");
-	      return false;
+				 "for not adjacent vector loads, using "
+				 "elementwise access\n");
 	    }
 	}
     }
@@ -10039,7 +10040,23 @@  vectorizable_load (vec_info *vinfo,
   else
     group_size = 1;
 
-  if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
+  vect_memory_access_type memory_access_type;
+  enum dr_alignment_support alignment_support_scheme;
+  int misalignment;
+  poly_int64 poffset;
+  internal_fn lanes_ifn;
+  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
+			    ncopies, &memory_access_type, &poffset,
+			    &alignment_support_scheme, &misalignment, &gs_info,
+			    &lanes_ifn))
+    return false;
+
+  /* ???  The following checks should really be part of
+     get_group_load_store_type.  */
+  if (slp
+      && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
+      && !(memory_access_type == VMAT_ELEMENTWISE
+	   && SLP_TREE_LANES (slp_node) == 1))
     {
       slp_perm = true;
 
@@ -10079,17 +10096,6 @@  vectorizable_load (vec_info *vinfo,
 	}
     }
 
-  vect_memory_access_type memory_access_type;
-  enum dr_alignment_support alignment_support_scheme;
-  int misalignment;
-  poly_int64 poffset;
-  internal_fn lanes_ifn;
-  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
-			    ncopies, &memory_access_type, &poffset,
-			    &alignment_support_scheme, &misalignment, &gs_info,
-			    &lanes_ifn))
-    return false;
-
   if (slp_node
       && slp_node->ldst_lanes
       && memory_access_type != VMAT_LOAD_STORE_LANES)
@@ -10292,7 +10298,8 @@  vectorizable_load (vec_info *vinfo,
 	  first_dr_info = dr_info;
 	}
 
-      if (slp && grouped_load)
+      if (slp && grouped_load
+	  && memory_access_type == VMAT_STRIDED_SLP)
 	{
 	  group_size = DR_GROUP_SIZE (first_stmt_info);
 	  ref_type = get_group_alias_ptr_type (first_stmt_info);
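
With the patch applied, the fallback can be observed in the vectorizer
dump; a hedged example invocation (the dump line is the one this patch
adds under MSG_MISSED_OPTIMIZATION, and the exact source-location prefix
will differ), after which the loop is still vectorized, now with
elementwise accesses:

  gcc -O2 -fdump-tree-vect-details t.c
  t.c:8:12: missed: single-element interleaving not supported for not adjacent vector loads, using elementwise access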