vect: Generalise vect_add_slp_permutation [PR122793]

Message ID 20260108142914.D4BC24BA2E1D@sourceware.org
State New
Headers
Series vect: Generalise vect_add_slp_permutation [PR122793] |

Commit Message

Richard Biener Jan. 8, 2026, 2:28 p.m. UTC
  From: Richard Sandiford <rdsandiford@googlemail.com>

The problem seems to be with a packing permutation:

    op0[4] op0[5] op0[6] op0[7]

and with the identity_offset parameter to vect_add_slp_permutation.

Both the repeating_p and !repeating_p paths correctly realise that this
permutation reduces to an identity.  But the !repeating_p path ends up with
first_node and second_node both set to the second VEC_PERM_EXPR operand
(since that path works elementwise, and since no elements are taken from
the first input).  Therefore, the call:

      vect_add_slp_permutation (vinfo, gsi, node, first_def,
				second_def, mask_vec, mask[0]);

works regardless of whether vect_add_slp_permutation picks first_def or
second_def.  In that sense, the parameters to vect_add_slp_permutation are
already “canonical”.

The repeating_p path instead passes vector 2N as first_def and vector 2N+1
as second_def, with mask[0] indicating the position of the identity within
the concatenation of first_def and second_def.  However,
vect_add_slp_permutation doesn't expect this and instead ignores the
identity_offset parameter.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

	PR tree-optimization/122793
	* tree-vect-slp.cc (vect_add_slp_permutation): Document the existing
	identity_offset parameter.  Handle identities that take from the
	second input rather than the first.

	* gcc.dg/vect/vect-pr122793.c: New testcase.

Co-authored-by: Richard Biener <rguenther@suse.de>
---
 gcc/testsuite/gcc.dg/vect/vect-pr122793.c | 28 ++++++++++
 gcc/tree-vect-slp.cc                      | 67 ++++++++++++++---------
 2 files changed, 69 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-pr122793.c
  

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr122793.c b/gcc/testsuite/gcc.dg/vect/vect-pr122793.c
new file mode 100644
index 00000000000..18af3235b84
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr122793.c
@@ -0,0 +1,28 @@ 
+/* { dg-additional-options "-msse4" { target sse4_runtime } } */
+
+#include "tree-vect.h"
+
+static void
+foo (unsigned char *d, unsigned char *s, int e, int f)
+{
+  for (int i = 0; i < 4; i++)
+    {
+      d[0] = s[-2];
+      d[5] = (s[5] + s[6]) * 2 - (s[4] + s[7]);
+      d[6] = (s[6] + s[7]) * 2 - (s[5] + s[8]);
+      d[7] = (s[7] + s[8]) * 2 - (s[6] + s[9]);
+      d += e;
+      s += f;
+    }
+}
+
+unsigned char s[128] = { 2 }, d[128];
+
+int
+main ()
+{
+  check_vect ();
+  foo (d, s + 2, 16, 16);
+  if (d[5] != 0)
+    __builtin_abort ();
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 4e01adf4cd3..e4e0320c678 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -11365,7 +11365,11 @@  vect_transform_slp_perm_load (vec_info *vinfo,
 
    otherwise add:
 
-      <new SSA name> = FIRST_DEF.  */
+      <new SSA name> = VEC_PERM_EXPR <FIRST_DEF, SECOND_DEF,
+				      { N, N+1, N+2, ... }>
+
+   where N == IDENTITY_OFFSET which is either zero or equal to the
+   number of elements of the result.  */
 
 static void
 vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
@@ -11405,36 +11409,47 @@  vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
 				       first_def, second_def,
 				       mask_vec);
     }
-  else if (!types_compatible_p (TREE_TYPE (first_def), vectype))
+  else
     {
-      /* For identity permutes we still need to handle the case
-	 of offsetted extracts or concats.  */
-      unsigned HOST_WIDE_INT c;
-      auto first_def_nunits
-	= TYPE_VECTOR_SUBPARTS (TREE_TYPE (first_def));
-      if (known_le (TYPE_VECTOR_SUBPARTS (vectype), first_def_nunits))
+      auto def_nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (first_def));
+      unsigned HOST_WIDE_INT vecno;
+      poly_uint64 eltno;
+      if (!can_div_trunc_p (poly_uint64 (identity_offset), def_nunits,
+			    &vecno, &eltno))
+	gcc_unreachable ();
+      tree def = vecno & 1 ? second_def : first_def;
+      if (!types_compatible_p (TREE_TYPE (def), vectype))
 	{
-	  unsigned HOST_WIDE_INT elsz
-	    = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (TREE_TYPE (first_def))));
-	  tree lowpart = build3 (BIT_FIELD_REF, vectype, first_def,
-				 TYPE_SIZE (vectype),
-				 bitsize_int (identity_offset * elsz));
-	  perm_stmt = gimple_build_assign (perm_dest, lowpart);
+	  /* For identity permutes we still need to handle the case
+	     of offsetted extracts or concats.  */
+	  unsigned HOST_WIDE_INT c;
+	  if (known_le (TYPE_VECTOR_SUBPARTS (vectype), def_nunits))
+	    {
+	      unsigned HOST_WIDE_INT elsz
+		= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (TREE_TYPE (def))));
+	      tree lowpart = build3 (BIT_FIELD_REF, vectype, def,
+				     TYPE_SIZE (vectype),
+				     bitsize_int (eltno * elsz));
+	      perm_stmt = gimple_build_assign (perm_dest, lowpart);
+	    }
+	  else if (constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
+					def_nunits, &c) && c == 2)
+	    {
+	      gcc_assert (known_eq (identity_offset, 0U));
+	      tree ctor = build_constructor_va (vectype, 2,
+						NULL_TREE, first_def,
+						NULL_TREE, second_def);
+	      perm_stmt = gimple_build_assign (perm_dest, ctor);
+	    }
+	  else
+	    gcc_unreachable ();
 	}
-      else if (constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
-				    first_def_nunits, &c) && c == 2)
+      else
 	{
-	  tree ctor = build_constructor_va (vectype, 2, NULL_TREE, first_def,
-					    NULL_TREE, second_def);
-	  perm_stmt = gimple_build_assign (perm_dest, ctor);
+	  /* We need a copy here in case the def was external.  */
+	  gcc_assert (known_eq (eltno, 0U));
+	  perm_stmt = gimple_build_assign (perm_dest, def);
 	}
-      else
-	gcc_unreachable ();
-    }
-  else
-    {
-      /* We need a copy here in case the def was external.  */
-      perm_stmt = gimple_build_assign (perm_dest, first_def);
     }
   vect_finish_stmt_generation (vinfo, NULL, perm_stmt, gsi);
   /* Store the vector statement in NODE.  */