tree-optimization/113576 - non-empty latch and may_be_zero vectorization

Message ID 20240124144159.51C50385841A@sourceware.org
State New
Headers
Series tree-optimization/113576 - non-empty latch and may_be_zero vectorization |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm fail Testing failed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 fail Testing failed

Commit Message

Richard Biener Jan. 24, 2024, 2:40 p.m. UTC
  We can't support niters with may_be_zero when we end up with a
non-empty latch due to early exit peeling.  At least not in
the simplistic way the vectorizer handles this now.  Disallow
it again for exits that are not the last one.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

	PR tree-optimization/113576
	* tree-vect-loop.cc (vec_init_loop_exit_info): Only allow
	exits with may_be_zero niters when its the last one.

	* gcc.dg/vect/pr113576.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr113576.c | 157 +++++++++++++++++++++++++++
 gcc/tree-vect-loop.cc                |   9 +-
 2 files changed, 164 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr113576.c
  

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/pr113576.c b/gcc/testsuite/gcc.dg/vect/pr113576.c
new file mode 100644
index 00000000000..da5ddb09e33
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr113576.c
@@ -0,0 +1,157 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* { dg-additional-options "-march=skylake-avx512" } */
+
+#include "tree-vect.h"
+
+#include<stdbool.h>
+#include<stdlib.h>
+#include<stddef.h>
+#include<string.h>
+
+#define SBITMAP_ELT_BITS ((unsigned) 64)
+#define SBITMAP_ELT_TYPE unsigned long long
+#define SBITMAP_SIZE_BYTES(BITMAP) ((BITMAP)->size * sizeof (SBITMAP_ELT_TYPE))
+#define do_popcount(x) __builtin_popcountll(x)
+
+typedef struct simple_bitmap_def
+{
+  unsigned char *popcount;      /* Population count.  */
+  unsigned int n_bits;		/* Number of bits.  */
+  unsigned int size;		/* Size in elements.  */
+  SBITMAP_ELT_TYPE elms[1];	/* The elements.  */
+} *sbitmap;
+typedef const struct simple_bitmap_def *const_sbitmap;
+
+/* The iterator for sbitmap.  */
+typedef struct {
+  /* The pointer to the first word of the bitmap.  */
+  const SBITMAP_ELT_TYPE *ptr;
+
+  /* The size of the bitmap.  */
+  unsigned int size;
+
+  /* The current word index.  */
+  unsigned int word_num;
+
+  /* The current bit index (not modulo SBITMAP_ELT_BITS).  */
+  unsigned int bit_num;
+
+  /* The words currently visited.  */
+  SBITMAP_ELT_TYPE word;
+} sbitmap_iterator;
+
+static inline void
+sbitmap_iter_init (sbitmap_iterator *i, const_sbitmap bmp, unsigned int min)
+{
+  i->word_num = min / (unsigned int) SBITMAP_ELT_BITS;
+  i->bit_num = min;
+  i->size = bmp->size;
+  i->ptr = bmp->elms;
+
+  if (i->word_num >= i->size)
+    i->word = 0;
+  else
+    i->word = (i->ptr[i->word_num]
+	       >> (i->bit_num % (unsigned int) SBITMAP_ELT_BITS));
+}
+
+/* Return true if we have more bits to visit, in which case *N is set
+   to the index of the bit to be visited.  Otherwise, return
+   false.  */
+
+static inline bool
+sbitmap_iter_cond (sbitmap_iterator *i, unsigned int *n)
+{
+  /* Skip words that are zeros.  */
+  for (; i->word == 0; i->word = i->ptr[i->word_num])
+    {
+      i->word_num++;
+
+      /* If we have reached the end, break.  */
+      if (i->word_num >= i->size)
+	return false;
+
+      i->bit_num = i->word_num * SBITMAP_ELT_BITS;
+    }
+
+  /* Skip bits that are zero.  */
+  for (; (i->word & 1) == 0; i->word >>= 1)
+    i->bit_num++;
+
+  *n = i->bit_num;
+
+  return true;
+}
+
+/* Advance to the next bit.  */
+
+static inline void
+sbitmap_iter_next (sbitmap_iterator *i)
+{
+  i->word >>= 1;
+  i->bit_num++;
+}
+
+#define SBITMAP_SET_SIZE(N) (((N) + SBITMAP_ELT_BITS - 1) / SBITMAP_ELT_BITS)
+/* Allocate a simple bitmap of N_ELMS bits.  */
+
+sbitmap
+sbitmap_alloc (unsigned int n_elms)
+{
+  unsigned int bytes, size, amt;
+  sbitmap bmap;
+
+  size = SBITMAP_SET_SIZE (n_elms);
+  bytes = size * sizeof (SBITMAP_ELT_TYPE);
+  amt = (sizeof (struct simple_bitmap_def)
+	 + bytes - sizeof (SBITMAP_ELT_TYPE));
+  bmap = (sbitmap) malloc (amt);
+  bmap->n_bits = n_elms;
+  bmap->size = size;
+  bmap->popcount = NULL;
+  return bmap;
+}
+
+#define sbitmap_free(MAP)		(free((MAP)->popcount), free((MAP)))
+/* Loop over all elements of SBITMAP, starting with MIN.  In each
+   iteration, N is set to the index of the bit being visited.  ITER is
+   an instance of sbitmap_iterator used to iterate the bitmap.  */
+
+#define EXECUTE_IF_SET_IN_SBITMAP(SBITMAP, MIN, N, ITER)	\
+  for (sbitmap_iter_init (&(ITER), (SBITMAP), (MIN));		\
+       sbitmap_iter_cond (&(ITER), &(N));			\
+       sbitmap_iter_next (&(ITER)))
+
+int
+__attribute__((noinline))
+sbitmap_first_set_bit (const_sbitmap bmap)
+{
+  unsigned int n = 0;
+  sbitmap_iterator sbi;
+
+  EXECUTE_IF_SET_IN_SBITMAP (bmap, 0, n, sbi)
+    return n;
+  return -1;
+}
+
+void
+sbitmap_zero (sbitmap bmap)
+{
+  memset (bmap->elms, 0, SBITMAP_SIZE_BYTES (bmap));
+  if (bmap->popcount)
+    memset (bmap->popcount, 0, bmap->size * sizeof (unsigned char));
+}
+
+int main ()
+{
+  check_vect ();
+
+  sbitmap tmp = sbitmap_alloc(1856);
+  sbitmap_zero (tmp);
+  int res = sbitmap_first_set_bit (tmp);
+  if (res != -1)
+    abort ();
+  sbitmap_free (tmp);
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index fe631252dc2..e0d02e09d7d 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -991,8 +991,13 @@  vec_init_loop_exit_info (class loop *loop)
 	{
 	  tree may_be_zero = niter_desc.may_be_zero;
 	  if ((integer_zerop (may_be_zero)
-	       || integer_nonzerop (may_be_zero)
-	       || COMPARISON_CLASS_P (may_be_zero))
+	       /* As we are handling may_be_zero that's not false by
+		  rewriting niter to may_be_zero ? 0 : niter we require
+		  an empty latch.  */
+	       || (single_pred_p (loop->latch)
+		   && exit->src == single_pred (loop->latch)
+		   && (integer_nonzerop (may_be_zero)
+		       || COMPARISON_CLASS_P (may_be_zero))))
 	      && (!candidate
 		  || dominated_by_p (CDI_DOMINATORS, exit->src,
 				     candidate->src)))