middle-end: update vuses out of loop which use a vdef that's moved [PR114068]

Message ID patch-18323-tamar@arm.com
State New
Headers
Series middle-end: update vuses out of loop which use a vdef that's moved [PR114068] |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply

Commit Message

Tamar Christina Feb. 23, 2024, 5:53 p.m. UTC
  Hi All,

In certain cases we can have a situation where the merge block has a vUSE
virtual PHI and the exits do not.  In this case for instance the exits lead
to an abort so they have no virtual PHIs.  If we have a store before the first
exit and we move it to a later block during vectorization we update the vUSE
chain.

However the merge block is not an exit and is not visited by the update code.

This patch fixes it by checking, while moving the statements, whether there
are any out-of-loop uses of the vDEF that is the last_seen_vuse.  Normally
there are none and nothing needs to be done, but if there are any, they are
updated to the last vDEF in the exit block.

Bootstrapped and regtested on aarch64-none-linux-gnu and
x86_64-pc-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	PR tree-optimization/114068
	* tree-vect-loop.cc (move_early_exit_stmts): Update vUSE chain in merge
	block.

gcc/testsuite/ChangeLog:

	PR tree-optimization/114068
	* gcc.dg/vect/vect-early-break_118-pr114068.c: New test.
	* gcc.dg/vect/vect-early-break_119-pr114068.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c
new file mode 100644
index 0000000000000000000000000000000000000000..b462a464b6603e718c5a283513ea586fc13e37ce




--
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c
new file mode 100644
index 0000000000000000000000000000000000000000..b462a464b6603e718c5a283513ea586fc13e37ce
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+struct h {
+  int b;
+  int f;
+} k;
+
+void n(int m) {
+  struct h a = k;
+  for (int o = m; o; ++o) {
+    if (a.f)
+      __builtin_unreachable();
+    if (o > 1)
+      __builtin_unreachable();
+    *(&k.b + o) = 1;
+  }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c
new file mode 100644
index 0000000000000000000000000000000000000000..a65ef7b8c4901b2ada585f38fda436dc07d1e1de
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+struct h {
+  int b;
+  int c;
+  int f;
+} k;
+
+void n(int m) {
+  struct h a = k;
+  for (int o = m; o; ++o) {
+    if (a.f)
+      __builtin_unreachable();
+    if (o > 1)
+      __builtin_unreachable();
+    *(&k.b + o) = 1;
+    *(&k.c + o*m) = 2;
+  }
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 35f1f8c7d4245135ace7ffff40ff9be548919587..44bd8032b55b1ef84fdf4fa9d6117304b7709d6f 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11837,6 +11837,27 @@ move_early_exit_stmts (loop_vec_info loop_vinfo)
       update_stmt (p);
     }
 
+  /* last_seen_vuse should now be the PHI in the loop header.  Check for
+     any out of loop uses and update them to the vUSE on the loop latch.  */
+  auto vuse_stmt =  loop_vinfo->lookup_def (last_seen_vuse);
+  gphi *vuse_def;
+  if (vuse_stmt
+      && (vuse_def = dyn_cast <gphi *> (STMT_VINFO_STMT (vuse_stmt))))
+    {
+      imm_use_iterator iter;
+      use_operand_p use_p;
+      gimple *use_stmt;
+      auto loop = LOOP_VINFO_LOOP (loop_vinfo);
+      tree vuse = PHI_ARG_DEF_FROM_EDGE (vuse_def, loop_latch_edge (loop));
+      FOR_EACH_IMM_USE_STMT (use_stmt, iter, last_seen_vuse)
+	{
+	  if (flow_bb_inside_loop_p (loop, use_stmt->bb))
+	    continue;
+	  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+	    SET_USE (use_p, vuse);
+	}
+    }
+
   /* And update the LC PHIs on exits.  */
   for (edge e : get_loop_exit_edges (LOOP_VINFO_LOOP  (loop_vinfo)))
     if (!dominated_by_p (CDI_DOMINATORS, e->src, dest_bb))
  

Comments

Richard Biener Feb. 26, 2024, 9:54 a.m. UTC | #1
On Fri, 23 Feb 2024, Tamar Christina wrote:

> Hi All,
> 
> In certain cases we can have a situation where the merge block has a vUSE
> virtual PHI and the exits do not.  In this case for instance the exits lead
> to an abort so they have no virtual PHIs.  If we have a store before the first
> exit and we move it to a later block during vectorization we update the vUSE
> chain.
> 
> However the merge block is not an exit and is not visited by the update code.
> 
> This patch fixes it by checking during moving if there are any out of loop uses
> of the vDEF that is the last_seen_vuse.  Normally there wouldn't be any and
> things are skipped, but if there is then update that to the last vDEF in the
> exit block.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and
> x86_64-pc-linux-gnu no issues.
> 
> Ok for master?

I think the problem is rather that BB13 misses the LC virtual PHI which
it requires because of the merge block.  So this should be fixed during
peeling instead.  There we assume the IV exit we choose will have
a virtual LC PHI if the loop needs one (but we choose the exit going
to unreachable () as IV exit ...).  This assumption breaks down in this
case.

Let me try to fix this.

Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimizations/114068
> 	* tree-vect-loop.cc (move_early_exit_stmts): Update vUSE chain in merge
> 	block.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimizations/114068
> 	* gcc.dg/vect/vect-early-break_118-pr114068.c: New test.
> 	* gcc.dg/vect/vect-early-break_119-pr114068.c: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..b462a464b6603e718c5a283513ea586fc13e37ce
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-additional-options "-O3" } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +struct h {
> +  int b;
> +  int f;
> +} k;
> +
> +void n(int m) {
> +  struct h a = k;
> +  for (int o = m; o; ++o) {
> +    if (a.f)
> +      __builtin_unreachable();
> +    if (o > 1)
> +      __builtin_unreachable();
> +    *(&k.b + o) = 1;
> +  }
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..a65ef7b8c4901b2ada585f38fda436dc07d1e1de
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-additional-options "-O3" } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +struct h {
> +  int b;
> +  int c;
> +  int f;
> +} k;
> +
> +void n(int m) {
> +  struct h a = k;
> +  for (int o = m; o; ++o) {
> +    if (a.f)
> +      __builtin_unreachable();
> +    if (o > 1)
> +      __builtin_unreachable();
> +    *(&k.b + o) = 1;
> +    *(&k.c + o*m) = 2;
> +  }
> +}
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 35f1f8c7d4245135ace7ffff40ff9be548919587..44bd8032b55b1ef84fdf4fa9d6117304b7709d6f 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -11837,6 +11837,27 @@ move_early_exit_stmts (loop_vec_info loop_vinfo)
>        update_stmt (p);
>      }
>  
> +  /* last_seen_vuse should now be the PHI in the loop header.  Check for
> +     any out of loop uses and update them to the vUSE on the loop latch.  */
> +  auto vuse_stmt =  loop_vinfo->lookup_def (last_seen_vuse);
> +  gphi *vuse_def;
> +  if (vuse_stmt
> +      && (vuse_def = dyn_cast <gphi *> (STMT_VINFO_STMT (vuse_stmt))))
> +    {
> +      imm_use_iterator iter;
> +      use_operand_p use_p;
> +      gimple *use_stmt;
> +      auto loop = LOOP_VINFO_LOOP (loop_vinfo);
> +      tree vuse = PHI_ARG_DEF_FROM_EDGE (vuse_def, loop_latch_edge (loop));
> +      FOR_EACH_IMM_USE_STMT (use_stmt, iter, last_seen_vuse)
> +	{
> +	  if (flow_bb_inside_loop_p (loop, use_stmt->bb))
> +	    continue;
> +	  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
> +	    SET_USE (use_p, vuse);
> +	}
> +    }
> +
>    /* And update the LC PHIs on exits.  */
>    for (edge e : get_loop_exit_edges (LOOP_VINFO_LOOP  (loop_vinfo)))
>      if (!dominated_by_p (CDI_DOMINATORS, e->src, dest_bb))
> 
> 
> 
> 
>
  

Patch

--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_118-pr114068.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+struct h {
+  int b;
+  int f;
+} k;
+
+void n(int m) {
+  struct h a = k;
+  for (int o = m; o; ++o) {
+    if (a.f)
+      __builtin_unreachable();
+    if (o > 1)
+      __builtin_unreachable();
+    *(&k.b + o) = 1;
+  }
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c
new file mode 100644
index 0000000000000000000000000000000000000000..a65ef7b8c4901b2ada585f38fda436dc07d1e1de
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_119-pr114068.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+struct h {
+  int b;
+  int c;
+  int f;
+} k;
+
+void n(int m) {
+  struct h a = k;
+  for (int o = m; o; ++o) {
+    if (a.f)
+      __builtin_unreachable();
+    if (o > 1)
+      __builtin_unreachable();
+    *(&k.b + o) = 1;
+    *(&k.c + o*m) = 2;
+  }
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 35f1f8c7d4245135ace7ffff40ff9be548919587..44bd8032b55b1ef84fdf4fa9d6117304b7709d6f 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11837,6 +11837,27 @@  move_early_exit_stmts (loop_vec_info loop_vinfo)
       update_stmt (p);
     }
 
+  /* last_seen_vuse should now be the PHI in the loop header.  Check for
+     any out of loop uses and update them to the vUSE on the loop latch.  */
+  auto vuse_stmt =  loop_vinfo->lookup_def (last_seen_vuse);
+  gphi *vuse_def;
+  if (vuse_stmt
+      && (vuse_def = dyn_cast <gphi *> (STMT_VINFO_STMT (vuse_stmt))))
+    {
+      imm_use_iterator iter;
+      use_operand_p use_p;
+      gimple *use_stmt;
+      auto loop = LOOP_VINFO_LOOP (loop_vinfo);
+      tree vuse = PHI_ARG_DEF_FROM_EDGE (vuse_def, loop_latch_edge (loop));
+      FOR_EACH_IMM_USE_STMT (use_stmt, iter, last_seen_vuse)
+	{
+	  if (flow_bb_inside_loop_p (loop, use_stmt->bb))
+	    continue;
+	  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+	    SET_USE (use_p, vuse);
+	}
+    }
+
   /* And update the LC PHIs on exits.  */
   for (edge e : get_loop_exit_edges (LOOP_VINFO_LOOP  (loop_vinfo)))
     if (!dominated_by_p (CDI_DOMINATORS, e->src, dest_bb))