vect: Fix check_reduction_path [PR112374]

Message ID ZVdsZJor+tYlrQKm@tucnak
State New
Headers
Series vect: Fix check_reduction_path [PR112374] |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply

Commit Message

Jakub Jelinek Nov. 17, 2023, 1:36 p.m. UTC
  Hi!

As mentioned in the PR, the intent of the r14-5076 changes was that
it doesn't count one of the uses on the use_stmt, but what actually
got implemented is that it does this processing on any op_use_stmt,
even if it is not the use_stmt statement, which means that it
can increase count even on debug stmts (-fcompare-debug failures),
or if there would be some other use stmt with 2+ uses it could count
that as a single use.  Though, because it fails whenever cnt != 1
and I believe use_stmt must be one of the uses, it would probably
fail in the latter case anyway.

The following patch fixes that by doing this extra processing only when
op_use_stmt is use_stmt, and using the normal processing otherwise
(so ignore debug stmts, and increase on any uses on the stmt).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-11-17  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/112374
	* tree-vect-loop.cc (check_reduction_path): Perform the cond_fn_p
	special case only if op_use_stmt == use_stmt, use as_a rather than
	dyn_cast in that case.

	* gcc.dg/pr112374-1.c: New test.
	* gcc.dg/pr112374-2.c: New test.
	* g++.dg/opt/pr112374.C: New test.


	Jakub
  

Comments

Richard Biener Nov. 17, 2023, 1:44 p.m. UTC | #1
On Fri, 17 Nov 2023, Jakub Jelinek wrote:

> Hi!
> 
> As mentioned in the PR, the intent of the r14-5076 changes was that
> it doesn't count one of the uses on the use_stmt, but what actually
> got implemented is that it does this processing on any op_use_stmt,
> even if it is not the use_stmt statement, which means that it
> can increase count even on debug stmts (-fcompare-debug failures),
> or if there would be some other use stmt with 2+ uses it could count
> that as a single use.  Though, because it fails whenever cnt != 1
> and I believe use_stmt must be one of the uses, it would probably
> fail in the latter case anyway.
> 
> The following patch fixes that by doing this extra processing only when
> op_use_stmt is use_stmt, and using the normal processing otherwise
> (so ignore debug stmts, and increase on any uses on the stmt).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2023-11-17  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/112374
> 	* tree-vect-loop.cc (check_reduction_path): Perform the cond_fn_p
> 	special case only if op_use_stmt == use_stmt, use as_a rather than
> 	dyn_cast in that case.
> 
> 	* gcc.dg/pr112374-1.c: New test.
> 	* gcc.dg/pr112374-2.c: New test.
> 	* g++.dg/opt/pr112374.C: New test.
> 
> --- gcc/tree-vect-loop.cc.jj	2023-11-14 10:35:52.000000000 +0100
> +++ gcc/tree-vect-loop.cc	2023-11-15 22:42:32.782007408 +0100
> @@ -4105,9 +4105,9 @@ pop:
>  	/* In case of a COND_OP (mask, op1, op2, op1) reduction we might have
>  	   op1 twice (once as definition, once as else) in the same operation.
>  	   Allow this.  */
> -	  if (cond_fn_p)
> +	  if (cond_fn_p && op_use_stmt == use_stmt)
>  	    {
> -	      gcall *call = dyn_cast<gcall *> (use_stmt);
> +	      gcall *call = as_a<gcall *> (use_stmt);
>  	      unsigned else_pos
>  		= internal_fn_else_index (internal_fn (op.code));
>  
> --- gcc/testsuite/gcc.dg/pr112374-1.c.jj	2023-11-16 10:27:12.064849600 +0100
> +++ gcc/testsuite/gcc.dg/pr112374-1.c	2023-11-16 10:23:57.949586509 +0100
> @@ -0,0 +1,20 @@
> +/* PR tree-optimization/112374 */
> +/* { dg-do compile } */
> +/* { dg-options "-fcompare-debug -gno-statement-frontiers -O2 -w" } */
> +/* { dg-additional-options "-march=skylake-avx512" { target i?86-*-* x86_64-*-* } } */
> +/* { dg-additional-options "-march=armv9-a" { target aarch64*-*-* } } */
> +
> +void foo (int, int);
> +struct S { char s[4]; };
> +int a, b, c;
> +
> +void
> +bar ()
> +{
> +  struct S d;
> +  long e = 0;
> +  for (c = 0; c < 4; ++c)
> +    e |= (d.s[c] ? 3 : 0) << c;
> +  if (e)
> +    foo (a, b);
> +}
> --- gcc/testsuite/gcc.dg/pr112374-2.c.jj	2023-11-16 10:27:15.341803394 +0100
> +++ gcc/testsuite/gcc.dg/pr112374-2.c	2023-11-16 10:24:11.705392556 +0100
> @@ -0,0 +1,33 @@
> +/* PR tree-optimization/112374 */
> +/* { dg-do compile } */
> +/* { dg-options "-fcompare-debug -gno-statement-frontiers -O2" } */
> +/* { dg-additional-options "-march=skylake-avx512" { target i?86-*-* x86_64-*-* } } */
> +/* { dg-additional-options "-march=armv9-a" { target aarch64*-*-* } } */
> +
> +void foo (int, int);
> +struct S { char s[64]; } *p;
> +char a, b;
> +unsigned char c;
> +int d, e;
> +
> +void
> +bar (void)
> +{
> +  unsigned i;
> +  long j = 0;
> +  for (i = 0; i < b; ++i)
> +    j |= (p->s[i] ? 3 : 0) << i;
> +  if (p->s[i + 1])
> +  lab:
> +    for (;;)
> +      ;
> +  for (i = 0; i < 4; ++i)
> +    j |= p->s[i] << i;
> +  for (; i; i += 2)
> +    if (c + 1 != a)
> +      goto lab;
> +  for (; i < 8; ++i)
> +    j |= p->s[i] >= 6;
> +  if (j)
> +    foo (d, e);
> +}
> --- gcc/testsuite/g++.dg/opt/pr112374.C.jj	2023-11-16 10:27:52.626277708 +0100
> +++ gcc/testsuite/g++.dg/opt/pr112374.C	2023-11-16 10:28:04.914104455 +0100
> @@ -0,0 +1,24 @@
> +// PR tree-optimization/112374
> +// { dg-do compile { target c++11 } }
> +// { dg-options "-fcompare-debug -gno-statement-frontiers -O2" }
> +// { dg-additional-options "-march=skylake-avx512" { target i?86-*-* x86_64-*-* } }
> +// { dg-additional-options "-march=armv9-a" { target aarch64*-*-* } }
> +
> +struct t
> +{
> +  long coef[1];
> +  t(const unsigned long &a) : coef{(long)a} {};
> +  t(const t &a);
> +};
> +extern void gen_int_mode(t, int);
> +struct expand_vec_perm_d {
> +  unsigned char perm[64];
> +  int vmode;
> +  unsigned char nelt;
> +};
> +void expand_vec_perm_blend(struct expand_vec_perm_d *d) {
> +  unsigned long mask = 0;
> +  for (unsigned i = 0; i < 4; ++i)
> +    mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
> +  gen_int_mode(mask, 0);
> +}
> 
> 	Jakub
> 
>
  

Patch

--- gcc/tree-vect-loop.cc.jj	2023-11-14 10:35:52.000000000 +0100
+++ gcc/tree-vect-loop.cc	2023-11-15 22:42:32.782007408 +0100
@@ -4105,9 +4105,9 @@  pop:
 	/* In case of a COND_OP (mask, op1, op2, op1) reduction we might have
 	   op1 twice (once as definition, once as else) in the same operation.
 	   Allow this.  */
-	  if (cond_fn_p)
+	  if (cond_fn_p && op_use_stmt == use_stmt)
 	    {
-	      gcall *call = dyn_cast<gcall *> (use_stmt);
+	      gcall *call = as_a<gcall *> (use_stmt);
 	      unsigned else_pos
 		= internal_fn_else_index (internal_fn (op.code));
 
--- gcc/testsuite/gcc.dg/pr112374-1.c.jj	2023-11-16 10:27:12.064849600 +0100
+++ gcc/testsuite/gcc.dg/pr112374-1.c	2023-11-16 10:23:57.949586509 +0100
@@ -0,0 +1,20 @@ 
+/* PR tree-optimization/112374 */
+/* { dg-do compile } */
+/* { dg-options "-fcompare-debug -gno-statement-frontiers -O2 -w" } */
+/* { dg-additional-options "-march=skylake-avx512" { target i?86-*-* x86_64-*-* } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64*-*-* } } */
+
+void foo (int, int);
+struct S { char s[4]; };
+int a, b, c;
+
+void
+bar ()
+{
+  struct S d;
+  long e = 0;
+  for (c = 0; c < 4; ++c)
+    e |= (d.s[c] ? 3 : 0) << c;
+  if (e)
+    foo (a, b);
+}
--- gcc/testsuite/gcc.dg/pr112374-2.c.jj	2023-11-16 10:27:15.341803394 +0100
+++ gcc/testsuite/gcc.dg/pr112374-2.c	2023-11-16 10:24:11.705392556 +0100
@@ -0,0 +1,33 @@ 
+/* PR tree-optimization/112374 */
+/* { dg-do compile } */
+/* { dg-options "-fcompare-debug -gno-statement-frontiers -O2" } */
+/* { dg-additional-options "-march=skylake-avx512" { target i?86-*-* x86_64-*-* } } */
+/* { dg-additional-options "-march=armv9-a" { target aarch64*-*-* } } */
+
+void foo (int, int);
+struct S { char s[64]; } *p;
+char a, b;
+unsigned char c;
+int d, e;
+
+void
+bar (void)
+{
+  unsigned i;
+  long j = 0;
+  for (i = 0; i < b; ++i)
+    j |= (p->s[i] ? 3 : 0) << i;
+  if (p->s[i + 1])
+  lab:
+    for (;;)
+      ;
+  for (i = 0; i < 4; ++i)
+    j |= p->s[i] << i;
+  for (; i; i += 2)
+    if (c + 1 != a)
+      goto lab;
+  for (; i < 8; ++i)
+    j |= p->s[i] >= 6;
+  if (j)
+    foo (d, e);
+}
--- gcc/testsuite/g++.dg/opt/pr112374.C.jj	2023-11-16 10:27:52.626277708 +0100
+++ gcc/testsuite/g++.dg/opt/pr112374.C	2023-11-16 10:28:04.914104455 +0100
@@ -0,0 +1,24 @@ 
+// PR tree-optimization/112374
+// { dg-do compile { target c++11 } }
+// { dg-options "-fcompare-debug -gno-statement-frontiers -O2" }
+// { dg-additional-options "-march=skylake-avx512" { target i?86-*-* x86_64-*-* } }
+// { dg-additional-options "-march=armv9-a" { target aarch64*-*-* } }
+
+struct t
+{
+  long coef[1];
+  t(const unsigned long &a) : coef{(long)a} {};
+  t(const t &a);
+};
+extern void gen_int_mode(t, int);
+struct expand_vec_perm_d {
+  unsigned char perm[64];
+  int vmode;
+  unsigned char nelt;
+};
+void expand_vec_perm_blend(struct expand_vec_perm_d *d) {
+  unsigned long mask = 0;
+  for (unsigned i = 0; i < 4; ++i)
+    mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
+  gen_int_mode(mask, 0);
+}