vect: Fix wrong code with pr108692.c on targets with only non-widening ABD [PR118727]
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
fail
|
Patch failed to apply
|
Commit Message
With things like
// signed char a_14, a_16;
a.0_4 = (unsigned char) a_14;
_5 = (int) a.0_4;
b.1_6 = (unsigned char) b_16;
_7 = (int) b.1_6;
c_17 = _5 - _7;
_8 = ABS_EXPR <c_17>;
r_18 = _8 + r_23;
An ABD pattern will be recognized for _8:
patt_31 = .ABD (a.0_4, b.1_6);
It's still correct. But then when the SAD pattern is recognized:
patt_29 = SAD_EXPR <a_14, b_16, r_23>;
This is not correct. This only happens for targets with both uabd and
sabd but not vec_widen_{s,u}abd, currently LoongArch is the only target
affected.
The problem is vect_look_through_possible_promotion will throw away a
series of conversions if the effect is equivalent to a sign change and a
promotion, but here the sign change is definitely relevant, and the
promotion is also relevant for "mixed sign" cases like
r += abs((unsigned int)(unsigned char) a - (signed int)(signed char) b
(we need to promote to HImode as the difference can exceed the range of
QImode).
If there were any redundant promotion, it should have been stripped in
vect_recog_abd_pattern (i.e. when patt_31 = .ABD (a.0_4, b.1_6) is
recognized) instead of in vect_recog_sad_pattern, or we'd have a
missed-optimization if the ABD output is not summerized. So anyway
vect_recog_sad_pattern is just not a proper location to call
vect_look_through_possible_promotion for the ABD inputs, remove the
calls to fix the issue.
gcc/ChangeLog:
PR tree-optimization/118727
* tree-vect-patterns.cc (vect_recog_sad_pattern): Don't call
vect_look_through_possible_promotion on ABD inputs.
gcc/testsuite/ChangeLog:
PR tree-optimization/118727
* gcc.dg/pr108692.c: Mention PR 118727 in the comment.
* gcc.dg/pr118727.c: New test case.
---
Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk?
gcc/testsuite/gcc.dg/pr108692.c | 1 +
gcc/testsuite/gcc.dg/pr118727.c | 32 ++++++++++++++++++++++++++++++++
gcc/tree-vect-patterns.cc | 11 ++---------
3 files changed, 35 insertions(+), 9 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/pr118727.c
Comments
On Wed, 5 Feb 2025, Xi Ruoyao wrote:
> With things like
>
> // signed char a_14, a_16;
> a.0_4 = (unsigned char) a_14;
> _5 = (int) a.0_4;
> b.1_6 = (unsigned char) b_16;
> _7 = (int) b.1_6;
> c_17 = _5 - _7;
> _8 = ABS_EXPR <c_17>;
> r_18 = _8 + r_23;
>
> An ABD pattern will be recognized for _8:
>
> patt_31 = .ABD (a.0_4, b.1_6);
>
> It's still correct. But then when the SAD pattern is recognized:
>
> patt_29 = SAD_EXPR <a_14, b_16, r_23>;
>
> This is not correct. This only happens for targets with both uabd and
> sabd but not vec_widen_{s,u}abd, currently LoongArch is the only target
> affected.
>
> The problem is vect_look_through_possible_promotion will throw away a
> series of conversions if the effect is equivalent to a sign change and a
> promotion, but here the sign change is definitely relevant, and the
> promotion is also relevant for "mixed sign" cases like
> r += abs((unsigned int)(unsigned char) a - (signed int)(signed char) b
> (we need to promote to HImode as the difference can exceed the range of
> QImode).
>
> If there were any redundant promotion, it should have been stripped in
> vect_recog_abd_pattern (i.e. when patt_31 = .ABD (a.0_4, b.1_6) is
> recognized) instead of in vect_recog_sad_pattern, or we'd have a
> missed-optimization if the ABD output is not summerized. So anyway
> vect_recog_sad_pattern is just not a proper location to call
> vect_look_through_possible_promotion for the ABD inputs, remove the
> calls to fix the issue.
OK.
Thanks,
Richard.
> gcc/ChangeLog:
>
> PR tree-optimization/118727
> * tree-vect-patterns.cc (vect_recog_sad_pattern): Don't call
> vect_look_through_possible_promotion on ABD inputs.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/118727
> * gcc.dg/pr108692.c: Mention PR 118727 in the comment.
> * gcc.dg/pr118727.c: New test case.
> ---
>
> Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk?
>
> gcc/testsuite/gcc.dg/pr108692.c | 1 +
> gcc/testsuite/gcc.dg/pr118727.c | 32 ++++++++++++++++++++++++++++++++
> gcc/tree-vect-patterns.cc | 11 ++---------
> 3 files changed, 35 insertions(+), 9 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/pr118727.c
>
> diff --git a/gcc/testsuite/gcc.dg/pr108692.c b/gcc/testsuite/gcc.dg/pr108692.c
> index 13a27496ad9..eeee2203281 100644
> --- a/gcc/testsuite/gcc.dg/pr108692.c
> +++ b/gcc/testsuite/gcc.dg/pr108692.c
> @@ -1,4 +1,5 @@
> /* PR tree-optimization/108692 */
> +/* PR tree-optimization/118727 */
> /* { dg-do run } */
> /* { dg-options "-O2 -ftree-vectorize" } */
>
> diff --git a/gcc/testsuite/gcc.dg/pr118727.c b/gcc/testsuite/gcc.dg/pr118727.c
> new file mode 100644
> index 00000000000..2ee5fa78236
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr118727.c
> @@ -0,0 +1,32 @@
> +/* PR tree-optimization/118727 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +__attribute__((noipa)) int
> +foo (signed char *x, signed char *y, int n)
> +{
> + int i, r = 0;
> + signed char a, b;
> + for (i = 0; i < n; i++)
> + {
> + a = x[i];
> + b = y[i];
> + /* Slightly twisted from pr108692.c. */
> + int c = (unsigned int)(unsigned char) a - (signed int)(signed char) b;
> + r = r + (c < 0 ? -c : c);
> + }
> + return r;
> +}
> +
> +int
> +main ()
> +{
> + signed char x[64] = {}, y[64] = {};
> + if (__CHAR_BIT__ != 8 || __SIZEOF_INT__ != 4)
> + return 0;
> + x[32] = -1;
> + y[32] = -128;
> + if (foo (x, y, 64) != 383)
> + __builtin_abort ();
> + return 0;
> +}
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 5aebf950548..6fc97d1b6ef 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -1405,15 +1405,8 @@ vect_recog_sad_pattern (vec_info *vinfo,
> tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
> tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
>
> - if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
> - {
> - if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
> - &unprom[0])
> - || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
> - &unprom[1]))
> - return NULL;
> - }
> - else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
> + if (gimple_call_internal_fn (abd_stmt) == IFN_ABD
> + || gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
> {
> unprom[0].op = abd_oprnd0;
> unprom[0].type = TREE_TYPE (abd_oprnd0);
>
@@ -1,4 +1,5 @@
/* PR tree-optimization/108692 */
+/* PR tree-optimization/118727 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize" } */
new file mode 100644
@@ -0,0 +1,32 @@
+/* PR tree-optimization/118727 */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+__attribute__((noipa)) int
+foo (signed char *x, signed char *y, int n)
+{
+ int i, r = 0;
+ signed char a, b;
+ for (i = 0; i < n; i++)
+ {
+ a = x[i];
+ b = y[i];
+ /* Slightly twisted from pr108692.c. */
+ int c = (unsigned int)(unsigned char) a - (signed int)(signed char) b;
+ r = r + (c < 0 ? -c : c);
+ }
+ return r;
+}
+
+int
+main ()
+{
+ signed char x[64] = {}, y[64] = {};
+ if (__CHAR_BIT__ != 8 || __SIZEOF_INT__ != 4)
+ return 0;
+ x[32] = -1;
+ y[32] = -128;
+ if (foo (x, y, 64) != 383)
+ __builtin_abort ();
+ return 0;
+}
@@ -1405,15 +1405,8 @@ vect_recog_sad_pattern (vec_info *vinfo,
tree abd_oprnd0 = gimple_call_arg (abd_stmt, 0);
tree abd_oprnd1 = gimple_call_arg (abd_stmt, 1);
- if (gimple_call_internal_fn (abd_stmt) == IFN_ABD)
- {
- if (!vect_look_through_possible_promotion (vinfo, abd_oprnd0,
- &unprom[0])
- || !vect_look_through_possible_promotion (vinfo, abd_oprnd1,
- &unprom[1]))
- return NULL;
- }
- else if (gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
+ if (gimple_call_internal_fn (abd_stmt) == IFN_ABD
+ || gimple_call_internal_fn (abd_stmt) == IFN_VEC_WIDEN_ABD)
{
unprom[0].op = abd_oprnd0;
unprom[0].type = TREE_TYPE (abd_oprnd0);