tree-optimization/104445 - check for vector extraction support
Commit Message
This adds a missing check to epilogue reduction re-use, namely
that we can do hi/lo extracts from the vector when demoting it
to the epilogue vector size.
I've chosen to add a can_vec_extract helper to optabs-query.h;
in the future we might want to simplify the vectorizer's life by
handling vector-from-vector extraction via BIT_FIELD_REFs during
RTL expansion, using mode punning when the vec_extract is not
directly supported.
I'm not 100% sure we can always do the punning of the
vec_extract result to a vector mode of the same size, but then
I'm also not sure how to check for that.  (The vectorizer doesn't
check it in the other places where it does such punning at the
moment, but I suppose we eventually just go through memory there?)
Bootstrap and regtest running on x86_64-unknown-linux-gnu.
Does this look OK?
Thanks,
Richard.
2022-02-09 Richard Biener <rguenther@suse.de>
PR tree-optimization/104445
PR tree-optimization/102832
* optabs-query.h (can_vec_extract): New.
* optabs-query.cc (can_vec_extract): Likewise.
* tree-vect-loop.cc (vect_find_reusable_accumulator): Check
we can extract a hi/lo part from the larger vector.
* gcc.dg/vect/pr104445.c: New testcase.
---
gcc/optabs-query.cc | 33 ++++++++++++++++++++++++++++
gcc/optabs-query.h | 1 +
gcc/testsuite/gcc.dg/vect/pr102832.c | 12 ++++++++++
gcc/testsuite/gcc.dg/vect/pr104445.c | 16 ++++++++++++++
gcc/tree-vect-loop.cc | 8 +++++--
5 files changed, 68 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr102832.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr104445.c
Comments
Richard Biener <rguenther@suse.de> writes:
> This adds a missing check to epilogue reduction re-use, namely
> that we can do hi/lo extracts from the vector when demoting it
> to the epilogue vector size.
>
> I've chosen to add a can_vec_extract helper to optabs-query.h,
> in the future we might want to simplify the vectorizers life by
> handling vector-from-vector extraction via BIT_FIELD_REFs during
> RTL expansion via the mode punning when the vec_extract is not
> directly supported.
>
> I'm not 100% sure we can always do the punning of the
> vec_extract result to a vector mode of the same size, but then
> I'm also not sure how to check for that (the vectorizer doesn't
> in other places it does that at the moment, but I suppose we
> eventually just go through memory there)?
>
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
> Does this look OK?
LGTM. I guess some of the existing optab checks could be simplified
using the new helper, but that's a separate clean-up.
Thanks,
Richard
>
> Thanks,
> Richard.
>
> 2022-02-09 Richard Biener <rguenther@suse.de>
>
> PR tree-optimization/104445
> PR tree-optimization/102832
> * optabs-query.h (can_vec_extract): New.
> * optabs-query.cc (can_vec_extract): Likewise.
> * tree-vect-loop.cc (vect_find_reusable_accumulator): Check
> we can extract a hi/lo part from the larger vector.
>
> * gcc.dg/vect/pr104445.c: New testcase.
> ---
> gcc/optabs-query.cc | 33 ++++++++++++++++++++++++++++
> gcc/optabs-query.h | 1 +
> gcc/testsuite/gcc.dg/vect/pr102832.c | 12 ++++++++++
> gcc/testsuite/gcc.dg/vect/pr104445.c | 16 ++++++++++++++
> gcc/tree-vect-loop.cc | 8 +++++--
> 5 files changed, 68 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr102832.c
> create mode 100644 gcc/testsuite/gcc.dg/vect/pr104445.c
>
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index 2ce8d74db16..fa88b4bede0 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -763,3 +763,36 @@ supports_vec_scatter_store_p (machine_mode mode)
> return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
> }
>
> +/* Whether we can extract part of the vector mode MODE as
> + (scalar or vector) mode EXTR_MODE. */
> +
> +bool
> +can_vec_extract (machine_mode mode, machine_mode extr_mode)
> +{
> + if (!VECTOR_MODE_P (mode))
> + return false;
> +
> + unsigned m;
> + if (!constant_multiple_p (GET_MODE_SIZE (mode),
> + GET_MODE_SIZE (extr_mode), &m))
> + return false;
> +
> + if (convert_optab_handler (vec_extract_optab, mode, extr_mode)
> + != CODE_FOR_nothing)
> + return true;
> +
> + if (!VECTOR_MODE_P (extr_mode))
> + return false;
> +
> + /* Besides a direct vec_extract we can also use an element extract from
> + an integer vector mode with elements of the size of the extr_mode. */
> + scalar_int_mode imode;
> + machine_mode vmode;
> + if (!int_mode_for_size (GET_MODE_SIZE (extr_mode), 0).exists (&imode)
> + || !related_vector_mode (mode, imode, m).exists (&vmode)
> + || (convert_optab_handler (vec_extract_optab, vmode, imode)
> + == CODE_FOR_nothing))
> + return false;
> + /* We assume we can pun mode to vmode and imode to extr_mode. */
> + return true;
> +}
> diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
> index 8b768c1797d..b9c9fd6f64d 100644
> --- a/gcc/optabs-query.h
> +++ b/gcc/optabs-query.h
> @@ -195,6 +195,7 @@ bool can_atomic_load_p (machine_mode);
> bool lshift_cheap_p (bool);
> bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
> bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
> +bool can_vec_extract (machine_mode, machine_mode);
>
> /* Version of find_widening_optab_handler_and_mode that operates on
> specific mode types. */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr102832.c b/gcc/testsuite/gcc.dg/vect/pr102832.c
> new file mode 100644
> index 00000000000..7cb4db5e4c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr102832.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3" } */
> +/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=128" { target aarch64-*-* } } */
> +
> +int a, b;
> +char c;
> +signed char d(int e, int f) { return e - f; }
> +void g() {
> + a = 0;
> + for (; a >= -17; a = d(a, 1))
> + c ^= b;
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/pr104445.c b/gcc/testsuite/gcc.dg/vect/pr104445.c
> new file mode 100644
> index 00000000000..8ec3b3b0f1e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr104445.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3" } */
> +/* { dg-additional-options "-mavx -mno-mmx" { target x86_64-*-* i?86-*-* } } */
> +
> +signed char a;
> +signed char f (int i, int j)
> +{
> + signed char c;
> + while (i != 0)
> + {
> + a ^= j;
> + ++c;
> + ++i;
> + }
> + return c;
> +}
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 4860bfd3344..9916ae46460 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -4997,7 +4997,8 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
> if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype),
> TYPE_VECTOR_SUBPARTS (vectype), &m))
> return false;
> - /* Check the intermediate vector types are available. */
> + /* Check the intermediate vector types and operations are available. */
> + tree prev_vectype = old_vectype;
> while (m > 2)
> {
> m /= 2;
> @@ -5006,8 +5007,11 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
> exact_div (TYPE_VECTOR_SUBPARTS (old_vectype), m));
> if (!intermediate_vectype
> || !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info),
> - intermediate_vectype))
> + intermediate_vectype)
> + || !can_vec_extract (TYPE_MODE (prev_vectype),
> + TYPE_MODE (intermediate_vectype)))
> return false;
> + prev_vectype = intermediate_vectype;
> }
>
> /* Non-SLP reductions might apply an adjustment after the reduction
On Wed, 9 Feb 2022, Richard Sandiford wrote:
> Richard Biener <rguenther@suse.de> writes:
> > This adds a missing check to epilogue reduction re-use, namely
> > that we can do hi/lo extracts from the vector when demoting it
> > to the epilogue vector size.
> >
> > I've chosen to add a can_vec_extract helper to optabs-query.h,
> > in the future we might want to simplify the vectorizers life by
> > handling vector-from-vector extraction via BIT_FIELD_REFs during
> > RTL expansion via the mode punning when the vec_extract is not
> > directly supported.
> >
> > I'm not 100% sure we can always do the punning of the
> > vec_extract result to a vector mode of the same size, but then
> > I'm also not sure how to check for that (the vectorizer doesn't
> > in other places it does that at the moment, but I suppose we
> > eventually just go through memory there)?
> >
> > Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> >
> > Does this look OK?
>
> LGTM. I guess some of the existing optab checks could be simplified
> using the new helper, but that's a separate clean-up.
Indeed.
I did notice some errors in the patch, though, when digging into
whether there's a bug in the i386 patterns as well. I've now
verified we do proper can_vec_extract queries (the first version
got the order wrong for multiple intermediate steps and asked for
vec_extractv8qiv16qi, i.e. a v16qi extract from a v8qi vector).
I had also mixed up bitsize vs. size for the int_mode_for_size
query, which expects a size in bits. The issue with x86 is that
we have no v8qi with -m32 -mno-mmx and use DImode instead,
but we do have v4qi. I've checked that we can handle v16qi -> DImode
extracts via vec_extract_v2didi, but we have to punt for
DImode -> v4qi with this simple approach.
I'll think about those cases when we clean this up, possibly next
stage1.
Re-bootstrapping and testing the variant below, will push when
that succeeds.
Richard.
From e1374a647d4524734cad373a79fe9b863365c374 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Wed, 9 Feb 2022 10:55:18 +0100
Subject: [PATCH] tree-optimization/104445 - check for vector extraction
support
To: gcc-patches@gcc.gnu.org
This adds a missing check to epilogue reduction re-use, namely
that we can do hi/lo extracts from the vector when demoting it
to the epilogue vector size.
I've chosen to add a can_vec_extract helper to optabs-query.h;
in the future we might want to simplify the vectorizer's life by
handling vector-from-vector extraction via BIT_FIELD_REFs during
RTL expansion, using mode punning when the vec_extract is not
directly supported.
I'm not 100% sure we can always do the punning of the
vec_extract result to a vector mode of the same size, but then
I'm also not sure how to check for that.  (The vectorizer doesn't
check it in the other places where it does such punning at the
moment, but I suppose we eventually just go through memory there?)
2022-02-09 Richard Biener <rguenther@suse.de>
PR tree-optimization/104445
PR tree-optimization/102832
* optabs-query.h (can_vec_extract): New.
* optabs-query.cc (can_vec_extract): Likewise.
* tree-vect-loop.cc (vect_find_reusable_accumulator): Check
we can extract a hi/lo part from the larger vector, rework
check iteration from larger to smaller sizes.
* gcc.dg/vect/pr104445.c: New testcase.
---
gcc/optabs-query.cc | 28 ++++++++++++++++++++++++++++
gcc/optabs-query.h | 1 +
gcc/testsuite/gcc.dg/vect/pr102832.c | 12 ++++++++++++
gcc/testsuite/gcc.dg/vect/pr104445.c | 16 ++++++++++++++++
gcc/tree-vect-loop.cc | 16 ++++++++++------
5 files changed, 67 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr102832.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr104445.c
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 2ce8d74db16..713c098ba4e 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -763,3 +763,31 @@ supports_vec_scatter_store_p (machine_mode mode)
return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
}
+/* Whether we can extract part of the vector mode MODE as
+ (scalar or vector) mode EXTR_MODE. */
+
+bool
+can_vec_extract (machine_mode mode, machine_mode extr_mode)
+{
+ unsigned m;
+ if (!VECTOR_MODE_P (mode)
+ || !constant_multiple_p (GET_MODE_SIZE (mode),
+ GET_MODE_SIZE (extr_mode), &m))
+ return false;
+
+ if (convert_optab_handler (vec_extract_optab, mode, extr_mode)
+ != CODE_FOR_nothing)
+ return true;
+
+ /* Besides a direct vec_extract we can also use an element extract from
+ an integer vector mode with elements of the size of the extr_mode. */
+ scalar_int_mode imode;
+ machine_mode vmode;
+ if (!int_mode_for_size (GET_MODE_BITSIZE (extr_mode), 0).exists (&imode)
+ || !related_vector_mode (mode, imode, m).exists (&vmode)
+ || (convert_optab_handler (vec_extract_optab, vmode, imode)
+ == CODE_FOR_nothing))
+ return false;
+ /* We assume we can pun mode to vmode and imode to extr_mode. */
+ return true;
+}
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index 8b768c1797d..b9c9fd6f64d 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -195,6 +195,7 @@ bool can_atomic_load_p (machine_mode);
bool lshift_cheap_p (bool);
bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
+bool can_vec_extract (machine_mode, machine_mode);
/* Version of find_widening_optab_handler_and_mode that operates on
specific mode types. */
diff --git a/gcc/testsuite/gcc.dg/vect/pr102832.c b/gcc/testsuite/gcc.dg/vect/pr102832.c
new file mode 100644
index 00000000000..7cb4db5e4c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr102832.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=128" { target aarch64-*-* } } */
+
+int a, b;
+char c;
+signed char d(int e, int f) { return e - f; }
+void g() {
+ a = 0;
+ for (; a >= -17; a = d(a, 1))
+ c ^= b;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr104445.c b/gcc/testsuite/gcc.dg/vect/pr104445.c
new file mode 100644
index 00000000000..8ec3b3b0f1e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104445.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-additional-options "-mavx -mno-mmx" { target x86_64-*-* i?86-*-* } } */
+
+signed char a;
+signed char f (int i, int j)
+{
+ signed char c;
+ while (i != 0)
+ {
+ a ^= j;
+ ++c;
+ ++i;
+ }
+ return c;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 4860bfd3344..896218f23ea 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -4997,17 +4997,21 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype),
TYPE_VECTOR_SUBPARTS (vectype), &m))
return false;
- /* Check the intermediate vector types are available. */
- while (m > 2)
+ /* Check the intermediate vector types and operations are available. */
+ tree prev_vectype = old_vectype;
+ poly_uint64 intermediate_nunits = TYPE_VECTOR_SUBPARTS (old_vectype);
+ while (known_gt (intermediate_nunits, TYPE_VECTOR_SUBPARTS (vectype)))
{
- m /= 2;
+ intermediate_nunits = exact_div (intermediate_nunits, 2);
tree intermediate_vectype = get_related_vectype_for_scalar_type
- (TYPE_MODE (vectype), TREE_TYPE (vectype),
- exact_div (TYPE_VECTOR_SUBPARTS (old_vectype), m));
+ (TYPE_MODE (vectype), TREE_TYPE (vectype), intermediate_nunits);
if (!intermediate_vectype
|| !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info),
- intermediate_vectype))
+ intermediate_vectype)
+ || !can_vec_extract (TYPE_MODE (prev_vectype),
+ TYPE_MODE (intermediate_vectype)))
return false;
+ prev_vectype = intermediate_vectype;
}
/* Non-SLP reductions might apply an adjustment after the reduction
@@ -763,3 +763,36 @@ supports_vec_scatter_store_p (machine_mode mode)
return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
}
+/* Whether we can extract part of the vector mode MODE as
+ (scalar or vector) mode EXTR_MODE. */
+
+bool
+can_vec_extract (machine_mode mode, machine_mode extr_mode)
+{
+ if (!VECTOR_MODE_P (mode))
+ return false;
+
+ unsigned m;
+ if (!constant_multiple_p (GET_MODE_SIZE (mode),
+ GET_MODE_SIZE (extr_mode), &m))
+ return false;
+
+ if (convert_optab_handler (vec_extract_optab, mode, extr_mode)
+ != CODE_FOR_nothing)
+ return true;
+
+ if (!VECTOR_MODE_P (extr_mode))
+ return false;
+
+ /* Besides a direct vec_extract we can also use an element extract from
+ an integer vector mode with elements of the size of the extr_mode. */
+ scalar_int_mode imode;
+ machine_mode vmode;
+ if (!int_mode_for_size (GET_MODE_SIZE (extr_mode), 0).exists (&imode)
+ || !related_vector_mode (mode, imode, m).exists (&vmode)
+ || (convert_optab_handler (vec_extract_optab, vmode, imode)
+ == CODE_FOR_nothing))
+ return false;
+ /* We assume we can pun mode to vmode and imode to extr_mode. */
+ return true;
+}
@@ -195,6 +195,7 @@ bool can_atomic_load_p (machine_mode);
bool lshift_cheap_p (bool);
bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
+bool can_vec_extract (machine_mode, machine_mode);
/* Version of find_widening_optab_handler_and_mode that operates on
specific mode types. */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=128" { target aarch64-*-* } } */
+
+int a, b;
+char c;
+signed char d(int e, int f) { return e - f; }
+void g() {
+ a = 0;
+ for (; a >= -17; a = d(a, 1))
+ c ^= b;
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+/* { dg-additional-options "-mavx -mno-mmx" { target x86_64-*-* i?86-*-* } } */
+
+signed char a;
+signed char f (int i, int j)
+{
+ signed char c;
+ while (i != 0)
+ {
+ a ^= j;
+ ++c;
+ ++i;
+ }
+ return c;
+}
@@ -4997,7 +4997,8 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (old_vectype),
TYPE_VECTOR_SUBPARTS (vectype), &m))
return false;
- /* Check the intermediate vector types are available. */
+ /* Check the intermediate vector types and operations are available. */
+ tree prev_vectype = old_vectype;
while (m > 2)
{
m /= 2;
@@ -5006,8 +5007,11 @@ vect_find_reusable_accumulator (loop_vec_info loop_vinfo,
exact_div (TYPE_VECTOR_SUBPARTS (old_vectype), m));
if (!intermediate_vectype
|| !directly_supported_p (STMT_VINFO_REDUC_CODE (reduc_info),
- intermediate_vectype))
+ intermediate_vectype)
+ || !can_vec_extract (TYPE_MODE (prev_vectype),
+ TYPE_MODE (intermediate_vectype)))
return false;
+ prev_vectype = intermediate_vectype;
}
/* Non-SLP reductions might apply an adjustment after the reduction