Move negative stride bias out of dr_misalignment
Commit Message
This moves applying of a bias for negative stride accesses out of
dr_misalignment in favor of a more general optional offset argument.
The negative bias is now computed by get_load_store_type and applied
accordingly to determine the alignment support scheme. Likewise
the peeling/versioning code is adjusted albeit that still assumes
we'll end up with VMAT_CONTIGUOUS_DOWN or VMAT_CONTIGUOUS_REVERSE
but at least when not so (VMAT_STRIDED_SLP is one possibility) then
get_load_store_type will _not_ falsely report an aligned access but
instead an access with known misalignment.
This fixes PR96109.
Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
Richard.
2021-10-25 Richard Biener <rguenther@suse.de>
PR tree-optimization/96109
* tree-vectorizer.h (dr_misalignment): Add optional offset
parameter.
* tree-vect-data-refs.c (dr_misalignment): Likewise. Remove
offset applied for negative stride accesses.
(vect_enhance_data_refs_alignment): Compute negative stride
access offset and pass it to dr_misalignment.
* tree-vect-stmts.c (get_negative_load_store_type): Pass
negative offset to dr_misalignment.
(get_group_load_store_type): Likewise.
(get_load_store_type): Likewise.
(vectorizable_store): Remove asserts about alignment.
(vectorizable_load): Likewise.
---
gcc/tree-vect-data-refs.c | 58 ++++++++++++++++++++++++++-------------
gcc/tree-vect-stmts.c | 19 +++++--------
gcc/tree-vectorizer.h | 3 +-
3 files changed, 48 insertions(+), 32 deletions(-)
Comments
On 26 October 2021 11:19:44 CEST, Richard Biener via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
>@@ -2010,6 +2010,7 @@ get_negative_load_store_type (vec_info *vinfo,
> if (dump_enabled_p ())
> dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> "negative step but alignment required.\n");
>+ *poffset = 0;
> return VMAT_ELEMENTWISE;
> *poffset = 0;
> }
I think you cannot really diagnose these; they would trigger a lot, in each early return, no?
Or would we see that there are unreachable, non-artificial stmts in the same block after a return?
Somebody could experiment with diagnosing each and every DCEd stmt (!artificial, nondebug), but I would expect that all hell breaks loose.
cheers,
On Tue, 26 Oct 2021, Bernhard Reutner-Fischer wrote:
> On 26 October 2021 11:19:44 CEST, Richard Biener via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
>
>
>
> >@@ -2010,6 +2010,7 @@ get_negative_load_store_type (vec_info *vinfo,
> > if (dump_enabled_p ())
> > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > "negative step but alignment required.\n");
> >+ *poffset = 0;
> > return VMAT_ELEMENTWISE;
> > *poffset = 0;
> > }
>
> I think you cannot really diagnose these; they would trigger a lot, in each early return, no?
> Or would we see that there are unreachable, non-artificial stmts in the same block after a return?
> Somebody could experiment with diagnosing each and every DCEd stmt (!artificial, nondebug), but I would expect that all hell breaks loose.
> cheers,
[I fixed the issue before pushing this patch btw]
I agree in general, where I would diagnose this is when we build the
CFG I would diagnose unreachable blocks - the above does not have
any path to the second *poffset store. Like with the prototype patch
below we warn for
int *p;
int foo()
{
return 0;
*p = 0;
}
> ./cc1 -quiet t.c
t.c: In function 'foo':
t.c:5:3: warning: statement is not reachable
5 | *p = 0;
| ^~
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index b3a27bcd17c..2de661ba35d 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -374,6 +374,22 @@ execute_build_cfg (void)
fprintf (dump_file, "Scope blocks:\n");
dump_scope_blocks (dump_file, dump_flags);
}
+
+ find_unreachable_blocks ();
+ basic_block bb;
+ FOR_EACH_BB_FN (bb, cfun)
+ if (!(bb->flags & BB_REACHABLE))
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+	 gsi_next (&gsi))
+ {
+ if (gimple_location (gsi_stmt (gsi)) != UNKNOWN_LOCATION)
+ warning_at (gimple_location (gsi_stmt (gsi)), 0,
+ "statement is not reachable");
+ /* ??? Mark blocks reachable from here. And even better make
+ sure to process entries to unreachable regions first. */
+ break;
+ }
+
cleanup_tree_cfg ();
bb_to_omp_idx.release ();
On Tue, 26 Oct 2021 14:10:05 +0200 (CEST)
Richard Biener <rguenther@suse.de> wrote:
> I agree in general, where I would diagnose this is when we build the
> CFG I would diagnose unreachable blocks - the above does not have
> any path to the second *poffset store. Like with the prototype patch
> below we warn for
>
> int *p;
> int foo()
> {
> return 0;
> *p = 0;
> }
Mhm. I thought we had -Wunreachable-code, but it seems that's gone.
thanks,
@@ -887,10 +887,11 @@ vect_slp_analyze_instance_dependence (vec_info *vinfo, slp_instance instance)
return res;
}
-/* Return the misalignment of DR_INFO accessed in VECTYPE. */
+/* Return the misalignment of DR_INFO accessed in VECTYPE with OFFSET
+ applied. */
int
-dr_misalignment (dr_vec_info *dr_info, tree vectype)
+dr_misalignment (dr_vec_info *dr_info, tree vectype, poly_int64 offset)
{
HOST_WIDE_INT diff = 0;
/* Alignment is only analyzed for the first element of a DR group,
@@ -919,13 +920,9 @@ dr_misalignment (dr_vec_info *dr_info, tree vectype)
targetm.vectorize.preferred_vector_alignment (vectype)))
return DR_MISALIGNMENT_UNKNOWN;
- /* If this is a backward running DR then first access in the larger
- vectype actually is N-1 elements before the address in the DR.
- Adjust misalign accordingly. */
- poly_int64 misalignment = misalign + diff;
- if (tree_int_cst_sgn (DR_STEP (dr_info->dr)) < 0)
- misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
- * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
+ /* Apply the offset from the DR group start and the externally supplied
+ offset which can for example result from a negative stride access. */
+ poly_int64 misalignment = misalign + diff + offset;
/* vect_compute_data_ref_alignment will have ensured that target_alignment
is constant and otherwise set misalign to DR_MISALIGNMENT_UNKNOWN. */
@@ -1549,8 +1546,15 @@ vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
int misalignment;
unsigned HOST_WIDE_INT alignment;
+ bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
+ size_zero_node) < 0;
+ poly_int64 off = 0;
+ if (negative)
+ off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
+ * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
+
if (npeel == 0)
- misalignment = dr_misalignment (dr_info, vectype);
+ misalignment = dr_misalignment (dr_info, vectype, off);
else if (dr_info == dr0_info
|| vect_dr_aligned_if_peeled_dr_is (dr_info, dr0_info))
misalignment = 0;
@@ -1560,7 +1564,7 @@ vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
misalignment = DR_MISALIGNMENT_UNKNOWN;
else
{
- misalignment = dr_misalignment (dr_info, vectype);
+ misalignment = dr_misalignment (dr_info, vectype, off);
misalignment += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
misalignment &= alignment - 1;
}
@@ -1960,8 +1964,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
*/
unsigned int target_align =
DR_TARGET_ALIGNMENT (dr_info).to_constant ();
- unsigned int dr_size = vect_get_scalar_dr_size (dr_info);
- unsigned int mis = dr_misalignment (dr_info, vectype);
+ unsigned HOST_WIDE_INT dr_size = vect_get_scalar_dr_size (dr_info);
+ poly_int64 off = 0;
+ if (negative)
+ off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
+ unsigned int mis = dr_misalignment (dr_info, vectype, off);
mis = negative ? mis : -mis;
if (mis != 0)
npeel_tmp = (mis & (target_align - 1)) / dr_size;
@@ -2238,8 +2245,13 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
updating DR_MISALIGNMENT values. The peeling factor is the
vectorization factor minus the misalignment as an element
count. */
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ poly_int64 off = 0;
+ if (negative)
+ off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
+ * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
unsigned int mis
- = dr_misalignment (dr0_info, STMT_VINFO_VECTYPE (stmt_info));
+ = dr_misalignment (dr0_info, vectype, off);
mis = negative ? mis : -mis;
/* If known_alignment_for_access_p then we have set
DR_MISALIGNMENT which is only done if we know it at compiler
@@ -2383,19 +2395,27 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
FOR_EACH_VEC_ELT (datarefs, i, dr)
{
dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
- stmt_vec_info stmt_info = dr_info->stmt;
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- int misalignment;
- if ((misalignment = dr_misalignment (dr_info, vectype)) == 0
- || !vect_relevant_for_alignment_p (dr_info))
+ if (!vect_relevant_for_alignment_p (dr_info))
continue;
+ stmt_vec_info stmt_info = dr_info->stmt;
if (STMT_VINFO_STRIDED_P (stmt_info))
{
do_versioning = false;
break;
}
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ bool negative = tree_int_cst_compare (DR_STEP (dr),
+ size_zero_node) < 0;
+ poly_int64 off = 0;
+ if (negative)
+ off = ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
+ * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
+ int misalignment;
+ if ((misalignment = dr_misalignment (dr_info, vectype, off)) == 0)
+ continue;
+
enum dr_alignment_support supportable_dr_alignment
= vect_supportable_dr_alignment (loop_vinfo, dr_info, vectype,
misalignment);
@@ -2001,7 +2001,7 @@ get_negative_load_store_type (vec_info *vinfo,
*poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
* TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
- int misalignment = dr_misalignment (dr_info, vectype);
+ int misalignment = dr_misalignment (dr_info, vectype, *poffset);
alignment_support_scheme
= vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
if (alignment_support_scheme != dr_aligned
@@ -2010,6 +2010,7 @@ get_negative_load_store_type (vec_info *vinfo,
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"negative step but alignment required.\n");
+ *poffset = 0;
return VMAT_ELEMENTWISE;
*poffset = 0;
}
@@ -2320,7 +2321,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
}
else
{
- *misalignment = dr_misalignment (first_dr_info, vectype);
+ *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
*alignment_support_scheme
= vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
*misalignment);
@@ -2470,7 +2471,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
else
*memory_access_type = VMAT_CONTIGUOUS;
*misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
- vectype);
+ vectype, *poffset);
*alignment_support_scheme
= vect_supportable_dr_alignment (vinfo,
STMT_VINFO_DR_INFO (stmt_info),
@@ -8252,10 +8253,7 @@ vectorizable_store (vec_info *vinfo,
align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
if (alignment_support_scheme == dr_aligned)
- {
- gcc_assert (aligned_access_p (first_dr_info, vectype));
- misalign = 0;
- }
+ misalign = 0;
else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
{
align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
@@ -8340,7 +8338,7 @@ vectorizable_store (vec_info *vinfo,
? dataref_offset
: build_int_cst (ref_type, 0));
if (alignment_support_scheme == dr_aligned)
- gcc_assert (aligned_access_p (first_dr_info, vectype));
+ ;
else
TREE_TYPE (data_ref)
= build_aligned_type (TREE_TYPE (data_ref),
@@ -9589,10 +9587,7 @@ vectorizable_load (vec_info *vinfo,
align =
known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
if (alignment_support_scheme == dr_aligned)
- {
- gcc_assert (aligned_access_p (first_dr_info, vectype));
- misalign = 0;
- }
+ misalign = 0;
else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
{
align = dr_alignment
@@ -1606,7 +1606,8 @@ set_dr_misalignment (dr_vec_info *dr_info, int val)
dr_info->misalignment = val;
}
-extern int dr_misalignment (dr_vec_info *dr_info, tree vectype);
+extern int dr_misalignment (dr_vec_info *dr_info, tree vectype,
+ poly_int64 offset = 0);
#define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)