[RFC] tree-optimization/65206 - dependence analysis on mixed pointer/array

Message ID 53129n1r-852-q7r-1n84-2nn5p6oqn51p@fhfr.qr
State RFC
Series [RFC] tree-optimization/65206 - dependence analysis on mixed pointer/array

Commit Message

Richard Biener Sept. 15, 2021, 2:09 p.m. UTC
  This adds the capability to analyze the dependence of mixed
pointer/array accesses.  The motivating example is one where using a
masked load/store creates a pointer-based access for what would
otherwise be an unconditional, array-based access.  Other examples
would include accesses to an array mixed with accesses from inlined
helpers that work on pointers.
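
For illustration, a case of the latter kind could look like the
following (the names and the helper are made up for this sketch, they
are not from the PR or the testcase):

  static inline void scale (double *p, double s) { *p *= s; }

  double a[1024];

  void foo (double s)
  {
    for (int i = 0; i < 1024; ++i)
      {
        a[i] += 1.;        /* array-based access to a[i] */
        scale (&a[i], s);  /* after inlining, a pointer-based access
                              to the same element */
      }
  }

Without the alternate indices the bases of such references fail to
match up and dependence analysis has to punt.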

The idea is quite simple and old - analyze the data-ref indices
as if the reference were pointer-based.  The following change does
this by making dr_analyze_indices work on the indices substructure
and storing an alternate indices substructure in each data
reference.  That alternate set of indices is analyzed lazily by
initialize_data_dependence_relation when it fails to match up the
main sets of indices of two data references.
initialize_data_dependence_relation is refactored into a head and a
tail worker and changed to work on one of the indices structures,
moving it away from the DR_* access macros, which continue to
reference the main indices substructure.
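
Conceptually (just a sketch of the idea, not the exact IL or dumps
the patch produces) the lazy re-analysis treats an array reference as
if it had been written through its address:

  a[i]      /* main indices: base object a, access fn roughly
               {0, +, 1}_1 */
  *(&a[i])  /* alternate indices: a single MEM_REF dimension with
               unconstrained base whose pointer evolves roughly as
               {&a[0], +, sizeof (a[0])}_1 */

With both references brought into that common pointer-based form the
usual subscript-by-subscript analysis can proceed.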

There's currently a --param in the patch controlling whether the
alternate indices path is used; I intend to remove it in the end.

The patch currently FAILs gcc.dg/vect/vect-cselim-1.c on x86_64 because
I enabled vect_masked_store but the testcase looks for 'short'-sized
element masking which isn't available.  I have to figure out how to
deal with this, possibly by not enabling vect_masked_store but instead
making the testcase target specific somehow.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

I'm currently gathering some statistics on SPEC CPU 2017 on
x86_64 with AVX2 to assess the effect on loop transforms by
means of -fopt-info-loop (but that takes some time due to the
serial compile).

Any comments on the approach and/or different or better ideas?

Thanks,
Richard.

2021-09-08  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/65206
	* tree-data-ref.h (struct data_reference): Add alt_indices,
	order it last.
	* tree-data-ref.c (dr_analyze_indices): Work on
	struct indices and get DR_REF as tree.
	(create_data_ref): Adjust.
	(initialize_data_dependence_relation): Split into head
	and tail.  When the base objects fail to match up try
	again with pointer-based analysis of indices.
	* tree-vectorizer.c (vec_info_shared::check_datarefs): Do
	not compare the lazily computed alternate set of indices.

	* gcc.dg/vect/pr65206.c: New testcase.
	* lib/target-supports.exp: Amend
	check_effective_target_vect_masked_store.
---
 gcc/params.opt                        |   4 +
 gcc/testsuite/gcc.dg/vect/pr65206.c   |  23 ++++
 gcc/testsuite/lib/target-supports.exp |   3 +-
 gcc/tree-data-ref.c                   | 163 +++++++++++++++++---------
 gcc/tree-data-ref.h                   |   9 +-
 gcc/tree-vectorizer.c                 |   3 +-
 6 files changed, 146 insertions(+), 59 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr65206.c
  

Patch

diff --git a/gcc/params.opt b/gcc/params.opt
index 658ca028851..74a76954a0a 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1137,4 +1137,8 @@  Controls how loop vectorizer uses partial vectors.  0 means never, 1 means only
 Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization
 The maximum factor which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized.
 
+-param=data-dep-alt-indices=
+Common Joined UInteger Var(param_data_dep_alt_indices) Init(1) IntegerRange(0, 1) Param Optimization
+Whether to allow the use of alternate indices in dependence analysis.
+
 ; This comment is to ensure we retain the blank line above.
diff --git a/gcc/testsuite/gcc.dg/vect/pr65206.c b/gcc/testsuite/gcc.dg/vect/pr65206.c
new file mode 100644
index 00000000000..17405297172
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65206.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vect_masked_store } */
+/* { dg-additional-options "-fno-trapping-math -fno-allow-store-data-races" } */
+/* { dg-additional-options "-mavx" { target avx } } */
+
+#define N 1024
+
+double a[N], b[N];
+
+void foo ()
+{
+  for (int i = 0; i < N; ++i)
+    if (b[i] < 3.)
+      a[i] += b[i];
+}
+
+/* We get a .MASK_STORE because while the load of a[i] does not trap
+   the store would introduce store data races.  Make sure we still
+   can handle the data dependence with zero distance.  */
+
+/* { dg-final { scan-tree-dump-not "versioning for alias required" "vect" } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 8697ceb53c9..74f47de6832 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7656,7 +7656,8 @@  proc check_effective_target_vect_masked_load { } {
 
 proc check_effective_target_vect_masked_store { } {
     return [expr { [check_effective_target_aarch64_sve]
-		   || [istarget amdgcn*-*-*] }]
+		   || [istarget amdgcn*-*-*]
+		   || [check_effective_target_avx] }]
 }
 
 # Return 1 if the target supports vector scatter stores.
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index e061baa7c20..80f27672766 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -1300,22 +1300,18 @@  base_supports_access_fn_components_p (tree base)
    DR, analyzed in LOOP and instantiated before NEST.  */
 
 static void
-dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
+dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
 {
-  vec<tree> access_fns = vNULL;
-  tree ref, op;
-  tree base, off, access_fn;
-
   /* If analyzing a basic-block there are no indices to analyze
      and thus no access functions.  */
   if (!nest)
     {
-      DR_BASE_OBJECT (dr) = DR_REF (dr);
-      DR_ACCESS_FNS (dr).create (0);
+      dri->base_object = ref;
+      dri->access_fns.create (0);
       return;
     }
 
-  ref = DR_REF (dr);
+  vec<tree> access_fns = vNULL;
 
   /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
      into a two element array with a constant index.  The base is
@@ -1338,8 +1334,8 @@  dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
     {
       if (TREE_CODE (ref) == ARRAY_REF)
 	{
-	  op = TREE_OPERAND (ref, 1);
-	  access_fn = analyze_scalar_evolution (loop, op);
+	  tree op = TREE_OPERAND (ref, 1);
+	  tree access_fn = analyze_scalar_evolution (loop, op);
 	  access_fn = instantiate_scev (nest, loop, access_fn);
 	  access_fns.safe_push (access_fn);
 	}
@@ -1370,16 +1366,16 @@  dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
      analyzed nest, add it as an additional independent access-function.  */
   if (TREE_CODE (ref) == MEM_REF)
     {
-      op = TREE_OPERAND (ref, 0);
-      access_fn = analyze_scalar_evolution (loop, op);
+      tree op = TREE_OPERAND (ref, 0);
+      tree access_fn = analyze_scalar_evolution (loop, op);
       access_fn = instantiate_scev (nest, loop, access_fn);
       if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
 	{
-	  tree orig_type;
 	  tree memoff = TREE_OPERAND (ref, 1);
-	  base = initial_condition (access_fn);
-	  orig_type = TREE_TYPE (base);
+	  tree base = initial_condition (access_fn);
+	  tree orig_type = TREE_TYPE (base);
 	  STRIP_USELESS_TYPE_CONVERSION (base);
+	  tree off;
 	  split_constant_offset (base, &base, &off);
 	  STRIP_USELESS_TYPE_CONVERSION (base);
 	  /* Fold the MEM_REF offset into the evolutions initial
@@ -1424,7 +1420,7 @@  dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
 				 base, memoff);
 	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
 	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
-	  DR_UNCONSTRAINED_BASE (dr) = true;
+	  dri->unconstrained_base = true;
 	  access_fns.safe_push (access_fn);
 	}
     }
@@ -1436,8 +1432,8 @@  dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
 		    build_int_cst (reference_alias_ptr_type (ref), 0));
     }
 
-  DR_BASE_OBJECT (dr) = ref;
-  DR_ACCESS_FNS (dr) = access_fns;
+  dri->base_object = ref;
+  dri->access_fns = access_fns;
 }
 
 /* Extracts the alias analysis information from the memory reference DR.  */
@@ -1497,7 +1493,7 @@  create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
 
   dr_analyze_innermost (&DR_INNERMOST (dr), memref,
 			nest != NULL ? loop : NULL, stmt);
-  dr_analyze_indices (dr, nest, loop);
+  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
   dr_analyze_alias (dr);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -3066,41 +3062,32 @@  access_fn_components_comparable_p (tree ref_a, tree ref_b)
 			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
 }
 
-/* Initialize a data dependence relation between data accesses A and
-   B.  NB_LOOPS is the number of loops surrounding the references: the
-   size of the classic distance/direction vectors.  */
+/* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
+   is true when the main indices of A and B were not comparable so we try again
+   with alternate indices computed on an indirect reference.  */
 
 struct data_dependence_relation *
-initialize_data_dependence_relation (struct data_reference *a,
-				     struct data_reference *b,
- 				     vec<loop_p> loop_nest)
+initialize_data_dependence_relation (struct data_dependence_relation *res,
+				     vec<loop_p> loop_nest,
+				     bool use_alt_indices)
 {
-  struct data_dependence_relation *res;
+  struct data_reference *a = DDR_A (res);
+  struct data_reference *b = DDR_B (res);
   unsigned int i;
 
-  res = XCNEW (struct data_dependence_relation);
-  DDR_A (res) = a;
-  DDR_B (res) = b;
-  DDR_LOOP_NEST (res).create (0);
-  DDR_SUBSCRIPTS (res).create (0);
-  DDR_DIR_VECTS (res).create (0);
-  DDR_DIST_VECTS (res).create (0);
-
-  if (a == NULL || b == NULL)
+  struct indices *indices_a, *indices_b;
+  if (!use_alt_indices)
     {
-      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
-      return res;
+      indices_a = &a->indices;
+      indices_b = &b->indices;
     }
-
-  /* If the data references do not alias, then they are independent.  */
-  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
+  else
     {
-      DDR_ARE_DEPENDENT (res) = chrec_known;
-      return res;
+      indices_a = &a->alt_indices;
+      indices_b = &b->alt_indices;
     }
-
-  unsigned int num_dimensions_a = DR_NUM_DIMENSIONS (a);
-  unsigned int num_dimensions_b = DR_NUM_DIMENSIONS (b);
+  unsigned int num_dimensions_a = indices_a->access_fns.length ();
+  unsigned int num_dimensions_b = indices_b->access_fns.length ();
   if (num_dimensions_a == 0 || num_dimensions_b == 0)
     {
       DDR_ARE_DEPENDENT (res) = chrec_dont_know;
@@ -3125,9 +3112,9 @@  initialize_data_dependence_relation (struct data_reference *a,
 
      the a and b accesses have a single ARRAY_REF component reference [0]
      but have two subscripts.  */
-  if (DR_UNCONSTRAINED_BASE (a))
+  if (indices_a->unconstrained_base)
     num_dimensions_a -= 1;
-  if (DR_UNCONSTRAINED_BASE (b))
+  if (indices_b->unconstrained_base)
     num_dimensions_b -= 1;
 
   /* These structures describe sequences of component references in
@@ -3210,6 +3197,10 @@  initialize_data_dependence_relation (struct data_reference *a,
         B: [3, 4]  (i.e. s.e)  */
   while (index_a < num_dimensions_a && index_b < num_dimensions_b)
     {
+      /* The alternate indices form always has a single dimension
+	 with unconstrained base.  */
+      gcc_assert (!use_alt_indices);
+
       /* REF_A and REF_B must be one of the component access types
 	 allowed by dr_analyze_indices.  */
       gcc_checking_assert (access_fn_component_p (ref_a));
@@ -3280,11 +3271,12 @@  initialize_data_dependence_relation (struct data_reference *a,
   /* See whether FULL_SEQ ends at the base and whether the two bases
      are equal.  We do not care about TBAA or alignment info so we can
      use OEP_ADDRESS_OF to avoid false negatives.  */
-  tree base_a = DR_BASE_OBJECT (a);
-  tree base_b = DR_BASE_OBJECT (b);
+  tree base_a = indices_a->base_object;
+  tree base_b = indices_b->base_object;
   bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
 		      && full_seq.start_b + full_seq.length == num_dimensions_b
-		      && DR_UNCONSTRAINED_BASE (a) == DR_UNCONSTRAINED_BASE (b)
+		      && (indices_a->unconstrained_base
+			  == indices_b->unconstrained_base)
 		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
 		      && (types_compatible_p (TREE_TYPE (base_a),
 					      TREE_TYPE (base_b))
@@ -3323,7 +3315,7 @@  initialize_data_dependence_relation (struct data_reference *a,
      both lvalues are distinct from the object's declared type.  */
   if (same_base_p)
     {
-      if (DR_UNCONSTRAINED_BASE (a))
+      if (indices_a->unconstrained_base)
 	full_seq.length += 1;
     }
   else
@@ -3332,8 +3324,35 @@  initialize_data_dependence_relation (struct data_reference *a,
   /* Punt if we didn't find a suitable sequence.  */
   if (full_seq.length == 0)
     {
-      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
-      return res;
+      if (use_alt_indices || !param_data_dep_alt_indices)
+	{
+	  /* Fully exhausted possibilities.  */
+	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
+	  return res;
+	}
+
+      /* Try evaluating both DRs as dereferences of pointers.  */
+      if (!a->alt_indices.base_object)
+	{
+	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
+				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
+				 build_int_cst
+				   (reference_alias_ptr_type (DR_REF (a)), 0));
+	  dr_analyze_indices (&a->alt_indices, alt_ref,
+			      loop_preheader_edge (loop_nest[0]),
+			      loop_containing_stmt (DR_STMT (a)));
+	}
+      if (!b->alt_indices.base_object)
+	{
+	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
+				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
+				 build_int_cst
+				   (reference_alias_ptr_type (DR_REF (b)), 0));
+	  dr_analyze_indices (&b->alt_indices, alt_ref,
+			      loop_preheader_edge (loop_nest[0]),
+			      loop_containing_stmt (DR_STMT (b)));
+	}
+      return initialize_data_dependence_relation (res, loop_nest, true);
     }
 
   if (!same_base_p)
@@ -3381,8 +3400,8 @@  initialize_data_dependence_relation (struct data_reference *a,
       struct subscript *subscript;
 
       subscript = XNEW (struct subscript);
-      SUB_ACCESS_FN (subscript, 0) = DR_ACCESS_FN (a, full_seq.start_a + i);
-      SUB_ACCESS_FN (subscript, 1) = DR_ACCESS_FN (b, full_seq.start_b + i);
+      SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
+      SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
       SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
       SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
       SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
@@ -3393,6 +3412,40 @@  initialize_data_dependence_relation (struct data_reference *a,
   return res;
 }
 
+/* Initialize a data dependence relation between data accesses A and
+   B.  NB_LOOPS is the number of loops surrounding the references: the
+   size of the classic distance/direction vectors.  */
+
+struct data_dependence_relation *
+initialize_data_dependence_relation (struct data_reference *a,
+				     struct data_reference *b,
+				     vec<loop_p> loop_nest)
+{
+  data_dependence_relation *res = XCNEW (struct data_dependence_relation);
+  DDR_A (res) = a;
+  DDR_B (res) = b;
+  DDR_LOOP_NEST (res).create (0);
+  DDR_SUBSCRIPTS (res).create (0);
+  DDR_DIR_VECTS (res).create (0);
+  DDR_DIST_VECTS (res).create (0);
+
+  if (a == NULL || b == NULL)
+    {
+      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
+      return res;
+    }
+
+  /* If the data references do not alias, then they are independent.  */
+  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
+    {
+      DDR_ARE_DEPENDENT (res) = chrec_known;
+      return res;
+    }
+
+  return initialize_data_dependence_relation (res, loop_nest, false);
+}
+
+
 /* Frees memory used by the conflict function F.  */
 
 static void
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
index 685f33d85ae..74f579c9f3f 100644
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
@@ -166,14 +166,19 @@  struct data_reference
      and runs to completion.  */
   bool is_conditional_in_stmt;
 
+  /* Alias information for the data reference.  */
+  struct dr_alias alias;
+
   /* Behavior of the memory reference in the innermost loop.  */
   struct innermost_loop_behavior innermost;
 
   /* Subscripts of this data reference.  */
   struct indices indices;
 
-  /* Alias information for the data reference.  */
-  struct dr_alias alias;
+  /* Alternate subscripts initialized lazily and used by data-dependence
+     analysis only when the main indices of two DRs are not comparable.
+     Keep last to keep vec_info_shared::check_datarefs happy.  */
+  struct indices alt_indices;
 };
 
 #define DR_STMT(DR)                (DR)->stmt
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 3aa3e2a6783..20daa31187d 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -507,7 +507,8 @@  vec_info_shared::check_datarefs ()
     return;
   gcc_assert (datarefs.length () == datarefs_copy.length ());
   for (unsigned i = 0; i < datarefs.length (); ++i)
-    if (memcmp (&datarefs_copy[i], datarefs[i], sizeof (data_reference)) != 0)
+    if (memcmp (&datarefs_copy[i], datarefs[i],
+		offsetof (data_reference, alt_indices)) != 0)
       gcc_unreachable ();
 }