gomp: Various fixes for SVE types [PR101018]

Message ID mptczj42t4d.fsf@arm.com
State New
Headers
Series gomp: Various fixes for SVE types [PR101018] |

Commit Message

Richard Sandiford March 2, 2022, 4:53 p.m. UTC
  Various parts of the omp code checked whether the size of a decl
was an INTEGER_CST in order to determine whether the decl was
variable-sized or not.  If it was variable-sized, it was expected
to have a DECL_VALUE_EXPR replacement, as for VLAs.

This patch uses poly_int_tree_p instead, so that variable-length
SVE vectors are treated like constant-length vectors.  This means
that some structures become poly_int-sized, with some fields at
poly_int offsets, but we already have code to handle that.

An alternative would have been to handle the data via indirection
instead.  However, that's likely to be more complicated, and it
would contradict is_variable_sized, which already uses a check
for TREE_CONSTANT rather than INTEGER_CST.

gimple_add_tmp_var should probably not add a safelen of 1
for SVE vectors, but that's really a separate thing and might
be hard to test.

Tested on aarch64-linux-gnu.  Probably only suitable for GCC 13,
but posting now in case anyone has any early feedback.

Thanks,
Richard


gcc/
	PR middle-end/101018
	* poly-int.h (can_and_p): New function.
	* fold-const.cc (poly_int_binop): Use it to optimize BIT_AND_EXPRs
	involving POLY_INT_CSTs.
	* expr.cc (get_inner_reference): Fold poly_uint64 size_trees
	into the constant bitsize.
	* gimplify.cc (gimplify_bind_expr): Use poly_int_tree_p instead
	of INTEGER_CST when checking for constant-sized omp data.
	(omp_add_variable): Likewise.
	(omp_notice_variable): Likewise.
	(gimplify_adjust_omp_clauses_1): Likewise.
	(gimplify_adjust_omp_clauses): Likewise.
	* omp-low.cc (scan_sharing_clauses): Likewise.
	(lower_omp_target): Likewise.

gcc/testsuite/
	PR middle-end/101018
	* gcc.target/aarch64/sve/acle/pr101018-1.c: New test.
	* gcc.target/aarch64/sve/acle/pr101018-2.c: Likewise
---
 gcc/expr.cc                                   |  4 +--
 gcc/fold-const.cc                             |  7 +++++
 gcc/gimplify.cc                               | 23 ++++++++--------
 gcc/omp-low.cc                                | 10 +++----
 gcc/poly-int.h                                | 19 +++++++++++++
 .../aarch64/sve/acle/general/pr101018-1.c     | 27 +++++++++++++++++++
 .../aarch64/sve/acle/general/pr101018-2.c     | 23 ++++++++++++++++
 7 files changed, 94 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr101018-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr101018-2.c
  

Patch

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 35e40299753..52d16c992f3 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7677,10 +7677,10 @@  get_inner_reference (tree exp, poly_int64_pod *pbitsize,
 
   if (size_tree != 0)
     {
-      if (! tree_fits_uhwi_p (size_tree))
+      if (! tree_fits_poly_uint64_p (size_tree))
 	mode = BLKmode, *pbitsize = -1;
       else
-	*pbitsize = tree_to_uhwi (size_tree);
+	*pbitsize = tree_to_poly_uint64 (size_tree);
     }
 
   *preversep = reverse_storage_order_for_component_p (exp);
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 7b21240ba76..f5a46b692f8 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -1157,6 +1157,13 @@  poly_int_binop (poly_wide_int &res, enum tree_code code,
 	return false;
       break;
 
+    case BIT_AND_EXPR:
+      if (TREE_CODE (arg2) != INTEGER_CST
+	  || !can_and_p (wi::to_poly_wide (arg1), wi::to_wide (arg2),
+			 &res))
+	return false;
+      break;
+
     case BIT_IOR_EXPR:
       if (TREE_CODE (arg2) != INTEGER_CST
 	  || !can_ior_p (wi::to_poly_wide (arg1), wi::to_wide (arg2),
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index f570daa015a..ed1aad54a55 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -7180,7 +7180,7 @@  omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags)
   /* When adding a variable-sized variable, we have to handle all sorts
      of additional bits of data: the pointer replacement variable, and
      the parameters of the type.  */
-  if (DECL_SIZE (decl) && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+  if (DECL_SIZE (decl) && !poly_int_tree_p (DECL_SIZE (decl)))
     {
       /* Add the pointer replacement variable as PRIVATE if the variable
 	 replacement is private, else FIRSTPRIVATE since we'll need the
@@ -7830,7 +7830,8 @@  omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code)
       && (flags & (GOVD_SEEN | GOVD_LOCAL)) == GOVD_SEEN
       && DECL_SIZE (decl))
     {
-      if (TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+      tree size;
+      if (!poly_int_tree_p (DECL_SIZE (decl)))
 	{
 	  splay_tree_node n2;
 	  tree t = DECL_VALUE_EXPR (decl);
@@ -7841,16 +7842,14 @@  omp_notice_variable (struct gimplify_omp_ctx *ctx, tree decl, bool in_code)
 	  n2->value |= GOVD_SEEN;
 	}
       else if (omp_privatize_by_reference (decl)
-	       && TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl)))
-	       && (TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl))))
-		   != INTEGER_CST))
+	       && (size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl))))
+	       && !poly_int_tree_p (size))
 	{
 	  splay_tree_node n2;
-	  tree t = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl)));
-	  gcc_assert (DECL_P (t));
-	  n2 = splay_tree_lookup (ctx->variables, (splay_tree_key) t);
+	  gcc_assert (DECL_P (size));
+	  n2 = splay_tree_lookup (ctx->variables, (splay_tree_key) size);
 	  if (n2)
-	    omp_notice_variable (ctx, t, true);
+	    omp_notice_variable (ctx, size, true);
 	}
     }
 
@@ -11155,7 +11154,7 @@  gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data)
       if ((gimplify_omp_ctxp->region_type & ORT_ACC) == 0)
 	OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P (clause) = 1;
       if (DECL_SIZE (decl)
-	  && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+	  && !poly_int_tree_p (DECL_SIZE (decl)))
 	{
 	  tree decl2 = DECL_VALUE_EXPR (decl);
 	  gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
@@ -11546,7 +11545,7 @@  gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 		}
 	    }
 	  else if (DECL_SIZE (decl)
-		   && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST
+		   && !poly_int_tree_p (DECL_SIZE (decl))
 		   && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_POINTER
 		   && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER
 		   && (OMP_CLAUSE_MAP_KIND (c)
@@ -11606,7 +11605,7 @@  gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 	  if (!DECL_P (decl))
 	    break;
 	  if (DECL_SIZE (decl)
-	      && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+	      && !poly_int_tree_p (DECL_SIZE (decl)))
 	    {
 	      tree decl2 = DECL_VALUE_EXPR (decl);
 	      gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index 77176efe715..aca787b1bf5 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -1446,7 +1446,7 @@  scan_sharing_clauses (tree clauses, omp_context *ctx)
 	  else
 	    install_var_field (decl, false, 11, ctx);
 	  if (DECL_SIZE (decl)
-	      && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+	      && !poly_int_tree_p (DECL_SIZE (decl)))
 	    {
 	      tree decl2 = DECL_VALUE_EXPR (decl);
 	      gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
@@ -1601,7 +1601,7 @@  scan_sharing_clauses (tree clauses, omp_context *ctx)
 	  if (DECL_P (decl))
 	    {
 	      if (DECL_SIZE (decl)
-		  && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+		  && !poly_int_tree_p (DECL_SIZE (decl)))
 		{
 		  tree decl2 = DECL_VALUE_EXPR (decl);
 		  gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
@@ -1843,7 +1843,7 @@  scan_sharing_clauses (tree clauses, omp_context *ctx)
 		    = remap_type (TREE_TYPE (decl), &ctx->cb);
 		}
 	      else if (DECL_SIZE (decl)
-		       && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
+		       && !poly_int_tree_p (DECL_SIZE (decl)))
 		{
 		  tree decl2 = DECL_VALUE_EXPR (decl);
 		  gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
@@ -12692,7 +12692,7 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 	  }
 
 	if (DECL_SIZE (var)
-	    && TREE_CODE (DECL_SIZE (var)) != INTEGER_CST)
+	    && !poly_int_tree_p (DECL_SIZE (var)))
 	  {
 	    tree var2 = DECL_VALUE_EXPR (var);
 	    gcc_assert (TREE_CODE (var2) == INDIRECT_REF);
@@ -13012,7 +13012,7 @@  lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
 	    else
 	      {
 		if (DECL_SIZE (ovar)
-		    && TREE_CODE (DECL_SIZE (ovar)) != INTEGER_CST)
+		    && !poly_int_tree_p (DECL_SIZE (ovar)))
 		  {
 		    tree ovar2 = DECL_VALUE_EXPR (ovar);
 		    gcc_assert (TREE_CODE (ovar2) == INDIRECT_REF);
diff --git a/gcc/poly-int.h b/gcc/poly-int.h
index 2bf9d98599f..c4e3432937a 100644
--- a/gcc/poly-int.h
+++ b/gcc/poly-int.h
@@ -1964,6 +1964,25 @@  known_alignment (const poly_int_pod<N, Ca> &a)
   return r & -r;
 }
 
+/* Return true if we can compute A & B at compile time, storing the
+   result in RES if so.  */
+
+template<unsigned int N, typename Ca, typename Cb, typename Cr>
+inline typename if_nonpoly<Cb, bool>::type
+can_and_p (const poly_int_pod<N, Ca> &a, Cb b, Cr *result)
+{
+  /* Coefficients 1 and above must be a multiple of something greater
+     than ~B.  */
+  typedef POLY_INT_TYPE (Ca) int_type;
+  if (N >= 2)
+    for (unsigned int i = 1; i < N; i++)
+      if ((-(a.coeffs[i] & -a.coeffs[i]) & ~b) != int_type (0))
+	return false;
+  *result = a;
+  result->coeffs[0] &= b;
+  return true;
+}
+
 /* Return true if we can compute A | B at compile time, storing the
    result in RES if so.  */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr101018-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr101018-1.c
new file mode 100644
index 00000000000..7592ad4c12e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr101018-1.c
@@ -0,0 +1,27 @@ 
+/* { dg-options "-O -fopenmp" } */
+
+#include <stdint.h>
+#include <arm_sve.h>
+
+extern long N;
+extern double *a, *b, *c;
+
+void tuned_STREAM_Triad(double scalar)
+{
+  const uint64_t el = svcntd();
+  const svfloat64_t vscalar = svdup_n_f64(scalar);
+  const int Nadj = N/el;
+  int j;
+
+#pragma omp parallel for
+  for (j = 0; j < Nadj; j ++) {
+	  svfloat64_t va, vb, vc;
+	  vb = svld1_vnum_f64(svptrue_b64(), b, j);
+	  vc = svld1_vnum_f64(svptrue_b64(), c, j);
+	  va = svmla_f64_z(svptrue_b64(), vb , vscalar, vc );
+	  svst1_vnum_f64(svptrue_b64(), a, j+0, va);
+  }
+
+  for (j = Nadj*el ; j < N ; j++)
+	  a[j] = b[j] + scalar * c[j];
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr101018-2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr101018-2.c
new file mode 100644
index 00000000000..eee9c9f7e2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr101018-2.c
@@ -0,0 +1,23 @@ 
+/* { dg-options "-O -fopenmp" } */
+
+#include <arm_sve.h>
+
+void ext(void *);
+svfloat32_t ext2();
+
+void
+foo (float32_t *ptr)
+{
+  svfloat32_t vec;
+  /* These directives are mostly nonsense, but they shouldn't ICE.  */
+  #pragma omp target data use_device_addr(vec)
+  ext(&vec);
+  #pragma omp target map(to:vec)
+  ext(&vec);
+  #pragma omp target defaultmap(none) firstprivate(vec)
+  ext(&vec);
+  #pragma omp target
+  ext(&vec);
+  #pragma omp target update to(vec)
+  vec = ext2();
+}