[32/40] Reference reduction localization

Message ID 20211215155447.19379-33-frederik@codesourcery.com
State New
Headers
Series OpenACC "kernels" Improvements |

Commit Message

Frederik Harwath Dec. 15, 2021, 3:54 p.m. UTC
  From: Julian Brown <julian@codesourcery.com>

        gcc/
        * gimplify.c (privatize_reduction): New struct.
        (localize_reductions_r, localize_reductions): New functions.
        (gimplify_omp_for): Call localize_reductions.
        (gimplify_omp_workshare): Likewise.
        * omp-low.c (lower_oacc_reductions): Handle localized reductions.
        Create fewer temp vars.
        * tree-core.h (omp_clause_code): Add OMP_CLAUSE_REDUCTION_PRIVATE_DECL
        documentation.
        * tree.c (omp_clause_num_ops): Bump number of ops for
        OMP_CLAUSE_REDUCTION to 6.
        (walk_tree_1): Adjust accordingly.
        * tree.h (OMP_CLAUSE_REDUCTION_PRIVATE_DECL): Add macro.
---
 gcc/gimplify.c  | 102 +++++++++++++++++++++++++++++++++++
 gcc/omp-low.c   |  45 +++++-----------
 gcc/tree-core.h |   4 +-
 gcc/tree.c      | 137 +++++++++++++++++++++++++++++++++++++++++++++---
 gcc/tree.h      |   2 +
 5 files changed, 250 insertions(+), 40 deletions(-)

--
2.33.0

-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
  

Patch

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index c2ab96e7e182..9a4331c70d6e 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -240,6 +240,11 @@  struct gimplify_omp_ctx
   int defaultmap[5];
 };

+struct privatize_reduction
+{
+  tree ref_var, local_var;  /* Reference-typed decl; its local stand-in.  */
+};
+
 static struct gimplify_ctx *gimplify_ctxp;
 static struct gimplify_omp_ctx *gimplify_omp_ctxp;
 static bool in_omp_construct;
@@ -11900,6 +11905,80 @@  gimplify_omp_taskloop_expr (tree type, tree *tp, gimple_seq *pre_p,
   OMP_FOR_CLAUSES (orig_for_stmt) = c;
 }

+/* walk_tree callback for localize_reductions.  DATA points to a
+   privatize_reduction; replace ref_var or *ref_var with local_var.  */
+
+static tree
+localize_reductions_r (tree *tp, int *walk_subtrees, void *data)
+{
+  enum tree_code tc = TREE_CODE (*tp);
+  struct privatize_reduction *pr = (struct privatize_reduction *) data;
+
+  if (TYPE_P (*tp))
+    *walk_subtrees = 0;
+
+  switch (tc)
+    {
+    case INDIRECT_REF:
+    case MEM_REF:
+      if (TREE_OPERAND (*tp, 0) == pr->ref_var)
+       *tp = pr->local_var;
+
+      *walk_subtrees = 0;  /* Do not walk operands, even on no match.  */
+      break;
+
+    case VAR_DECL:
+    case PARM_DECL:
+    case RESULT_DECL:
+      if (*tp == pr->ref_var)
+       *tp = pr->local_var;
+
+      *walk_subtrees = 0;
+      break;
+
+    default:
+      break;
+    }
+
+  return NULL_TREE;
+}
+
+/* OpenACC worker and vector loop state propagation requires reductions
+   to be inside local variables.  For each reference-type reduction variable
+   in CLAUSES, create a temporary of the referenced type, use it in BODY in
+   place of the variable and store it in OMP_CLAUSE_REDUCTION_PRIVATE_DECL
+   (NULL otherwise).  Also used for reductions not on acc loops.  */
+
+static void
+localize_reductions (tree clauses, tree body)
+{
+  tree c, var, type, new_var;
+  struct privatize_reduction pr;
+
+  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
+      {
+       var = OMP_CLAUSE_DECL (c);
+
+       if (!lang_hooks.decls.omp_privatize_by_reference (var))
+         {
+           OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c) = NULL;
+           continue;
+         }
+
+       type = TREE_TYPE (TREE_TYPE (var));  /* Type VAR refers to.  */
+       new_var = create_tmp_var (type, IDENTIFIER_POINTER (DECL_NAME (var)));
+
+       pr.ref_var = var;
+       pr.local_var = new_var;
+
+       walk_tree (&body, localize_reductions_r, &pr, NULL);
+
+       OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c) = new_var;
+      }
+}
+
+
 /* Gimplify the gross structure of an OMP_FOR statement.  */

 static enum gimplify_status
@@ -12126,6 +12205,23 @@  gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
       gcc_unreachable ();
     }

+  if (ort == ORT_ACC)
+    {
+      gimplify_omp_ctx *outer = gimplify_omp_ctxp;
+
+      while (outer
+            && outer->region_type != ORT_ACC_PARALLEL
+            && outer->region_type != ORT_ACC_KERNELS)
+       outer = outer->outer_context;
+
+      /* FIXME: Reductions only work in parallel regions at present.  We avoid
+        doing the reduction localization transformation in kernels regions
+        here, because the code to remove reductions in kernels regions cannot
+        handle that.  */
+      if (outer && outer->region_type == ORT_ACC_PARALLEL)
+       localize_reductions (OMP_FOR_CLAUSES (*expr_p), OMP_FOR_BODY (*expr_p));
+    }
+
   /* Set OMP_CLAUSE_LINEAR_NO_COPYIN flag on explicit linear
      clause for the IV.  */
   if (ort == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1)
@@ -13654,6 +13750,12 @@  gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
       || (ort & ORT_HOST_TEAMS) == ORT_HOST_TEAMS)
     {
       push_gimplify_context ();
+
+      /* FIXME: Reductions are not supported in kernels regions yet.  */
+      if (/*ort == ORT_ACC_KERNELS ||*/ ort == ORT_ACC_PARALLEL)
+        localize_reductions (OMP_TARGET_CLAUSES (*expr_p),
+                            OMP_TARGET_BODY (*expr_p));
+
       gimple *g = gimplify_and_return_first (OMP_BODY (expr), &body);
       if (gimple_code (g) == GIMPLE_BIND)
        pop_gimplify_context (g);
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index afd6061ae1e9..ae5cdfc5e260 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -7530,9 +7530,9 @@  lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
             || is_oacc_kernels_decomposed_graphite_part (ctx));

        tree orig = OMP_CLAUSE_DECL (c);
-       tree var = maybe_lookup_decl (orig, ctx);
+       tree var;
        tree ref_to_res = NULL_TREE;
-       tree incoming, outgoing, v1, v2, v3;
+       tree incoming, outgoing;
        bool is_private = false;

        enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c);
@@ -7544,6 +7544,9 @@  lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
          rcode = BIT_IOR_EXPR;
        tree op = build_int_cst (unsigned_type_node, rcode);

+       var = OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c);
+       if (!var)
+         var = maybe_lookup_decl (orig, ctx);
        if (!var)
          var = orig;

@@ -7636,34 +7639,11 @@  lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,

        if (omp_privatize_by_reference (orig))
          {
-           tree type = TREE_TYPE (var);
-           const char *id = IDENTIFIER_POINTER (DECL_NAME (var));
-
-           if (!inner)
-             {
-               tree x = create_tmp_var (TREE_TYPE (type), id);
-               gimplify_assign (var, build_fold_addr_expr (x), fork_seq);
-             }
-
-           v1 = create_tmp_var (type, id);
-           v2 = create_tmp_var (type, id);
-           v3 = create_tmp_var (type, id);
-
-           gimplify_assign (v1, var, fork_seq);
-           gimplify_assign (v2, var, fork_seq);
-           gimplify_assign (v3, var, fork_seq);
-
-           var = build_simple_mem_ref (var);
-           v1 = build_simple_mem_ref (v1);
-           v2 = build_simple_mem_ref (v2);
-           v3 = build_simple_mem_ref (v3);
            outgoing = build_simple_mem_ref (outgoing);

            if (!TREE_CONSTANT (incoming))
              incoming = build_simple_mem_ref (incoming);
          }
-       else
-         v1 = v2 = v3 = var;

        /* Determine position in reduction buffer, which may be used
           by target.  The parser has ensured that this is not a
@@ -7696,20 +7676,21 @@  lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
          = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
                                          TREE_TYPE (var), 6, init_code,
                                          unshare_expr (ref_to_res),
-                                         v1, level, op, off);
+                                         var, level, op, off);
        tree fini_call
          = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
                                          TREE_TYPE (var), 6, fini_code,
                                          unshare_expr (ref_to_res),
-                                         v2, level, op, off);
+                                         var, level, op, off);
        tree teardown_call
          = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
-                                         TREE_TYPE (var), 6, teardown_code,
-                                         ref_to_res, v3, level, op, off);
+                                         TREE_TYPE (var), 6,
+                                         teardown_code, ref_to_res, var,
+                                         level, op, off);

-       gimplify_assign (v1, setup_call, &before_fork);
-       gimplify_assign (v2, init_call, &after_fork);
-       gimplify_assign (v3, fini_call, &before_join);
+       gimplify_assign (var, setup_call, &before_fork);
+       gimplify_assign (var, init_call, &after_fork);
+       gimplify_assign (var, fini_call, &before_join);
        gimplify_assign (outgoing, teardown_call, &after_join);
       }

diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index f0c65a25f070..980bdee6c285 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -269,7 +269,9 @@  enum omp_clause_code {
                 placeholder used in OMP_CLAUSE_REDUCTION_{INIT,MERGE}.
      Operand 4: OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER: Another dummy
                VAR_DECL placeholder, used like the above for C/C++ array
-               reductions.  */
+               reductions.
+     Operand 5: OMP_CLAUSE_REDUCTION_PRIVATE_DECL: A local VAR_DECL that
+                replaces a reference-typed reduction DECL, or NULL.  */
   OMP_CLAUSE_REDUCTION,

   /* OpenMP clause: task_reduction (operator:variable_list).  */
diff --git a/gcc/tree.c b/gcc/tree.c
index 7bfd64160f4e..08f5a3e884bf 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -283,7 +283,7 @@  unsigned const char omp_clause_num_ops[] =
   1, /* OMP_CLAUSE_SHARED  */
   1, /* OMP_CLAUSE_FIRSTPRIVATE  */
   2, /* OMP_CLAUSE_LASTPRIVATE  */
-  5, /* OMP_CLAUSE_REDUCTION  */
+  6, /* OMP_CLAUSE_REDUCTION  */
   5, /* OMP_CLAUSE_TASK_REDUCTION  */
   5, /* OMP_CLAUSE_IN_REDUCTION  */
   1, /* OMP_CLAUSE_COPYIN  */
@@ -11134,12 +11134,135 @@  walk_tree_1 (tree *tp, walk_tree_fn func, void *data,
       break;

     case OMP_CLAUSE:
-      {
-       int len = omp_clause_num_ops[OMP_CLAUSE_CODE (*tp)];
-       for (int i = 0; i < len; i++)
-         WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i));
-       WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
-      }
+      switch (OMP_CLAUSE_CODE (*tp))
+       {
+       case OMP_CLAUSE_GANG:
+         WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 1));
+         /* FALLTHRU */
+
+       case OMP_CLAUSE_ASYNC:
+       case OMP_CLAUSE_WAIT:
+       case OMP_CLAUSE_WORKER:
+       case OMP_CLAUSE_VECTOR:
+       case OMP_CLAUSE_NUM_GANGS:
+       case OMP_CLAUSE_NUM_WORKERS:
+       case OMP_CLAUSE_VECTOR_LENGTH:
+       case OMP_CLAUSE_PRIVATE:
+       case OMP_CLAUSE_SHARED:
+       case OMP_CLAUSE_FIRSTPRIVATE:
+       case OMP_CLAUSE_COPYIN:
+       case OMP_CLAUSE_COPYPRIVATE:
+       case OMP_CLAUSE_FILTER:
+       case OMP_CLAUSE_FINAL:
+       case OMP_CLAUSE_IF:
+       case OMP_CLAUSE_NUM_THREADS:
+       case OMP_CLAUSE_SCHEDULE:
+       case OMP_CLAUSE_UNIFORM:
+       case OMP_CLAUSE_DEPEND:
+       case OMP_CLAUSE_NONTEMPORAL:
+       case OMP_CLAUSE_NUM_TEAMS:
+       case OMP_CLAUSE_THREAD_LIMIT:
+       case OMP_CLAUSE_DEVICE:
+       case OMP_CLAUSE_DIST_SCHEDULE:
+       case OMP_CLAUSE_SAFELEN:
+       case OMP_CLAUSE_SIMDLEN:
+       case OMP_CLAUSE_ORDERED:
+       case OMP_CLAUSE_PRIORITY:
+       case OMP_CLAUSE_GRAINSIZE:
+       case OMP_CLAUSE_NUM_TASKS:
+       case OMP_CLAUSE_HINT:
+       case OMP_CLAUSE_TO_DECLARE:
+       case OMP_CLAUSE_LINK:
+       case OMP_CLAUSE_DETACH:
+       case OMP_CLAUSE_USE_DEVICE_PTR:
+       case OMP_CLAUSE_USE_DEVICE_ADDR:
+       case OMP_CLAUSE_IS_DEVICE_PTR:
+       case OMP_CLAUSE_INCLUSIVE:
+       case OMP_CLAUSE_EXCLUSIVE:
+       case OMP_CLAUSE__LOOPTEMP_:
+       case OMP_CLAUSE__REDUCTEMP_:
+       case OMP_CLAUSE__CONDTEMP_:
+       case OMP_CLAUSE__SCANTEMP_:
+       case OMP_CLAUSE__SIMDUID_:
+       case OMP_CLAUSE_AFFINITY:
+         WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0));
+         /* FALLTHRU */
+
+       case OMP_CLAUSE_INDEPENDENT:
+       case OMP_CLAUSE_NOWAIT:
+       case OMP_CLAUSE_DEFAULT:
+       case OMP_CLAUSE_UNTIED:
+       case OMP_CLAUSE_MERGEABLE:
+       case OMP_CLAUSE_PROC_BIND:
+       case OMP_CLAUSE_DEVICE_TYPE:
+       case OMP_CLAUSE_INBRANCH:
+       case OMP_CLAUSE_NOTINBRANCH:
+       case OMP_CLAUSE_FOR:
+       case OMP_CLAUSE_PARALLEL:
+       case OMP_CLAUSE_SECTIONS:
+       case OMP_CLAUSE_TASKGROUP:
+       case OMP_CLAUSE_NOGROUP:
+       case OMP_CLAUSE_THREADS:
+       case OMP_CLAUSE_SIMD:
+       case OMP_CLAUSE_DEFAULTMAP:
+       case OMP_CLAUSE_ORDER:
+       case OMP_CLAUSE_BIND:
+       case OMP_CLAUSE_AUTO:
+       case OMP_CLAUSE_SEQ:
+       case OMP_CLAUSE_NOHOST:
+       case OMP_CLAUSE_TILE:
+       case OMP_CLAUSE__SIMT_:
+       case OMP_CLAUSE_IF_PRESENT:
+       case OMP_CLAUSE_FINALIZE:
+         WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+
+       case OMP_CLAUSE_LASTPRIVATE:
+         WALK_SUBTREE (OMP_CLAUSE_DECL (*tp));
+         WALK_SUBTREE (OMP_CLAUSE_LASTPRIVATE_STMT (*tp));
+         WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+
+       case OMP_CLAUSE_COLLAPSE:
+         {
+           int i;
+           for (i = 0; i < 3; i++)
+             WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i));
+           WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+         }
+
+       case OMP_CLAUSE_LINEAR:
+         WALK_SUBTREE (OMP_CLAUSE_DECL (*tp));
+         WALK_SUBTREE (OMP_CLAUSE_LINEAR_STEP (*tp));
+         WALK_SUBTREE (OMP_CLAUSE_LINEAR_STMT (*tp));
+         WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+
+       case OMP_CLAUSE_ALIGNED:
+       case OMP_CLAUSE_ALLOCATE:
+       case OMP_CLAUSE_FROM:
+       case OMP_CLAUSE_TO:
+       case OMP_CLAUSE_MAP:
+       case OMP_CLAUSE__CACHE_:
+         WALK_SUBTREE (OMP_CLAUSE_DECL (*tp));
+         WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 1));
+         WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+
+       case OMP_CLAUSE_REDUCTION:
+         {
+           for (int i = 0; i < 6; i++)
+             WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i));
+           WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+         }
+
+       case OMP_CLAUSE_TASK_REDUCTION:
+       case OMP_CLAUSE_IN_REDUCTION:
+         {
+           for (int i = 0; i < 5; i++)
+             WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i));
+           WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+         }
+
+       default:
+         gcc_unreachable ();
+       }
       break;

     case TARGET_EXPR:
diff --git a/gcc/tree.h b/gcc/tree.h
index 15e5147f40b0..5ee1c33f4e15 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1746,6 +1746,8 @@  class auto_suppress_location_wrappers
 #define OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER(NODE) \
   OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (NODE, OMP_CLAUSE_REDUCTION, \
                                              OMP_CLAUSE_IN_REDUCTION), 4)
+#define OMP_CLAUSE_REDUCTION_PRIVATE_DECL(NODE) \
+  OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_REDUCTION), 5)

 /* True if a REDUCTION clause may reference the original list item (omp_orig)
    in its OMP_CLAUSE_REDUCTION_{,GIMPLE_}INIT.  */