@@ -240,6 +240,11 @@ struct gimplify_omp_ctx
int defaultmap[5];
};
+struct privatize_reduction
+{
+ tree ref_var, local_var;
+};
+
static struct gimplify_ctx *gimplify_ctxp;
static struct gimplify_omp_ctx *gimplify_omp_ctxp;
static bool in_omp_construct;
@@ -11900,6 +11905,80 @@ gimplify_omp_taskloop_expr (tree type, tree *tp, gimple_seq *pre_p,
OMP_FOR_CLAUSES (orig_for_stmt) = c;
}
+/* Helper function for localize_reductions. Replace all uses of REF_VAR with
+ LOCAL_VAR. */
+
+static tree
+localize_reductions_r (tree *tp, int *walk_subtrees, void *data)
+{
+ enum tree_code tc = TREE_CODE (*tp);
+ struct privatize_reduction *pr = (struct privatize_reduction *) data;
+
+ if (TYPE_P (*tp))
+ *walk_subtrees = 0;
+
+ switch (tc)
+ {
+ case INDIRECT_REF:
+ case MEM_REF:
+ if (TREE_OPERAND (*tp, 0) == pr->ref_var)
+ *tp = pr->local_var;
+
+ *walk_subtrees = 0;
+ break;
+
+ case VAR_DECL:
+ case PARM_DECL:
+ case RESULT_DECL:
+ if (*tp == pr->ref_var)
+ *tp = pr->local_var;
+
+ *walk_subtrees = 0;
+ break;
+
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
+
+/* OpenACC worker and vector loop state propagation requires reductions
+ to be inside local variables. This function replaces all reference-type
+ reductions variables associated with the loop with a local copy. It is
+ also used to create private copies of reduction variables for those
+ which are not associated with acc loops. */
+
+static void
+localize_reductions (tree clauses, tree body)
+{
+ tree c, var, type, new_var;
+ struct privatize_reduction pr;
+
+ for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
+ {
+ var = OMP_CLAUSE_DECL (c);
+
+ if (!lang_hooks.decls.omp_privatize_by_reference (var))
+ {
+ OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c) = NULL;
+ continue;
+ }
+
+ type = TREE_TYPE (TREE_TYPE (var));
+ new_var = create_tmp_var (type, IDENTIFIER_POINTER (DECL_NAME (var)));
+
+ pr.ref_var = var;
+ pr.local_var = new_var;
+
+ walk_tree (&body, localize_reductions_r, &pr, NULL);
+
+ OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c) = new_var;
+ }
+}
+
+
/* Gimplify the gross structure of an OMP_FOR statement. */
static enum gimplify_status
@@ -12126,6 +12205,23 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p)
gcc_unreachable ();
}
+ if (ort == ORT_ACC)
+ {
+ gimplify_omp_ctx *outer = gimplify_omp_ctxp;
+
+ while (outer
+ && outer->region_type != ORT_ACC_PARALLEL
+ && outer->region_type != ORT_ACC_KERNELS)
+ outer = outer->outer_context;
+
+ /* FIXME: Reductions only work in parallel regions at present. We avoid
+ doing the reduction localization transformation in kernels regions
+ here, because the code to remove reductions in kernels regions cannot
+ handle that. */
+ if (outer && outer->region_type == ORT_ACC_PARALLEL)
+ localize_reductions (OMP_FOR_CLAUSES (*expr_p), OMP_FOR_BODY (*expr_p));
+ }
+
/* Set OMP_CLAUSE_LINEAR_NO_COPYIN flag on explicit linear
clause for the IV. */
if (ort == ORT_SIMD && TREE_VEC_LENGTH (OMP_FOR_INIT (for_stmt)) == 1)
@@ -13654,6 +13750,12 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
|| (ort & ORT_HOST_TEAMS) == ORT_HOST_TEAMS)
{
push_gimplify_context ();
+
+ /* FIXME: Reductions are not supported in kernels regions yet. */
+ if (/*ort == ORT_ACC_KERNELS ||*/ ort == ORT_ACC_PARALLEL)
+ localize_reductions (OMP_TARGET_CLAUSES (*expr_p),
+ OMP_TARGET_BODY (*expr_p));
+
gimple *g = gimplify_and_return_first (OMP_BODY (expr), &body);
if (gimple_code (g) == GIMPLE_BIND)
pop_gimplify_context (g);
@@ -7530,9 +7530,9 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
|| is_oacc_kernels_decomposed_graphite_part (ctx));
tree orig = OMP_CLAUSE_DECL (c);
- tree var = maybe_lookup_decl (orig, ctx);
+ tree var;
tree ref_to_res = NULL_TREE;
- tree incoming, outgoing, v1, v2, v3;
+ tree incoming, outgoing;
bool is_private = false;
enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c);
@@ -7544,6 +7544,9 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
rcode = BIT_IOR_EXPR;
tree op = build_int_cst (unsigned_type_node, rcode);
+ var = OMP_CLAUSE_REDUCTION_PRIVATE_DECL (c);
+ if (!var)
+ var = maybe_lookup_decl (orig, ctx);
if (!var)
var = orig;
@@ -7636,34 +7639,11 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
if (omp_privatize_by_reference (orig))
{
- tree type = TREE_TYPE (var);
- const char *id = IDENTIFIER_POINTER (DECL_NAME (var));
-
- if (!inner)
- {
- tree x = create_tmp_var (TREE_TYPE (type), id);
- gimplify_assign (var, build_fold_addr_expr (x), fork_seq);
- }
-
- v1 = create_tmp_var (type, id);
- v2 = create_tmp_var (type, id);
- v3 = create_tmp_var (type, id);
-
- gimplify_assign (v1, var, fork_seq);
- gimplify_assign (v2, var, fork_seq);
- gimplify_assign (v3, var, fork_seq);
-
- var = build_simple_mem_ref (var);
- v1 = build_simple_mem_ref (v1);
- v2 = build_simple_mem_ref (v2);
- v3 = build_simple_mem_ref (v3);
outgoing = build_simple_mem_ref (outgoing);
if (!TREE_CONSTANT (incoming))
incoming = build_simple_mem_ref (incoming);
}
- else
- v1 = v2 = v3 = var;
/* Determine position in reduction buffer, which may be used
by target. The parser has ensured that this is not a
@@ -7696,20 +7676,21 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, init_code,
unshare_expr (ref_to_res),
- v1, level, op, off);
+ var, level, op, off);
tree fini_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
TREE_TYPE (var), 6, fini_code,
unshare_expr (ref_to_res),
- v2, level, op, off);
+ var, level, op, off);
tree teardown_call
= build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
- TREE_TYPE (var), 6, teardown_code,
- ref_to_res, v3, level, op, off);
+ TREE_TYPE (var), 6,
+ teardown_code, ref_to_res, var,
+ level, op, off);
- gimplify_assign (v1, setup_call, &before_fork);
- gimplify_assign (v2, init_call, &after_fork);
- gimplify_assign (v3, fini_call, &before_join);
+ gimplify_assign (var, setup_call, &before_fork);
+ gimplify_assign (var, init_call, &after_fork);
+ gimplify_assign (var, fini_call, &before_join);
gimplify_assign (outgoing, teardown_call, &after_join);
}
@@ -269,7 +269,9 @@ enum omp_clause_code {
placeholder used in OMP_CLAUSE_REDUCTION_{INIT,MERGE}.
Operand 4: OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER: Another dummy
VAR_DECL placeholder, used like the above for C/C++ array
- reductions. */
+ reductions.
+ Operand 5: OMP_CLAUSE_REDUCTION_PRIVATE_DECL: A private VAR_DECL of
+ the original DECL associated with the reduction clause. */
OMP_CLAUSE_REDUCTION,
/* OpenMP clause: task_reduction (operator:variable_list). */
@@ -283,7 +283,7 @@ unsigned const char omp_clause_num_ops[] =
1, /* OMP_CLAUSE_SHARED */
1, /* OMP_CLAUSE_FIRSTPRIVATE */
2, /* OMP_CLAUSE_LASTPRIVATE */
- 5, /* OMP_CLAUSE_REDUCTION */
+ 6, /* OMP_CLAUSE_REDUCTION */
5, /* OMP_CLAUSE_TASK_REDUCTION */
5, /* OMP_CLAUSE_IN_REDUCTION */
1, /* OMP_CLAUSE_COPYIN */
@@ -11134,12 +11134,135 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data,
break;
case OMP_CLAUSE:
- {
- int len = omp_clause_num_ops[OMP_CLAUSE_CODE (*tp)];
- for (int i = 0; i < len; i++)
- WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i));
- WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
- }
+ switch (OMP_CLAUSE_CODE (*tp))
+ {
+ case OMP_CLAUSE_GANG:
+ WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 1));
+ /* FALLTHRU */
+
+ case OMP_CLAUSE_ASYNC:
+ case OMP_CLAUSE_WAIT:
+ case OMP_CLAUSE_WORKER:
+ case OMP_CLAUSE_VECTOR:
+ case OMP_CLAUSE_NUM_GANGS:
+ case OMP_CLAUSE_NUM_WORKERS:
+ case OMP_CLAUSE_VECTOR_LENGTH:
+ case OMP_CLAUSE_PRIVATE:
+ case OMP_CLAUSE_SHARED:
+ case OMP_CLAUSE_FIRSTPRIVATE:
+ case OMP_CLAUSE_COPYIN:
+ case OMP_CLAUSE_COPYPRIVATE:
+ case OMP_CLAUSE_FILTER:
+ case OMP_CLAUSE_FINAL:
+ case OMP_CLAUSE_IF:
+ case OMP_CLAUSE_NUM_THREADS:
+ case OMP_CLAUSE_SCHEDULE:
+ case OMP_CLAUSE_UNIFORM:
+ case OMP_CLAUSE_DEPEND:
+ case OMP_CLAUSE_NONTEMPORAL:
+ case OMP_CLAUSE_NUM_TEAMS:
+ case OMP_CLAUSE_THREAD_LIMIT:
+ case OMP_CLAUSE_DEVICE:
+ case OMP_CLAUSE_DIST_SCHEDULE:
+ case OMP_CLAUSE_SAFELEN:
+ case OMP_CLAUSE_SIMDLEN:
+ case OMP_CLAUSE_ORDERED:
+ case OMP_CLAUSE_PRIORITY:
+ case OMP_CLAUSE_GRAINSIZE:
+ case OMP_CLAUSE_NUM_TASKS:
+ case OMP_CLAUSE_HINT:
+ case OMP_CLAUSE_TO_DECLARE:
+ case OMP_CLAUSE_LINK:
+ case OMP_CLAUSE_DETACH:
+ case OMP_CLAUSE_USE_DEVICE_PTR:
+ case OMP_CLAUSE_USE_DEVICE_ADDR:
+ case OMP_CLAUSE_IS_DEVICE_PTR:
+ case OMP_CLAUSE_INCLUSIVE:
+ case OMP_CLAUSE_EXCLUSIVE:
+ case OMP_CLAUSE__LOOPTEMP_:
+ case OMP_CLAUSE__REDUCTEMP_:
+ case OMP_CLAUSE__CONDTEMP_:
+ case OMP_CLAUSE__SCANTEMP_:
+ case OMP_CLAUSE__SIMDUID_:
+ case OMP_CLAUSE_AFFINITY:
+ WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 0));
+ /* FALLTHRU */
+
+ case OMP_CLAUSE_INDEPENDENT:
+ case OMP_CLAUSE_NOWAIT:
+ case OMP_CLAUSE_DEFAULT:
+ case OMP_CLAUSE_UNTIED:
+ case OMP_CLAUSE_MERGEABLE:
+ case OMP_CLAUSE_PROC_BIND:
+ case OMP_CLAUSE_DEVICE_TYPE:
+ case OMP_CLAUSE_INBRANCH:
+ case OMP_CLAUSE_NOTINBRANCH:
+ case OMP_CLAUSE_FOR:
+ case OMP_CLAUSE_PARALLEL:
+ case OMP_CLAUSE_SECTIONS:
+ case OMP_CLAUSE_TASKGROUP:
+ case OMP_CLAUSE_NOGROUP:
+ case OMP_CLAUSE_THREADS:
+ case OMP_CLAUSE_SIMD:
+ case OMP_CLAUSE_DEFAULTMAP:
+ case OMP_CLAUSE_ORDER:
+ case OMP_CLAUSE_BIND:
+ case OMP_CLAUSE_AUTO:
+ case OMP_CLAUSE_SEQ:
+ case OMP_CLAUSE_NOHOST:
+ case OMP_CLAUSE_TILE:
+ case OMP_CLAUSE__SIMT_:
+ case OMP_CLAUSE_IF_PRESENT:
+ case OMP_CLAUSE_FINALIZE:
+ WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+
+ case OMP_CLAUSE_LASTPRIVATE:
+ WALK_SUBTREE (OMP_CLAUSE_DECL (*tp));
+ WALK_SUBTREE (OMP_CLAUSE_LASTPRIVATE_STMT (*tp));
+ WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+
+ case OMP_CLAUSE_COLLAPSE:
+ {
+ int i;
+ for (i = 0; i < 3; i++)
+ WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i));
+ WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+ }
+
+ case OMP_CLAUSE_LINEAR:
+ WALK_SUBTREE (OMP_CLAUSE_DECL (*tp));
+ WALK_SUBTREE (OMP_CLAUSE_LINEAR_STEP (*tp));
+ WALK_SUBTREE (OMP_CLAUSE_LINEAR_STMT (*tp));
+ WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+
+ case OMP_CLAUSE_ALIGNED:
+ case OMP_CLAUSE_ALLOCATE:
+ case OMP_CLAUSE_FROM:
+ case OMP_CLAUSE_TO:
+ case OMP_CLAUSE_MAP:
+ case OMP_CLAUSE__CACHE_:
+ WALK_SUBTREE (OMP_CLAUSE_DECL (*tp));
+ WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 1));
+ WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+
+ case OMP_CLAUSE_REDUCTION:
+ {
+ for (int i = 0; i < 6; i++)
+ WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i));
+ WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+ }
+
+ case OMP_CLAUSE_TASK_REDUCTION:
+ case OMP_CLAUSE_IN_REDUCTION:
+ {
+ for (int i = 0; i < 5; i++)
+ WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, i));
+ WALK_SUBTREE_TAIL (OMP_CLAUSE_CHAIN (*tp));
+ }
+
+ default:
+ gcc_unreachable ();
+ }
break;
case TARGET_EXPR:
@@ -1746,6 +1746,8 @@ class auto_suppress_location_wrappers
#define OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER(NODE) \
OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (NODE, OMP_CLAUSE_REDUCTION, \
OMP_CLAUSE_IN_REDUCTION), 4)
+#define OMP_CLAUSE_REDUCTION_PRIVATE_DECL(NODE) \
+ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_REDUCTION), 5)
/* True if a REDUCTION clause may reference the original list item (omp_orig)
in its OMP_CLAUSE_REDUCTION_{,GIMPLE_}INIT. */