[committed] openmp: Handle unconstrained and reproducible modifiers on order(concurrent)

Message ID 20210918080203.GX304296@tucnak
State Committed
Delegated to: Jonathan Wakely
Headers
Series [committed] openmp: Handle unconstrained and reproducible modifiers on order(concurrent) |

Commit Message

Jakub Jelinek Sept. 18, 2021, 8:02 a.m. UTC
  Hi!

This patch adds handling for the unconstrained and reproducible modifiers
on the order(concurrent) clause.  For all static schedules (including auto
and no schedule or dist_schedule clauses) I believe what we implement is
reproducible, so the patch doesn't do much beyond recognizing those.
Note, there is an OpenMP/spec issue that needs resolution on what
should happen with the dynamic schedules (whether it should be an error
to mix such clauses, or silently make it non-reproducible, and in which
exact cases), so it might need some follow-up.

Besides that, this patch allows the order(concurrent) clause on the
distribute construct, which is something also added in OpenMP 5.1, and
finally checks the newly added restriction that at most one order clause
can appear on a construct.

Allowing the order clause on distribute has a side effect:
order(concurrent) copyin(thrpriv) is no longer allowed on combined/composite
constructs with distribute parallel for{, simd} in them.  Previously the
order clause applied only to for/simd, so a threadprivate var could be seen
in the construct, but now it also applies to distribute, and so on the
parallel we shouldn't refer to a threadprivate var.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2021-09-18  Jakub Jelinek  <jakub@redhat.com>

gcc/
	* tree.h (OMP_CLAUSE_ORDER_UNCONSTRAINED): Define.
	* tree-pretty-print.c (dump_omp_clause): Print unconstrained:
	for OMP_CLAUSE_ORDER_UNCONSTRAINED.
gcc/c-family/
	* c-omp.c (c_omp_split_clauses): Split order clause also to
	distribute construct.  Copy over OMP_CLAUSE_ORDER_UNCONSTRAINED.
gcc/c/
	* c-parser.c (c_parser_omp_clause_order): Parse unconstrained
	and reproducible modifiers.
	(OMP_DISTRIBUTE_CLAUSE_MASK): Add order clause.
gcc/cp/
	* parser.c (cp_parser_omp_clause_order): Parse unconstrained
	and reproducible modifiers.
	(OMP_DISTRIBUTE_CLAUSE_MASK): Add order clause.
gcc/testsuite/
	* c-c++-common/gomp/order-1.c (f2): Add tests for distribute
	with order clause.
	(f3): Remove.
	* c-c++-common/gomp/order-2.c: Don't expect error for distribute
	with order clause.
	* c-c++-common/gomp/order-5.c: New test.
	* c-c++-common/gomp/order-6.c: New test.
	* c-c++-common/gomp/clause-dups-1.c (f1): Add tests for
	duplicated order clause.
	(f9): New function.
	* c-c++-common/gomp/clauses-1.c (baz, bar): Don't mix copyin and
	order(concurrent) clauses on the same composite construct combined
	with distribute, instead split it into two tests, one without
	copyin and one without order(concurrent).  Add order(concurrent)
	clauses to {,{,target} teams} distribute.
	* g++.dg/gomp/attrs-1.C (baz, bar): Likewise.
	* g++.dg/gomp/attrs-2.C (baz, bar): Likewise.


	Jakub
  

Patch

--- gcc/tree.h.jj	2021-09-16 10:51:02.295976216 +0200
+++ gcc/tree.h	2021-09-17 18:21:01.473512901 +0200
@@ -1715,6 +1715,10 @@  class auto_suppress_location_wrappers
 #define OMP_CLAUSE_ORDERED_EXPR(NODE) \
   OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_ORDERED), 0)
 
+/* True for unconstrained modifier on order(concurrent) clause.  */
+#define OMP_CLAUSE_ORDER_UNCONSTRAINED(NODE) \
+  (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_ORDER)->base.public_flag)
+
 #define OMP_CLAUSE_REDUCTION_CODE(NODE)	\
   (OMP_CLAUSE_RANGE_CHECK (NODE, OMP_CLAUSE_REDUCTION, \
      OMP_CLAUSE_IN_REDUCTION)->omp_clause.subcode.reduction_code)
--- gcc/tree-pretty-print.c.jj	2021-09-11 09:33:37.928331352 +0200
+++ gcc/tree-pretty-print.c	2021-09-17 19:20:35.470795696 +0200
@@ -1149,7 +1149,10 @@  dump_omp_clause (pretty_printer *pp, tre
       break;
 
     case OMP_CLAUSE_ORDER:
-      pp_string (pp, "order(concurrent)");
+      pp_string (pp, "order(");
+      if (OMP_CLAUSE_ORDER_UNCONSTRAINED (clause))
+	pp_string (pp, "unconstrained:");
+      pp_string (pp, "concurrent)");
       break;
 
     case OMP_CLAUSE_BIND:
--- gcc/c-family/c-omp.c.jj	2021-09-17 11:28:07.599834172 +0200
+++ gcc/c-family/c-omp.c	2021-09-17 19:19:52.628391341 +0200
@@ -2114,14 +2114,31 @@  c_omp_split_clauses (location_t loc, enu
 	    }
 	  s = C_OMP_CLAUSE_SPLIT_PARALLEL;
 	  break;
-	/* order clauses are allowed on for, simd and loop.  */
+	/* order clauses are allowed on distribute, for, simd and loop.  */
 	case OMP_CLAUSE_ORDER:
+	  if ((mask & (OMP_CLAUSE_MASK_1
+		       << PRAGMA_OMP_CLAUSE_DIST_SCHEDULE)) != 0)
+	    {
+	      if (code == OMP_DISTRIBUTE)
+		{
+		  s = C_OMP_CLAUSE_SPLIT_DISTRIBUTE;
+		  break;
+		}
+	      c = build_omp_clause (OMP_CLAUSE_LOCATION (clauses),
+				    OMP_CLAUSE_ORDER);
+	      OMP_CLAUSE_ORDER_UNCONSTRAINED (c)
+		= OMP_CLAUSE_ORDER_UNCONSTRAINED (clauses);
+	      OMP_CLAUSE_CHAIN (c) = cclauses[C_OMP_CLAUSE_SPLIT_DISTRIBUTE];
+	      cclauses[C_OMP_CLAUSE_SPLIT_DISTRIBUTE] = c;
+	    }
 	  if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_SCHEDULE)) != 0)
 	    {
 	      if (code == OMP_SIMD)
 		{
 		  c = build_omp_clause (OMP_CLAUSE_LOCATION (clauses),
 					OMP_CLAUSE_ORDER);
+		  OMP_CLAUSE_ORDER_UNCONSTRAINED (c)
+		    = OMP_CLAUSE_ORDER_UNCONSTRAINED (clauses);
 		  OMP_CLAUSE_CHAIN (c) = cclauses[C_OMP_CLAUSE_SPLIT_FOR];
 		  cclauses[C_OMP_CLAUSE_SPLIT_FOR] = c;
 		  s = C_OMP_CLAUSE_SPLIT_SIMD;
--- gcc/c/c-parser.c.jj	2021-09-17 16:45:26.390466583 +0200
+++ gcc/c/c-parser.c	2021-09-17 19:02:27.915917551 +0200
@@ -14591,7 +14591,14 @@  c_parser_oacc_clause_wait (c_parser *par
 
 
 /* OpenMP 5.0:
-   order ( concurrent ) */
+   order ( concurrent )
+
+   OpenMP 5.1:
+   order ( order-modifier : concurrent )
+
+   order-modifier:
+     reproducible
+     unconstrained  */
 
 static tree
 c_parser_omp_clause_order (c_parser *parser, tree list)
@@ -14599,10 +14606,26 @@  c_parser_omp_clause_order (c_parser *par
   location_t loc = c_parser_peek_token (parser)->location;
   tree c;
   const char *p;
+  bool unconstrained = false;
 
   matching_parens parens;
   if (!parens.require_open (parser))
     return list;
+  if (c_parser_next_token_is (parser, CPP_NAME)
+      && c_parser_peek_2nd_token (parser)->type == CPP_COLON)
+    {
+      p = IDENTIFIER_POINTER (c_parser_peek_token (parser)->value);
+      if (strcmp (p, "unconstrained") == 0)
+	unconstrained = true;
+      else if (strcmp (p, "reproducible") != 0)
+	{
+	  c_parser_error (parser, "expected %<reproducible%> or "
+				  "%<unconstrained%>");
+	  goto out_err;
+	}
+      c_parser_consume_token (parser);
+      c_parser_consume_token (parser);
+    }
   if (!c_parser_next_token_is (parser, CPP_NAME))
     {
       c_parser_error (parser, "expected %<concurrent%>");
@@ -14616,8 +14639,9 @@  c_parser_omp_clause_order (c_parser *par
     }
   c_parser_consume_token (parser);
   parens.skip_until_found_close (parser);
-  /* check_no_duplicate_clause (list, OMP_CLAUSE_ORDER, "order"); */
+  check_no_duplicate_clause (list, OMP_CLAUSE_ORDER, "order");
   c = build_omp_clause (loc, OMP_CLAUSE_ORDER);
+  OMP_CLAUSE_ORDER_UNCONSTRAINED (c) = unconstrained;
   OMP_CLAUSE_CHAIN (c) = list;
   return c;
 
@@ -20231,7 +20255,8 @@  c_parser_omp_cancellation_point (c_parse
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_LASTPRIVATE)	\
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DIST_SCHEDULE)\
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE)	\
-	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_COLLAPSE))
+	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_COLLAPSE)	\
+	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ORDER))
 
 static tree
 c_parser_omp_distribute (location_t loc, c_parser *parser,
--- gcc/cp/parser.c.jj	2021-09-17 16:45:26.415466234 +0200
+++ gcc/cp/parser.c	2021-09-17 19:02:55.504533680 +0200
@@ -37651,18 +37651,42 @@  cp_parser_omp_clause_defaultmap (cp_pars
 }
 
 /* OpenMP 5.0:
-   order ( concurrent ) */
+   order ( concurrent )
+
+   OpenMP 5.1:
+   order ( order-modifier : concurrent )
+
+   order-modifier:
+     reproducible
+     unconstrained  */
 
 static tree
 cp_parser_omp_clause_order (cp_parser *parser, tree list, location_t location)
 {
   tree c, id;
   const char *p;
+  bool unconstrained = false;
 
   matching_parens parens;
   if (!parens.require_open (parser))
     return list;
 
+  if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)
+      && cp_lexer_nth_token_is (parser->lexer, 2, CPP_COLON))
+    {
+      id = cp_lexer_peek_token (parser->lexer)->u.value;
+      p = IDENTIFIER_POINTER (id);
+      if (strcmp (p, "unconstrained") == 0)
+	unconstrained = true;
+      else if (strcmp (p, "reproducible") != 0)
+	{
+	  cp_parser_error (parser, "expected %<reproducible%> or "
+				   "%<unconstrained%>");
+	  goto out_err;
+	}
+      cp_lexer_consume_token (parser->lexer);
+      cp_lexer_consume_token (parser->lexer);
+    }
   if (!cp_lexer_next_token_is (parser->lexer, CPP_NAME))
     {
       cp_parser_error (parser, "expected %<concurrent%>");
@@ -37682,8 +37706,9 @@  cp_parser_omp_clause_order (cp_parser *p
   if (!parens.require_close (parser))
     goto out_err;
 
-  /* check_no_duplicate_clause (list, OMP_CLAUSE_ORDER, "order", location); */
+  check_no_duplicate_clause (list, OMP_CLAUSE_ORDER, "order", location);
   c = build_omp_clause (location, OMP_CLAUSE_ORDER);
+  OMP_CLAUSE_ORDER_UNCONSTRAINED (c) = unconstrained;
   OMP_CLAUSE_CHAIN (c) = list;
   return c;
 
@@ -43294,7 +43319,8 @@  cp_parser_omp_cancellation_point (cp_par
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_LASTPRIVATE)	\
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DIST_SCHEDULE)\
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE)	\
-	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_COLLAPSE))
+	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_COLLAPSE)	\
+	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ORDER))
 
 static tree
 cp_parser_omp_distribute (cp_parser *parser, cp_token *pragma_tok,
--- gcc/testsuite/c-c++-common/gomp/order-1.c.jj	2020-01-12 11:54:37.014404341 +0100
+++ gcc/testsuite/c-c++-common/gomp/order-1.c	2021-09-17 19:07:19.490862355 +0200
@@ -29,6 +29,9 @@  f2 (int *a)
   #pragma omp teams distribute parallel for simd order(concurrent)
   for (i = 0; i < 128; i++)
     a[i]++;
+  #pragma omp teams distribute order(concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
   #pragma omp teams
   {
     #pragma omp distribute parallel for order(concurrent)
@@ -37,17 +40,11 @@  f2 (int *a)
     #pragma omp distribute parallel for simd order(concurrent)
     for (i = 0; i < 128; i++)
       a[i]++;
+    #pragma omp distribute order(concurrent)
+    for (i = 0; i < 128; i++)
+      a[i]++;
   }
   #pragma omp taskloop simd order (concurrent)
   for (i = 0; i < 128; i++)
     a[i]++;
 }
-
-void
-f3 (int *a)
-{
-  int i;
-  #pragma omp for order(concurrent) order(concurrent) order(concurrent)
-  for (i = 0; i < 128; i++)
-    a[i]++;
-}
--- gcc/testsuite/c-c++-common/gomp/order-2.c.jj	2020-01-12 11:54:37.014404341 +0100
+++ gcc/testsuite/c-c++-common/gomp/order-2.c	2021-09-17 19:08:10.919147337 +0200
@@ -24,7 +24,7 @@  f2 (int *a)
 {
   int i;
   #pragma omp teams
-  #pragma omp distribute order(concurrent)	/* { dg-error "'order' is not valid for '#pragma omp distribute'" } */
+  #pragma omp distribute order(concurrent)
   for (i = 0; i < 128; i++)
     a[i]++;
   #pragma omp taskloop order (concurrent)	/* { dg-error "'order' is not valid for '#pragma omp taskloop'" } */
--- gcc/testsuite/c-c++-common/gomp/order-5.c.jj	2021-09-17 19:09:35.237975035 +0200
+++ gcc/testsuite/c-c++-common/gomp/order-5.c	2021-09-17 19:11:00.149794490 +0200
@@ -0,0 +1,101 @@ 
+void
+f1 (int *a)
+{
+  int i;
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp simd order ( reproducible : concurrent )
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp for simd order(reproducible :concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+}
+
+void
+f2 (int *a)
+{
+  int i;
+  #pragma omp parallel for order(reproducible: concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp parallel for simd order (reproducible:concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp teams distribute parallel for order(reproducible:concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp teams distribute parallel for simd order(reproducible:concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp teams distribute order(reproducible:concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp teams
+  {
+    #pragma omp distribute parallel for order(reproducible:concurrent)
+    for (i = 0; i < 128; i++)
+      a[i]++;
+    #pragma omp distribute parallel for simd order(reproducible:concurrent)
+    for (i = 0; i < 128; i++)
+      a[i]++;
+    #pragma omp distribute order(reproducible:concurrent)
+    for (i = 0; i < 128; i++)
+      a[i]++;
+  }
+  #pragma omp taskloop simd order (reproducible:concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+}
+
+void
+f3 (int *a)
+{
+  int i;
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp simd order ( unconstrained : concurrent )
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp for simd order(unconstrained :concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+}
+
+void
+f4 (int *a)
+{
+  int i;
+  #pragma omp parallel for order(unconstrained: concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp parallel for simd order (unconstrained:concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp teams distribute parallel for order(unconstrained:concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp teams distribute parallel for simd order(unconstrained:concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp teams distribute order(unconstrained:concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+  #pragma omp teams
+  {
+    #pragma omp distribute parallel for order(unconstrained:concurrent)
+    for (i = 0; i < 128; i++)
+      a[i]++;
+    #pragma omp distribute parallel for simd order(unconstrained:concurrent)
+    for (i = 0; i < 128; i++)
+      a[i]++;
+    #pragma omp distribute order(unconstrained:concurrent)
+    for (i = 0; i < 128; i++)
+      a[i]++;
+  }
+  #pragma omp taskloop simd order (unconstrained:concurrent)
+  for (i = 0; i < 128; i++)
+    a[i]++;
+}
--- gcc/testsuite/c-c++-common/gomp/order-6.c.jj	2021-09-17 19:11:15.710578145 +0200
+++ gcc/testsuite/c-c++-common/gomp/order-6.c	2021-09-17 19:13:15.207916750 +0200
@@ -0,0 +1,412 @@ 
+void foo (void);
+int v;
+#ifdef __cplusplus
+extern "C" {
+#endif
+int omp_get_thread_num (void);
+int omp_get_num_threads (void);
+int omp_target_is_present (const void *, int);
+int omp_get_cancellation (void);
+#ifdef __cplusplus
+}
+#endif
+
+void
+f1 (int *a)
+{
+  int i;
+  #pragma omp simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp parallel		/* { dg-error "OpenMP constructs other than 'ordered simd', 'simd', 'loop' or 'atomic' may not be nested inside 'simd' region" } */
+      foo ();
+    }
+  #pragma omp simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      int j;
+      #pragma omp simd
+      for (j = 0; j < 64; j++)
+	a[64 * i + j] = i + j;
+    }
+  #pragma omp simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp critical		/* { dg-error "OpenMP constructs other than 'ordered simd', 'simd', 'loop' or 'atomic' may not be nested inside 'simd' region" } */
+      foo ();
+    }
+  #pragma omp simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp ordered simd		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      foo ();
+    }
+  #pragma omp simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      v++;
+    }
+  #pragma omp simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic read		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c++ } } */
+      a[i] = v;				/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c } } */
+    }
+  #pragma omp simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic write		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c++ } } */
+      v = a[i];				/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c } } */
+    }
+  #pragma omp simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_thread_num ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_thread_num\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_num_threads ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_num_threads\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_target_is_present (a + i, 0);	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_target_is_present\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_cancellation ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_cancellation\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+}
+
+void
+f2 (int *a)
+{
+  int i;
+  #pragma omp for simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp parallel		/* { dg-error "OpenMP constructs other than 'ordered simd', 'simd', 'loop' or 'atomic' may not be nested inside 'simd' region" } */
+      foo ();
+    }
+  #pragma omp for simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      int j;
+      #pragma omp simd
+      for (j = 0; j < 64; j++)
+	a[64 * i + j] = i + j;
+    }
+  #pragma omp for simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp critical		/* { dg-error "OpenMP constructs other than 'ordered simd', 'simd', 'loop' or 'atomic' may not be nested inside 'simd' region" } */
+      foo ();
+    }
+  #pragma omp for simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp ordered simd		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      foo ();
+    }
+  #pragma omp for simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      v++;
+    }
+  #pragma omp for simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic read		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c++ } } */
+      a[i] = v;				/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c } } */
+    }
+  #pragma omp for simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic write		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c++ } } */
+      v = a[i];				/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c } } */
+    }
+  #pragma omp for simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_thread_num ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_thread_num\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp for simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_num_threads ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_num_threads\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp for simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_target_is_present (a + i, 0);	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_target_is_present\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp for simd order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_cancellation ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_cancellation\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+}
+
+void
+f3 (int *a)
+{
+  int i;
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp parallel
+      foo ();
+    }
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      int j;
+      #pragma omp simd
+      for (j = 0; j < 64; j++)
+	a[64 * i + j] = i + j;
+    }
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp critical		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      foo ();
+    }
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp ordered simd		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      foo ();
+    }
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      v++;
+    }
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic read		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c++ } } */
+      a[i] = v;				/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c } } */
+    }
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic write		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c++ } } */
+      v = a[i];				/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c } } */
+    }
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp task			/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      a[i]++;
+    }
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      int j;
+      #pragma omp taskloop		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      for (j = 0; j < 64; j++)
+	a[64 * i + j] = i + j;
+    }
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_thread_num ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_thread_num\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_num_threads ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_num_threads\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_target_is_present (a + i, 0);	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_target_is_present\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp for order(reproducible:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_cancellation ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_cancellation\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+}
+
+void
+f4 (int *a)
+{
+  int i;
+  #pragma omp simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp parallel		/* { dg-error "OpenMP constructs other than 'ordered simd', 'simd', 'loop' or 'atomic' may not be nested inside 'simd' region" } */
+      foo ();
+    }
+  #pragma omp simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      int j;
+      #pragma omp simd
+      for (j = 0; j < 64; j++)
+	a[64 * i + j] = i + j;
+    }
+  #pragma omp simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp critical		/* { dg-error "OpenMP constructs other than 'ordered simd', 'simd', 'loop' or 'atomic' may not be nested inside 'simd' region" } */
+      foo ();
+    }
+  #pragma omp simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp ordered simd		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      foo ();
+    }
+  #pragma omp simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      v++;
+    }
+  #pragma omp simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic read		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c++ } } */
+      a[i] = v;				/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c } } */
+    }
+  #pragma omp simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic write		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c++ } } */
+      v = a[i];				/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c } } */
+    }
+  #pragma omp simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_thread_num ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_thread_num\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_num_threads ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_num_threads\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_target_is_present (a + i, 0);	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_target_is_present\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_cancellation ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_cancellation\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+}
+
+void
+f5 (int *a)
+{
+  int i;
+  #pragma omp for simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp parallel		/* { dg-error "OpenMP constructs other than 'ordered simd', 'simd', 'loop' or 'atomic' may not be nested inside 'simd' region" } */
+      foo ();
+    }
+  #pragma omp for simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      int j;
+      #pragma omp simd
+      for (j = 0; j < 64; j++)
+	a[64 * i + j] = i + j;
+    }
+  #pragma omp for simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp critical		/* { dg-error "OpenMP constructs other than 'ordered simd', 'simd', 'loop' or 'atomic' may not be nested inside 'simd' region" } */
+      foo ();
+    }
+  #pragma omp for simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp ordered simd		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      foo ();
+    }
+  #pragma omp for simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      v++;
+    }
+  #pragma omp for simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic read		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c++ } } */
+      a[i] = v;				/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c } } */
+    }
+  #pragma omp for simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic write		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c++ } } */
+      v = a[i];				/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c } } */
+    }
+  #pragma omp for simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_thread_num ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_thread_num\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp for simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_num_threads ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_num_threads\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp for simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_target_is_present (a + i, 0);	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_target_is_present\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp for simd order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_cancellation ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_cancellation\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+}
+
+void					/* f6: constructs and runtime API calls nested in 'for order(unconstrained:concurrent)' loops.  */
+f6 (int *a)
+{
+  int i;
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp parallel		/* No dg-error on this case: nested 'parallel' is expected to compile cleanly.  */
+      foo ();
+    }
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      int j;
+      #pragma omp simd			/* No dg-error on this case: nested 'simd' is expected to compile cleanly.  */
+      for (j = 0; j < 64; j++)
+	a[64 * i + j] = i + j;
+    }
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp critical		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      foo ();
+    }
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp ordered simd		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      foo ();
+    }
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp atomic		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      v++;
+    }
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)	/* Below, the same diagnostic is expected on the pragma line for C++ and on the statement line for C.  */
+    {
+      #pragma omp atomic read		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c++ } } */
+      a[i] = v;				/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c } } */
+    }
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)	/* Same C++/C split of the expected diagnostic line as for 'atomic read'.  */
+    {
+      #pragma omp atomic write		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c++ } } */
+      v = a[i];				/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" "" { target c } } */
+    }
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      #pragma omp task			/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      a[i]++;
+    }
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    {
+      int j;
+      #pragma omp taskloop		/* { dg-error "OpenMP constructs other than 'parallel', 'loop' or 'simd' may not be nested inside a region with the 'order\\(concurrent\\)' clause" } */
+      for (j = 0; j < 64; j++)
+	a[64 * i + j] = i + j;
+    }
+  #pragma omp for order(unconstrained:concurrent)	/* The remaining loops each call one OpenMP runtime API routine and expect it to be diagnosed.  */
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_thread_num ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_thread_num\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_num_threads ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_num_threads\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_target_is_present (a + i, 0);	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_target_is_present\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+  #pragma omp for order(unconstrained:concurrent)
+  for (i = 0; i < 64; i++)
+    a[i] += omp_get_cancellation ();	/* { dg-error "OpenMP runtime API call '\[^\n\r]*omp_get_cancellation\[^\n\r]*' in a region with 'order\\(concurrent\\)' clause" } */
+}
--- gcc/testsuite/c-c++-common/gomp/clause-dups-1.c.jj	2021-08-17 09:29:41.409204885 +0200
+++ gcc/testsuite/c-c++-common/gomp/clause-dups-1.c	2021-09-17 18:53:36.399313115 +0200
@@ -29,6 +29,12 @@  f1 (int *p)
   #pragma omp for nowait nowait					/* { dg-error "too many 'nowait' clauses" } */
   for (i = 0; i < 8; ++i)
     f0 ();
+  #pragma omp for schedule(static) order(concurrent) order(concurrent)	/* { dg-error "too many 'order' clauses" } */
+  for (i = 0; i < 8; ++i)
+    f0 ();
+  #pragma omp for schedule(static) order(reproducible:concurrent) order(unconstrained:concurrent)	/* { dg-error "too many 'order' clauses" } */
+  for (i = 0; i < 8; ++i)
+    f0 ();
   #pragma omp simd collapse(1) collapse(1)			/* { dg-error "too many 'collapse' clauses" } */
   for (i = 0; i < 8; ++i)
     f0 ();
@@ -207,6 +213,18 @@  f1 (int *p)
   f0 ();
   #pragma omp scope nowait nowait				/* { dg-error "too many 'nowait' clauses" } */
   ;
+  #pragma omp loop bind(thread) order(concurrent) order(concurrent)	/* { dg-error "too many 'order' clauses" } */
+  for (i = 0; i < 8; ++i)
+    f0 ();
+  #pragma omp loop bind(thread) order(reproducible:concurrent) order(unconstrained:concurrent)	/* { dg-error "too many 'order' clauses" } */
+  for (i = 0; i < 8; ++i)
+    f0 ();
+  #pragma omp simd order(concurrent) order(concurrent)	/* { dg-error "too many 'order' clauses" } */
+  for (i = 0; i < 8; ++i)
+    f0 ();
+  #pragma omp simd order(reproducible:concurrent) order(unconstrained:concurrent)	/* { dg-error "too many 'order' clauses" } */
+  for (i = 0; i < 8; ++i)
+    f0 ();
 }
 
 #pragma omp declare simd simdlen (4) simdlen (4)		/* { dg-error "too many 'simdlen' clauses" } */
@@ -223,3 +241,17 @@  void f6 (int a, int b);
 void f7 (int a, int b);
 #pragma omp declare simd linear (a) uniform (a)			/* { dg-error "'a' appears more than once in data clauses" } */
 void f8 (int a, int b);
+
+#pragma omp declare target
+void					/* f9: duplicate 'order' clause checks inside a 'declare target' region.  */
+f9 (void)
+{
+  int i;
+  #pragma omp distribute dist_schedule(static) order(concurrent) order(concurrent)	/* { dg-error "too many 'order' clauses" } */
+  for (i = 0; i < 8; ++i)
+    f0 ();
+  #pragma omp loop bind(thread) order(reproducible:concurrent) order(unconstrained:concurrent)	/* { dg-error "too many 'order' clauses" } */
+  for (i = 0; i < 8; ++i)	/* NOTE(review): the pragma above is identical to a 'loop bind(thread)' case in f1; confirm whether a 'distribute' variant with modifiers was intended.  */
+    f0 ();
+}
+#pragma omp end declare target
--- gcc/testsuite/c-c++-common/gomp/clauses-1.c.jj	2021-08-13 22:49:19.408185386 +0200
+++ gcc/testsuite/c-c++-common/gomp/clauses-1.c	2021-09-18 09:35:15.209447707 +0200
@@ -66,14 +66,27 @@  baz (int d, int m, int i1, int i2, int p
   #pragma omp distribute parallel for \
     private (p) firstprivate (f) collapse(1) dist_schedule(static, 16) \
     if (parallel: i2) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) \
-    lastprivate (l) schedule(static, 4) copyin(t) order(concurrent) allocate (p)
+    lastprivate (l) schedule(static, 4) copyin(t) allocate (p)
+  for (int i = 0; i < 64; i++)
+    ll++;
+  #pragma omp distribute parallel for \
+    private (p) firstprivate (f) collapse(1) dist_schedule(static, 16) \
+    if (parallel: i2) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) \
+    lastprivate (l) schedule(static, 4) order(concurrent) allocate (p)
   for (int i = 0; i < 64; i++)
     ll++;
   #pragma omp distribute parallel for simd \
     private (p) firstprivate (f) collapse(1) dist_schedule(static, 16) \
     if (parallel: i2) if(simd: i1) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) \
     lastprivate (l) schedule(static, 4) nontemporal(ntm) \
-    safelen(8) simdlen(4) aligned(q: 32) copyin(t) order(concurrent) allocate (f)
+    safelen(8) simdlen(4) aligned(q: 32) copyin(t) allocate (f)
+  for (int i = 0; i < 64; i++)
+    ll++;
+  #pragma omp distribute parallel for simd \
+    private (p) firstprivate (f) collapse(1) dist_schedule(static, 16) \
+    if (parallel: i2) if(simd: i1) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) \
+    lastprivate (l) schedule(static, 4) nontemporal(ntm) \
+    safelen(8) simdlen(4) aligned(q: 32) order(concurrent) allocate (f)
   for (int i = 0; i < 64; i++)
     ll++;
   #pragma omp distribute simd \
@@ -156,7 +169,7 @@  bar (int d, int m, int i1, int i2, int i
     ;
   #pragma omp target teams distribute \
     device(d) map (tofrom: m) if (target: i1) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) \
-    shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) \
+    shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) order(concurrent) \
     collapse(1) dist_schedule(static, 16) nowait depend(inout: dd[0]) allocate (omp_default_mem_alloc:f) in_reduction(+:r2)
   for (int i = 0; i < 64; i++)
     ;
@@ -218,7 +231,7 @@  bar (int d, int m, int i1, int i2, int i
   #pragma omp target nowait depend(inout: dd[0]) in_reduction(+:r2)
   #pragma omp teams distribute \
     private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) \
-    collapse(1) dist_schedule(static, 16) allocate (omp_default_mem_alloc: f)
+    collapse(1) dist_schedule(static, 16) allocate (omp_default_mem_alloc: f) order(concurrent)
   for (int i = 0; i < 64; i++)
     ;
   #pragma omp target
@@ -249,20 +262,36 @@  bar (int d, int m, int i1, int i2, int i
     ll++;
   #pragma omp teams distribute parallel for \
     private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) \
-    collapse(1) dist_schedule(static, 16) order(concurrent) \
+    collapse(1) dist_schedule(static, 16) \
     if (parallel: i2) num_threads (nth) proc_bind(spread) \
     lastprivate (l) schedule(static, 4) copyin(t) allocate (f)
   for (int i = 0; i < 64; i++)
     ll++;
+  #pragma omp teams distribute parallel for \
+    private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) \
+    collapse(1) dist_schedule(static, 16) order(concurrent) \
+    if (parallel: i2) num_threads (nth) proc_bind(spread) \
+    lastprivate (l) schedule(static, 4) allocate (f)
+  for (int i = 0; i < 64; i++)
+    ll++;
   #pragma omp teams distribute parallel for simd \
     private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) \
     collapse(1) dist_schedule(static, 16) \
     if (parallel: i2) num_threads (nth) proc_bind(spread) \
-    lastprivate (l) schedule(static, 4) order(concurrent) \
+    lastprivate (l) schedule(static, 4) \
     safelen(8) simdlen(4) aligned(q: 32) if (simd: i3) nontemporal(ntm) copyin(t) \
     allocate (f)
   for (int i = 0; i < 64; i++)
     ll++;
+  #pragma omp teams distribute parallel for simd \
+    private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) \
+    collapse(1) dist_schedule(static, 16) \
+    if (parallel: i2) num_threads (nth) proc_bind(spread) \
+    lastprivate (l) schedule(static, 4) order(concurrent) \
+    safelen(8) simdlen(4) aligned(q: 32) if (simd: i3) nontemporal(ntm) \
+    allocate (f)
+  for (int i = 0; i < 64; i++)
+    ll++;
   #pragma omp teams distribute simd \
     private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) \
     collapse(1) dist_schedule(static, 16) order(concurrent) \
--- gcc/testsuite/g++.dg/gomp/attrs-1.C.jj	2021-09-07 19:32:58.710062094 +0200
+++ gcc/testsuite/g++.dg/gomp/attrs-1.C	2021-09-18 09:33:19.185053854 +0200
@@ -63,7 +63,7 @@  foo (int d, int m, int i1, int i2, int p
     ll++;
   [[omp::directive (distribute
     private (p) firstprivate (f) collapse(1) dist_schedule(static, 16)
-    allocate (omp_default_mem_alloc:f))]]
+    allocate (omp_default_mem_alloc:f) order(concurrent))]]
   for (int i = 0; i < 64; i++)
     ll++;
 }
@@ -85,14 +85,27 @@  baz (int d, int m, int i1, int i2, int p
   [[omp::directive (distribute parallel for
     private (p) firstprivate (f) collapse(1) dist_schedule(static, 16)
     if (parallel: i2) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread)
-    lastprivate (l) schedule(static, 4) copyin(t) order(concurrent) allocate (p))]]
+    lastprivate (l) schedule(static, 4) copyin(t) allocate (p))]]
+  for (int i = 0; i < 64; i++)
+    ll++;
+  [[omp::directive (distribute parallel for
+    private (p) firstprivate (f) collapse(1) dist_schedule(static, 16)
+    if (parallel: i2) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread)
+    lastprivate (l) schedule(static, 4) order(concurrent) allocate (p))]]
   for (int i = 0; i < 64; i++)
     ll++;
   [[omp::directive (distribute parallel for simd
     private (p) firstprivate (f) collapse(1) dist_schedule(static, 16)
     if (parallel: i2) if(simd: i1) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread)
     lastprivate (l) schedule(static, 4) nontemporal(ntm)
-    safelen(8) simdlen(4) aligned(q: 32) copyin(t) order(concurrent) allocate (f))]]
+    safelen(8) simdlen(4) aligned(q: 32) copyin(t) allocate (f))]]
+  for (int i = 0; i < 64; i++)
+    ll++;
+  [[omp::directive (distribute parallel for simd
+    private (p) firstprivate (f) collapse(1) dist_schedule(static, 16)
+    if (parallel: i2) if(simd: i1) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread)
+    lastprivate (l) schedule(static, 4) nontemporal(ntm)
+    safelen(8) simdlen(4) aligned(q: 32) order(concurrent) allocate (f))]]
   for (int i = 0; i < 64; i++)
     ll++;
   [[omp::directive (distribute simd
@@ -207,7 +220,7 @@  bar (int d, int m, int i1, int i2, int i
     ;
   [[omp::sequence (omp::directive (target teams distribute
     device(d) map (tofrom: m) if (target: i1) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp)
-    shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl)
+    shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) order(concurrent)
     collapse(1) dist_schedule(static, 16) nowait depend(inout: dd[0]) allocate (omp_default_mem_alloc:f) in_reduction(+:r2)))]]
   for (int i = 0; i < 64; i++)
     ;
@@ -292,7 +305,7 @@  bar (int d, int m, int i1, int i2, int i
   [[omp::sequence (directive (target nowait depend(inout: dd[0]) in_reduction(+:r2)),
     directive (teams distribute
     private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl)
-    collapse(1) dist_schedule(static, 16) allocate (omp_default_mem_alloc: f)))]]
+    collapse(1) dist_schedule(static, 16) allocate (omp_default_mem_alloc: f) order(concurrent)))]]
   for (int i = 0; i < 64; i++)
     ;
   [[omp::directive (teams
@@ -327,20 +340,36 @@  bar (int d, int m, int i1, int i2, int i
     ll++;
   [[omp::directive (teams distribute parallel for
     private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl)
-    collapse(1) dist_schedule(static, 16) order(concurrent)
+    collapse(1) dist_schedule(static, 16)
     if (parallel: i2) num_threads (nth) proc_bind(spread)
     lastprivate (l) schedule(static, 4) copyin(t) allocate (f))]]
   for (int i = 0; i < 64; i++)
     ll++;
+  [[omp::directive (teams distribute parallel for
+    private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl)
+    collapse(1) dist_schedule(static, 16) order(concurrent)
+    if (parallel: i2) num_threads (nth) proc_bind(spread)
+    lastprivate (l) schedule(static, 4) allocate (f))]]
+  for (int i = 0; i < 64; i++)
+    ll++;
   [[omp::directive (teams distribute parallel for simd
     private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl)
     collapse(1) dist_schedule(static, 16)
     if (parallel: i2) num_threads (nth) proc_bind(spread)
-    lastprivate (l) schedule(static, 4) order(concurrent)
+    lastprivate (l) schedule(static, 4)
     safelen(8) simdlen(4) aligned(q: 32) if (simd: i3) nontemporal(ntm) copyin(t)
     allocate (f))]]
   for (int i = 0; i < 64; i++)
     ll++;
+  [[omp::directive (teams distribute parallel for simd
+    private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl)
+    collapse(1) dist_schedule(static, 16)
+    if (parallel: i2) num_threads (nth) proc_bind(spread)
+    lastprivate (l) schedule(static, 4) order(concurrent)
+    safelen(8) simdlen(4) aligned(q: 32) if (simd: i3) nontemporal(ntm)
+    allocate (f))]]
+  for (int i = 0; i < 64; i++)
+    ll++;
   [[omp::directive (teams distribute simd
     private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl)
     collapse(1) dist_schedule(static, 16) order(concurrent)
--- gcc/testsuite/g++.dg/gomp/attrs-2.C.jj	2021-09-07 19:32:58.711062081 +0200
+++ gcc/testsuite/g++.dg/gomp/attrs-2.C	2021-09-18 09:39:11.217180617 +0200
@@ -63,7 +63,7 @@  foo (int d, int m, int i1, int i2, int p
     ll++;
   [[omp::directive (distribute,
     private (p),firstprivate (f),collapse(1),dist_schedule(static, 16),
-    allocate (omp_default_mem_alloc:f))]]
+    allocate (omp_default_mem_alloc:f),order(concurrent))]]
   for (int i = 0; i < 64; i++)
     ll++;
 }
@@ -85,14 +85,27 @@  baz (int d, int m, int i1, int i2, int p
   [[omp::directive (distribute parallel for,
     private (p),firstprivate (f),collapse(1),dist_schedule(static, 16),
     if (parallel: i2),default(shared),shared(s),reduction(+:r),num_threads (nth),proc_bind(spread),
-    lastprivate (l),schedule(static, 4),copyin(t),order(concurrent),allocate (p))]]
+    lastprivate (l),schedule(static, 4),copyin(t),allocate (p))]]
+  for (int i = 0; i < 64; i++)
+    ll++;
+  [[omp::directive (distribute parallel for,
+    private (p),firstprivate (f),collapse(1),dist_schedule(static, 16),
+    if (parallel: i2),default(shared),shared(s),reduction(+:r),num_threads (nth),proc_bind(spread),
+    lastprivate (l),schedule(static, 4),order(concurrent),allocate (p))]]
   for (int i = 0; i < 64; i++)
     ll++;
   [[omp::directive (distribute parallel for simd,
     private (p),firstprivate (f),collapse(1),dist_schedule(static, 16),
     if (parallel: i2),if(simd: i1),default(shared),shared(s),reduction(+:r),num_threads (nth),proc_bind(spread),
     lastprivate (l),schedule(static, 4),nontemporal(ntm),
-    safelen(8),simdlen(4),aligned(q: 32),copyin(t),order(concurrent),allocate (f))]]
+    safelen(8),simdlen(4),aligned(q: 32),copyin(t),allocate (f))]]
+  for (int i = 0; i < 64; i++)
+    ll++;
+  [[omp::directive (distribute parallel for simd,
+    private (p),firstprivate (f),collapse(1),dist_schedule(static, 16),
+    if (parallel: i2),if(simd: i1),default(shared),shared(s),reduction(+:r),num_threads (nth),proc_bind(spread),
+    lastprivate (l),schedule(static, 4),nontemporal(ntm),
+    safelen(8),simdlen(4),aligned(q: 32),order(concurrent),allocate (f))]]
   for (int i = 0; i < 64; i++)
     ll++;
   [[omp::directive (distribute simd,
@@ -207,7 +220,7 @@  bar (int d, int m, int i1, int i2, int i
     ;
   [[omp::sequence (omp::directive (target teams distribute,
     device(d),map (tofrom: m),if (target: i1),private (p),firstprivate (f),defaultmap(tofrom: scalar),is_device_ptr (idp),
-    shared(s),default(shared),reduction(+:r),num_teams(nte),thread_limit(tl),
+    shared(s),default(shared),reduction(+:r),num_teams(nte),thread_limit(tl),order(concurrent),
     collapse(1),dist_schedule(static, 16),nowait depend(inout: dd[0]),allocate (omp_default_mem_alloc:f),in_reduction(+:r2)))]]
   for (int i = 0; i < 64; i++)
     ;
@@ -292,7 +305,7 @@  bar (int d, int m, int i1, int i2, int i
   [[omp::sequence (directive (target, nowait,depend(inout: dd[0]),in_reduction(+:r2)),
     directive (teams distribute,
     private(p),firstprivate (f),shared(s),default(shared),reduction(+:r),num_teams(nte),thread_limit(tl),
-    collapse(1),dist_schedule(static, 16),allocate (omp_default_mem_alloc: f)))]]
+    collapse(1),dist_schedule(static, 16),allocate (omp_default_mem_alloc: f),order(concurrent)))]]
   for (int i = 0; i < 64; i++)
     ;
   [[omp::directive (teams,
@@ -327,20 +340,36 @@  bar (int d, int m, int i1, int i2, int i
     ll++;
   [[omp::directive (teams distribute parallel for,
     private(p),firstprivate (f),shared(s),default(shared),reduction(+:r),num_teams(nte),thread_limit(tl),
-    collapse(1),dist_schedule(static, 16),order(concurrent),
+    collapse(1),dist_schedule(static, 16),
     if (parallel: i2),num_threads (nth),proc_bind(spread),
     lastprivate (l),schedule(static, 4),copyin(t),allocate (f))]]
   for (int i = 0; i < 64; i++)
     ll++;
+  [[omp::directive (teams distribute parallel for,
+    private(p),firstprivate (f),shared(s),default(shared),reduction(+:r),num_teams(nte),thread_limit(tl),
+    collapse(1),dist_schedule(static, 16),order(concurrent),
+    if (parallel: i2),num_threads (nth),proc_bind(spread),
+    lastprivate (l),schedule(static, 4),allocate (f))]]
+  for (int i = 0; i < 64; i++)
+    ll++;
   [[omp::directive (teams distribute parallel for simd,
     private(p),firstprivate (f),shared(s),default(shared),reduction(+:r),num_teams(nte),thread_limit(tl),
     collapse(1),dist_schedule(static, 16),
     if (parallel: i2),num_threads (nth),proc_bind(spread),
-    lastprivate (l),schedule(static, 4),order(concurrent),
+    lastprivate (l),schedule(static, 4),
     safelen(8),simdlen(4),aligned(q: 32),if (simd: i3),nontemporal(ntm),copyin(t),
     allocate (f))]]
   for (int i = 0; i < 64; i++)
     ll++;
+  [[omp::directive (teams distribute parallel for simd,
+    private(p),firstprivate (f),shared(s),default(shared),reduction(+:r),num_teams(nte),thread_limit(tl),
+    collapse(1),dist_schedule(static, 16),
+    if (parallel: i2),num_threads (nth),proc_bind(spread),
+    lastprivate (l),schedule(static, 4),order(concurrent),
+    safelen(8),simdlen(4),aligned(q: 32),if (simd: i3),nontemporal(ntm),
+    allocate (f))]]
+  for (int i = 0; i < 64; i++)
+    ll++;
   [[omp::directive (teams distribute simd,
     private(p),firstprivate (f),shared(s),default(shared),reduction(+:r),num_teams(nte),thread_limit(tl),
     collapse(1),dist_schedule(static, 16),order(concurrent),