Fortran/openmp: Fix '!$omp end'

Message ID e2a8a2ee-0f3a-3f56-7326-14da23189fec@codesourcery.com
State New
Headers
Series Fortran/openmp: Fix '!$omp end' |

Commit Message

Tobias Burnus Nov. 11, 2021, 5:11 p.m. UTC
  Found this when looking at the num_teams patch – and when
converting clauses-1.c to clauses-1.f90.

OK?

Tobias
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
  

Comments

Jakub Jelinek Nov. 11, 2021, 6:01 p.m. UTC | #1
On Thu, Nov 11, 2021 at 06:11:23PM +0100, Tobias Burnus wrote:
> --- a/gcc/fortran/parse.c
> +++ b/gcc/fortran/parse.c
> @@ -915,15 +915,16 @@ decode_omp_directive (void)
>        matcho ("error", gfc_match_omp_error, ST_OMP_ERROR);
>        matcho ("end atomic", gfc_match_omp_eos_error, ST_OMP_END_ATOMIC);
>        matcho ("end critical", gfc_match_omp_end_critical, ST_OMP_END_CRITICAL);
> -      matchs ("end distribute parallel do simd", gfc_match_omp_eos_error,
> +      matchs ("end distribute parallel do simd", gfc_match_omp_end_nowait,
>  	      ST_OMP_END_DISTRIBUTE_PARALLEL_DO_SIMD);
> -      matcho ("end distribute parallel do", gfc_match_omp_eos_error,
> +      matcho ("end distribute parallel do", gfc_match_omp_end_nowait,

I think the above two changes are incorrect.
At least looking at 5.1 which is clearer than 5.2, 5.1 [221:17-23] says
for C/C++ that while nowait is allowed on worksharing-loop, it is not
allowed on combined parallel worksharing-loop, and Fortran has that
restriction through the syntax (no [nowait] on !$omp end parallel do).

> @@ -936,9 +937,12 @@ decode_omp_directive (void)
>  	      ST_OMP_END_MASTER_TASKLOOP);
>        matcho ("end master", gfc_match_omp_eos_error, ST_OMP_END_MASTER);
>        matchs ("end ordered", gfc_match_omp_eos_error, ST_OMP_END_ORDERED);
> -      matchs ("end parallel do simd", gfc_match_omp_eos_error,
> +      matchs ("end parallel do simd", gfc_match_omp_end_nowait,
>  	      ST_OMP_END_PARALLEL_DO_SIMD);
> -      matcho ("end parallel do", gfc_match_omp_eos_error, ST_OMP_END_PARALLEL_DO);
> +      matcho ("end parallel do", gfc_match_omp_end_nowait,
> +	      ST_OMP_END_PARALLEL_DO);

Likewise for the above two.

> @@ -951,46 +955,53 @@ decode_omp_directive (void)
>  	      ST_OMP_END_PARALLEL_MASTER_TASKLOOP);
>        matcho ("end parallel master", gfc_match_omp_eos_error,
>  	      ST_OMP_END_PARALLEL_MASTER);
> -      matcho ("end parallel sections", gfc_match_omp_eos_error,
> +      matcho ("end parallel sections", gfc_match_omp_end_nowait,
>  	      ST_OMP_END_PARALLEL_SECTIONS);
> -      matcho ("end parallel workshare", gfc_match_omp_eos_error,
> +      matcho ("end parallel workshare", gfc_match_omp_end_nowait,
>  	      ST_OMP_END_PARALLEL_WORKSHARE);

Ditto for the above two.

>        matcho ("end parallel", gfc_match_omp_eos_error, ST_OMP_END_PARALLEL);
>        matcho ("end scope", gfc_match_omp_end_nowait, ST_OMP_END_SCOPE);
>        matcho ("end sections", gfc_match_omp_end_nowait, ST_OMP_END_SECTIONS);
>        matcho ("end single", gfc_match_omp_end_single, ST_OMP_END_SINGLE);
>        matcho ("end target data", gfc_match_omp_eos_error, ST_OMP_END_TARGET_DATA);
> -      matchs ("end target parallel do simd", gfc_match_omp_eos_error,
> +      matchs ("end target parallel do simd", gfc_match_omp_end_nowait,

The above seems like a bug in 5.1 standard, haven't checked 5.2.
!$omp end target parallel do simd nowait
should be IMO valid, but [241:16] mistakenly doesn't list it.

>  	      ST_OMP_END_TARGET_PARALLEL_DO_SIMD);
> -      matcho ("end target parallel do", gfc_match_omp_eos_error,
> +      matcho ("end target parallel do", gfc_match_omp_end_nowait,

Similarly.

>  	      ST_OMP_END_TARGET_PARALLEL_DO);
> -      matcho ("end target parallel", gfc_match_omp_eos_error,
> +      matcho ("end target parallel loop", gfc_match_omp_end_nowait,
> +	      ST_OMP_END_TARGET_PARALLEL_LOOP);

Similarly.

> +      matcho ("end target parallel", gfc_match_omp_end_nowait,
>  	      ST_OMP_END_TARGET_PARALLEL);

Similarly.

> -      matchs ("end target simd", gfc_match_omp_eos_error, ST_OMP_END_TARGET_SIMD);
> +      matchs ("end target simd", gfc_match_omp_end_nowait, ST_OMP_END_TARGET_SIMD);

Similarly.

>        matchs ("end target teams distribute parallel do simd",
> -	      gfc_match_omp_eos_error,
> +	      gfc_match_omp_end_nowait,
>  	      ST_OMP_END_TARGET_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD);
> -      matcho ("end target teams distribute parallel do", gfc_match_omp_eos_error,
> +      matcho ("end target teams distribute parallel do",
> +	      gfc_match_omp_end_nowait,
>  	      ST_OMP_END_TARGET_TEAMS_DISTRIBUTE_PARALLEL_DO);
> -      matchs ("end target teams distribute simd", gfc_match_omp_eos_error,
> +      matchs ("end target teams distribute simd", gfc_match_omp_end_nowait,
>  	      ST_OMP_END_TARGET_TEAMS_DISTRIBUTE_SIMD);
> -      matcho ("end target teams distribute", gfc_match_omp_eos_error,
> +      matcho ("end target teams distribute", gfc_match_omp_end_nowait,
>  	      ST_OMP_END_TARGET_TEAMS_DISTRIBUTE);
> -      matcho ("end target teams", gfc_match_omp_eos_error, ST_OMP_END_TARGET_TEAMS);
> -      matcho ("end target", gfc_match_omp_eos_error, ST_OMP_END_TARGET);
> +      matcho ("end target teams loop", gfc_match_omp_end_nowait,
> +	      ST_OMP_END_TARGET_TEAMS_LOOP);
> +      matcho ("end target teams", gfc_match_omp_end_nowait,
> +	      ST_OMP_END_TARGET_TEAMS);
> +      matcho ("end target", gfc_match_omp_end_nowait, ST_OMP_END_TARGET);

Similarly all the above.  !$omp end target
followed by anything should accept nowait.

>        matcho ("end taskgroup", gfc_match_omp_eos_error, ST_OMP_END_TASKGROUP);
>        matchs ("end taskloop simd", gfc_match_omp_eos_error,
>  	      ST_OMP_END_TASKLOOP_SIMD);
>        matcho ("end taskloop", gfc_match_omp_eos_error, ST_OMP_END_TASKLOOP);
>        matcho ("end task", gfc_match_omp_eos_error, ST_OMP_END_TASK);
> -      matchs ("end teams distribute parallel do simd", gfc_match_omp_eos_error,
> +      matchs ("end teams distribute parallel do simd", gfc_match_omp_end_nowait,
>  	      ST_OMP_END_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD);
> -      matcho ("end teams distribute parallel do", gfc_match_omp_eos_error,
> +      matcho ("end teams distribute parallel do", gfc_match_omp_end_nowait,

These again shouldn't allow nowait, the outermost leaf construct
that accepts nowait is do and when do is combined with parallel, nowait
shouldn't be specified.

	Jakub
  
Tobias Burnus Nov. 12, 2021, 11:01 a.m. UTC | #2
On 11.11.21 19:01, Jakub Jelinek wrote:
> On Thu, Nov 11, 2021 at 06:11:23PM +0100, Tobias Burnus wrote:
>> --- a/gcc/fortran/parse.c
>> +++ b/gcc/fortran/parse.c
>> ...
>> +      matchs ("end distribute parallel do simd", gfc_match_omp_end_nowait,
>> ...
>> +      matcho ("end distribute parallel do", gfc_match_omp_end_nowait,
> I think the above two changes are incorrect.
> At least looking at 5.1 which is clearer than 5.2, 5.1 [221:17-23] says
> for C/C++ that while nowait is allowed on worksharing-loop, it is not
> allowed on combined parallel worksharing-loop, and Fortran has that
> restriction through the syntax (no [nowait] on !$omp end parallel do).

I did look at 5.2 and did miss it – but now after searching more carefully:

In 5.2 it is hidden in "17.3 Combined and Composite Directive Names" [342:14-16]

"If directive-name-A is parallel then directive-name-B may be loop,
  sections, workshare, masked, for, do or the directive name of a
  combined or composite construct for which directive-name-A is
  masked, for or do."

and "17.4 Combined Construct Semantics" [343:14-16]

"If directive-name-A is parallel, the nowait and in_reduction clauses
  must not be specified."

And for completeness, "nowait" (Sect. 15.6) is permitted for
"Directives: dispatch, do, for, interop, scope, sections,
  single, target, target enter data, target exit data,
  target update, taskwait, workshare"

  * * *

With the attached patch, the following combined/composite
directives accept 'nowait' at 'end':

"end critical"
"end do simd"
"end do"
"end scope"
"end sections"
"end single"
"end target parallel"               (newly permits nowait)
"end target simd"                   (newly permits nowait)
"end target teams distribute simd"  (newly permits nowait)
"end target teams distribute"       (newly permits nowait)
"end target teams loop"             (newly permits nowait)
"end target teams"                  (newly permits nowait)
"end target"
"end workshare"

and the following don't

"end atomic"
"end distribute parallel do simd"
"end distribute parallel do"
"end distribute simd"
"end distribute"
"end loop"  (was completely missing before)
"end simd"
"end masked taskloop simd"
"end masked taskloop"
"end masked"
"end master taskloop simd"
"end master taskloop"
"end master"
"end ordered"
"end parallel do simd"
"end parallel do"
"end parallel loop"  (was completely missing before)
"end parallel masked taskloop simd"
"end parallel masked taskloop"
"end parallel masked"
"end parallel master taskloop simd"
"end parallel master taskloop"
"end parallel master"
"end parallel sections"
"end parallel workshare"
"end parallel"
"end target data"
"end target parallel do simd"
"end target parallel do"
"end target parallel loop"  (was completely missing before)
"end target teams distribute parallel do simd"
"end target teams distribute parallel do"
"end taskgroup"
"end taskloop simd"
"end taskloop"
"end task"
"end teams distribute parallel do simd"
"end teams distribute parallel do"
"end teams distribute simd"
"end teams distribute"
"end teams loop"
"end teams"

  * * *

[target parallel do simd:]

> The above seems like a bug in 5.1 standard, haven't checked 5.2.
> !$omp end target parallel do simd nowait
> should be IMO valid, but [241:16] mistakenly doesn't list it.

It is the same – A="target" → B="simd/parallel/teams" and
there B can be also the A in a combined/composite construct,
such that A=parallel (see first quote above).

>> +      matcho ("end target parallel do", gfc_match_omp_end_nowait,
>> +      matcho ("end target parallel loop", gfc_match_omp_end_nowait,
>> +      matcho ("end target parallel", gfc_match_omp_end_nowait,
> Similarly.
While the first two are still invalid, in the latter parallel does not
appear as "A" and is thus valid in 5.2 in my reading.
>> -      matchs ("end target simd", gfc_match_omp_eos_error, ST_OMP_END_TARGET_SIMD);
>> +      matchs ("end target simd", gfc_match_omp_end_nowait, ST_OMP_END_TARGET_SIMD);
> Similarly.

Likewise now valid in my reading.

Revised version attached – it does follow 5.2 by permitting all 'target'
combined/composite constructions which does not contain 'parallel' as
"A". — I hope I got it now right (following OMP 5.2).

Tobias

PS: I note that c-c++-common/gomp/clauses-1.c has the following. I think
it falls into the category of "could be valid" (and attached to 'target'),
but OMP 5.1 and OMP 5.2 do not permit it.

[I have not checked other variations; I just saw them fail in the
Fortranized version and looked at the original.]

* #pragma omp target parallel for ... nowait   (twice)

* #pragma omp target parallel for simd ... nowait

* #pragma omp target teams distribute parallel for ... nowait

* #pragma omp target teams distribute parallel for simd .. nowait

* #pragma omp target parallel loop ... nowait  (twice)

* #pragma omp target parallel for ... nowait  (twice)

* #pragma omp target parallel for simd  ... nowait
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
  
Jakub Jelinek Nov. 12, 2021, 12:02 p.m. UTC | #3
On Fri, Nov 12, 2021 at 12:01:27PM +0100, Tobias Burnus wrote:
> With the attached patch, the following combined/composite
> directives accept 'nowait' at 'end':

I've filed https://github.com/OpenMP/spec/issues/3184 because
I think OpenMP 5.2 got it wrong (and 5.1 got it wrong for
the end directives in many other cases too).

I believe the general rule should be:
1) non-combined/composite constructs allow nowait when
   they mention the clause (critical, for/do, scope, sections, single,
   workshare)
2) for simd, do simd allow it
3) anything combined with target allows it

So:
> 
> "end critical"
> "end do simd"
> "end do"
> "end scope"
> "end sections"
> "end single"
> "end target parallel"               (newly permits nowait)
> "end target simd"                   (newly permits nowait)
> "end target teams distribute simd"  (newly permits nowait)
> "end target teams distribute"       (newly permits nowait)
> "end target teams loop"             (newly permits nowait)
> "end target teams"                  (newly permits nowait)
> "end target"
> "end workshare"

is ok, but:

> and the following don't
> 
> "end target parallel do simd"
> "end target parallel do"
> "end target parallel loop"  (was completely missing before)
> "end target teams distribute parallel do simd"
> "end target teams distribute parallel do"

The above 5 should allow it too.
As per e.g. 5.2 [341:23],
"The effect of the nowait clause is as if it is applied to the
outermost leaf construct that permits it."
so even for the above 5 the clause splitting should put
nowait on target and not the others and be done with it.

	Jakub
  
Tobias Burnus Nov. 12, 2021, 3:56 p.m. UTC | #4
On 12.11.21 13:02, Jakub Jelinek wrote:
> 3) anything combined with target allows it

... and puts it on 'target' as it shouldn't be on 'for' or 'do' in
'target ... parallel do/for ...', I'd guess.

Updated patch attached.

Tobias
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
  
Jakub Jelinek Nov. 12, 2021, 4:07 p.m. UTC | #5
On Fri, Nov 12, 2021 at 04:56:37PM +0100, Tobias Burnus wrote:
> Fortran/openmp: Fix '!$omp end'
> 
> gcc/fortran/ChangeLog:
> 
> 	* parse.c (decode_omp_directive): Fix permitting 'nowait' for some
> 	combined directives, add missing 'omp end ... loop'.
> 	(gfc_ascii_statement): Fix ST_OMP_END_TEAMS_LOOP result.
> 	* openmp.c (resolve_omp_clauses): Add missing combined loop constructs
> 	case values to the 'if(directive-name: ...)' check.
> 	* trans-openmp.c (gfc_split_omp_clauses): Put nowait on target if
> 	first leaf construct accepting it.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gfortran.dg/gomp/unexpected-end.f90: Update dg-error.
> 	* gfortran.dg/gomp/clauses-1.f90: New test.
> 	* gfortran.dg/gomp/nowait-2.f90: New test.
> 	* gfortran.dg/gomp/nowait-3.f90: New test.

Mostly good, except:

> @@ -6132,10 +6134,9 @@ gfc_split_omp_clauses (gfc_code *code,
>     if (mask & GFC_OMP_MASK_TEAMS && innermost != GFC_OMP_MASK_TEAMS)
>       gfc_add_clause_implicitly (&clausesa[GFC_OMP_SPLIT_TEAMS],
>  				code->ext.omp_clauses, false, false);
> -   if (((mask & (GFC_OMP_MASK_PARALLEL | GFC_OMP_MASK_DO))
> -	== (GFC_OMP_MASK_PARALLEL | GFC_OMP_MASK_DO))
> -       && !is_loop)
> -    clausesa[GFC_OMP_SPLIT_DO].nowait = true;
> +   if ((mask & (GFC_OMP_MASK_PARALLEL | GFC_OMP_MASK_DO))
> +       == (GFC_OMP_MASK_PARALLEL | GFC_OMP_MASK_DO))
> +    clausesa[GFC_OMP_SPLIT_DO].nowait = false;
>  }

this.  In the standard, yes, for parallel {do,sections,workshare}
indeed the do/sections/workshare doesn't get nowait (either
it is not allowed to specify it at all, or if combined with
target, nowait should go to target and nothing else).
But, for the middle-end, we actually want nowait true
whenever a worksharing construct is combined with parallel,
because when the worksharing construct ends, doing a barrier there
will mean we wait, then immediately get to the implicit barrier at the end
of parallel.

c_omp_split_clauses does:
  /* Add implicit nowait clause on
     #pragma omp parallel {for,for simd,sections}.  */
  if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_THREADS)) != 0)
    switch (code)
      {
      case OMP_FOR:
      case OMP_SIMD:
        if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_SCHEDULE)) != 0)
          cclauses[C_OMP_CLAUSE_SPLIT_FOR]
            = build_omp_clause (loc, OMP_CLAUSE_NOWAIT);
        break;
      case OMP_SECTIONS:
        cclauses[C_OMP_CLAUSE_SPLIT_SECTIONS]
          = build_omp_clause (loc, OMP_CLAUSE_NOWAIT);
        break;
      default:
        break;
      }
and I think the previous code did exactly that.

So, the patch is ok for trunk without the above hunk.

	Jakub
  

Patch

Fortran/openmp: Fix '!$omp end'

gcc/fortran/ChangeLog:

	* parse.c (decode_omp_directive): Fix permitting 'nowait' for some
	combined directives, add missing 'omp end ... loop'.
	(gfc_ascii_statement): Fix ST_OMP_END_TEAMS_LOOP result.
	* openmp.c (resolve_omp_clauses): Add missing combined loop constructs
	case values to the 'if(directive-name: ...)' check.
	* trans-openmp.c (gfc_split_omp_clauses): Put nowait on target if
	first leaf construct accepting it.
	(gfc_trans_omp_parallel_sections, gfc_trans_omp_parallel_workshare):
	Unset nowait for parallel if set.

gcc/testsuite/ChangeLog:

	* gfortran.dg/gomp/unexpected-end.f90: Update dg-error.
	* gfortran.dg/gomp/clauses-1.f90: New test.
	* gfortran.dg/gomp/nowait-2.f90: New test.
	* gfortran.dg/gomp/nowait-3.f90: New test.

 gcc/fortran/openmp.c                              |   3 +
 gcc/fortran/parse.c                               |  49 +-
 gcc/fortran/trans-openmp.c                        |   5 +
 gcc/testsuite/gfortran.dg/gomp/clauses-1.f90      | 667 ++++++++++++++++++++++
 gcc/testsuite/gfortran.dg/gomp/nowait-2.f90       | 240 ++++++++
 gcc/testsuite/gfortran.dg/gomp/nowait-3.f90       | 151 +++++
 gcc/testsuite/gfortran.dg/gomp/unexpected-end.f90 |  12 +-
 7 files changed, 1102 insertions(+), 25 deletions(-)

diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index 7b2df0d0be3..2893ab2befb 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -6232,6 +6232,7 @@  resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
 
 	    case EXEC_OMP_PARALLEL:
 	    case EXEC_OMP_PARALLEL_DO:
+	    case EXEC_OMP_PARALLEL_LOOP:
 	    case EXEC_OMP_PARALLEL_MASKED:
 	    case EXEC_OMP_PARALLEL_MASTER:
 	    case EXEC_OMP_PARALLEL_SECTIONS:
@@ -6285,6 +6286,7 @@  resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
 	    case EXEC_OMP_TARGET:
 	    case EXEC_OMP_TARGET_TEAMS:
 	    case EXEC_OMP_TARGET_TEAMS_DISTRIBUTE:
+	    case EXEC_OMP_TARGET_TEAMS_LOOP:
 	      ok = ifc == OMP_IF_TARGET;
 	      break;
 
@@ -6312,6 +6314,7 @@  resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses,
 	    case EXEC_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_DO:
 	    case EXEC_OMP_TARGET_PARALLEL:
 	    case EXEC_OMP_TARGET_PARALLEL_DO:
+	    case EXEC_OMP_TARGET_PARALLEL_LOOP:
 	      ok = ifc == OMP_IF_TARGET || ifc == OMP_IF_PARALLEL;
 	      break;
 
diff --git a/gcc/fortran/parse.c b/gcc/fortran/parse.c
index 12aa80ec45c..d4b985e75eb 100644
--- a/gcc/fortran/parse.c
+++ b/gcc/fortran/parse.c
@@ -915,15 +915,16 @@  decode_omp_directive (void)
       matcho ("error", gfc_match_omp_error, ST_OMP_ERROR);
       matcho ("end atomic", gfc_match_omp_eos_error, ST_OMP_END_ATOMIC);
       matcho ("end critical", gfc_match_omp_end_critical, ST_OMP_END_CRITICAL);
-      matchs ("end distribute parallel do simd", gfc_match_omp_eos_error,
+      matchs ("end distribute parallel do simd", gfc_match_omp_end_nowait,
 	      ST_OMP_END_DISTRIBUTE_PARALLEL_DO_SIMD);
-      matcho ("end distribute parallel do", gfc_match_omp_eos_error,
+      matcho ("end distribute parallel do", gfc_match_omp_end_nowait,
 	      ST_OMP_END_DISTRIBUTE_PARALLEL_DO);
       matchs ("end distribute simd", gfc_match_omp_eos_error,
 	      ST_OMP_END_DISTRIBUTE_SIMD);
       matcho ("end distribute", gfc_match_omp_eos_error, ST_OMP_END_DISTRIBUTE);
       matchs ("end do simd", gfc_match_omp_end_nowait, ST_OMP_END_DO_SIMD);
       matcho ("end do", gfc_match_omp_end_nowait, ST_OMP_END_DO);
+      matcho ("end loop", gfc_match_omp_eos_error, ST_OMP_END_LOOP);
       matchs ("end simd", gfc_match_omp_eos_error, ST_OMP_END_SIMD);
       matcho ("end masked taskloop simd", gfc_match_omp_eos_error,
 	      ST_OMP_END_MASKED_TASKLOOP_SIMD);
@@ -936,9 +937,12 @@  decode_omp_directive (void)
 	      ST_OMP_END_MASTER_TASKLOOP);
       matcho ("end master", gfc_match_omp_eos_error, ST_OMP_END_MASTER);
       matchs ("end ordered", gfc_match_omp_eos_error, ST_OMP_END_ORDERED);
-      matchs ("end parallel do simd", gfc_match_omp_eos_error,
+      matchs ("end parallel do simd", gfc_match_omp_end_nowait,
 	      ST_OMP_END_PARALLEL_DO_SIMD);
-      matcho ("end parallel do", gfc_match_omp_eos_error, ST_OMP_END_PARALLEL_DO);
+      matcho ("end parallel do", gfc_match_omp_end_nowait,
+	      ST_OMP_END_PARALLEL_DO);
+      matcho ("end parallel loop", gfc_match_omp_eos_error,
+	      ST_OMP_END_PARALLEL_LOOP);
       matcho ("end parallel masked taskloop simd", gfc_match_omp_eos_error,
 	      ST_OMP_END_PARALLEL_MASKED_TASKLOOP_SIMD);
       matcho ("end parallel masked taskloop", gfc_match_omp_eos_error,
@@ -951,46 +955,53 @@  decode_omp_directive (void)
 	      ST_OMP_END_PARALLEL_MASTER_TASKLOOP);
       matcho ("end parallel master", gfc_match_omp_eos_error,
 	      ST_OMP_END_PARALLEL_MASTER);
-      matcho ("end parallel sections", gfc_match_omp_eos_error,
+      matcho ("end parallel sections", gfc_match_omp_end_nowait,
 	      ST_OMP_END_PARALLEL_SECTIONS);
-      matcho ("end parallel workshare", gfc_match_omp_eos_error,
+      matcho ("end parallel workshare", gfc_match_omp_end_nowait,
 	      ST_OMP_END_PARALLEL_WORKSHARE);
       matcho ("end parallel", gfc_match_omp_eos_error, ST_OMP_END_PARALLEL);
       matcho ("end scope", gfc_match_omp_end_nowait, ST_OMP_END_SCOPE);
       matcho ("end sections", gfc_match_omp_end_nowait, ST_OMP_END_SECTIONS);
       matcho ("end single", gfc_match_omp_end_single, ST_OMP_END_SINGLE);
       matcho ("end target data", gfc_match_omp_eos_error, ST_OMP_END_TARGET_DATA);
-      matchs ("end target parallel do simd", gfc_match_omp_eos_error,
+      matchs ("end target parallel do simd", gfc_match_omp_end_nowait,
 	      ST_OMP_END_TARGET_PARALLEL_DO_SIMD);
-      matcho ("end target parallel do", gfc_match_omp_eos_error,
+      matcho ("end target parallel do", gfc_match_omp_end_nowait,
 	      ST_OMP_END_TARGET_PARALLEL_DO);
-      matcho ("end target parallel", gfc_match_omp_eos_error,
+      matcho ("end target parallel loop", gfc_match_omp_end_nowait,
+	      ST_OMP_END_TARGET_PARALLEL_LOOP);
+      matcho ("end target parallel", gfc_match_omp_end_nowait,
 	      ST_OMP_END_TARGET_PARALLEL);
-      matchs ("end target simd", gfc_match_omp_eos_error, ST_OMP_END_TARGET_SIMD);
+      matchs ("end target simd", gfc_match_omp_end_nowait, ST_OMP_END_TARGET_SIMD);
       matchs ("end target teams distribute parallel do simd",
-	      gfc_match_omp_eos_error,
+	      gfc_match_omp_end_nowait,
 	      ST_OMP_END_TARGET_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD);
-      matcho ("end target teams distribute parallel do", gfc_match_omp_eos_error,
+      matcho ("end target teams distribute parallel do",
+	      gfc_match_omp_end_nowait,
 	      ST_OMP_END_TARGET_TEAMS_DISTRIBUTE_PARALLEL_DO);
-      matchs ("end target teams distribute simd", gfc_match_omp_eos_error,
+      matchs ("end target teams distribute simd", gfc_match_omp_end_nowait,
 	      ST_OMP_END_TARGET_TEAMS_DISTRIBUTE_SIMD);
-      matcho ("end target teams distribute", gfc_match_omp_eos_error,
+      matcho ("end target teams distribute", gfc_match_omp_end_nowait,
 	      ST_OMP_END_TARGET_TEAMS_DISTRIBUTE);
-      matcho ("end target teams", gfc_match_omp_eos_error, ST_OMP_END_TARGET_TEAMS);
-      matcho ("end target", gfc_match_omp_eos_error, ST_OMP_END_TARGET);
+      matcho ("end target teams loop", gfc_match_omp_end_nowait,
+	      ST_OMP_END_TARGET_TEAMS_LOOP);
+      matcho ("end target teams", gfc_match_omp_end_nowait,
+	      ST_OMP_END_TARGET_TEAMS);
+      matcho ("end target", gfc_match_omp_end_nowait, ST_OMP_END_TARGET);
       matcho ("end taskgroup", gfc_match_omp_eos_error, ST_OMP_END_TASKGROUP);
       matchs ("end taskloop simd", gfc_match_omp_eos_error,
 	      ST_OMP_END_TASKLOOP_SIMD);
       matcho ("end taskloop", gfc_match_omp_eos_error, ST_OMP_END_TASKLOOP);
       matcho ("end task", gfc_match_omp_eos_error, ST_OMP_END_TASK);
-      matchs ("end teams distribute parallel do simd", gfc_match_omp_eos_error,
+      matchs ("end teams distribute parallel do simd", gfc_match_omp_end_nowait,
 	      ST_OMP_END_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD);
-      matcho ("end teams distribute parallel do", gfc_match_omp_eos_error,
+      matcho ("end teams distribute parallel do", gfc_match_omp_end_nowait,
 	      ST_OMP_END_TEAMS_DISTRIBUTE_PARALLEL_DO);
       matchs ("end teams distribute simd", gfc_match_omp_eos_error,
 	      ST_OMP_END_TEAMS_DISTRIBUTE_SIMD);
       matcho ("end teams distribute", gfc_match_omp_eos_error,
 	      ST_OMP_END_TEAMS_DISTRIBUTE);
+      matcho ("end teams loop", gfc_match_omp_eos_error, ST_OMP_END_TEAMS_LOOP);
       matcho ("end teams", gfc_match_omp_eos_error, ST_OMP_END_TEAMS);
       matcho ("end workshare", gfc_match_omp_end_nowait,
 	      ST_OMP_END_WORKSHARE);
@@ -2553,7 +2564,7 @@  gfc_ascii_statement (gfc_statement st)
       p = "!$OMP END TEAMS DISTRIBUTE SIMD";
       break;
     case ST_OMP_END_TEAMS_LOOP:
-      p = "!$OMP END TEAMS LOP";
+      p = "!$OMP END TEAMS LOOP";
       break;
     case ST_OMP_END_WORKSHARE:
       p = "!$OMP END WORKSHARE";
diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 6bc7e9a6017..928d205a5aa 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -5878,6 +5878,9 @@  gfc_split_omp_clauses (gfc_code *code,
 	  /* And this is copied to all.  */
 	  clausesa[GFC_OMP_SPLIT_TARGET].if_expr
 	    = code->ext.omp_clauses->if_expr;
+	  if (is_loop || !(mask & GFC_OMP_SPLIT_DO))
+	    clausesa[GFC_OMP_SPLIT_TARGET].nowait
+	      = code->ext.omp_clauses->nowait;
 	}
       if (mask & GFC_OMP_MASK_TEAMS)
 	{
@@ -6296,6 +6299,7 @@  gfc_trans_omp_parallel_sections (gfc_code *code)
 
   memset (&section_clauses, 0, sizeof (section_clauses));
   section_clauses.nowait = true;
+  code->ext.omp_clauses->nowait = false;
 
   gfc_start_block (&block);
   omp_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses,
@@ -6322,6 +6326,7 @@  gfc_trans_omp_parallel_workshare (gfc_code *code)
 
   memset (&workshare_clauses, 0, sizeof (workshare_clauses));
   workshare_clauses.nowait = true;
+  code->ext.omp_clauses->nowait = false;
 
   gfc_start_block (&block);
   omp_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses,
diff --git a/gcc/testsuite/gfortran.dg/gomp/clauses-1.f90 b/gcc/testsuite/gfortran.dg/gomp/clauses-1.f90
new file mode 100644
index 00000000000..d22274d7033
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/clauses-1.f90
@@ -0,0 +1,667 @@ 
+! { dg-do compile }
+
+module m
+  use iso_c_binding, only: c_intptr_t
+  implicit none (external, type)
+
+  integer(c_intptr_t), parameter :: &
+    omp_null_allocator = 0,         &
+    omp_default_mem_alloc = 1,      &
+    omp_large_cap_mem_alloc = 2,    &
+    omp_const_mem_alloc = 3,        &
+    omp_high_bw_mem_alloc = 4,      &
+    omp_low_lat_mem_alloc = 5,      &
+    omp_cgroup_mem_alloc = 6,       &
+    omp_pteam_mem_alloc = 7,        &
+    omp_thread_mem_alloc = 8
+
+  integer, parameter :: &
+    omp_allocator_handle_kind = c_intptr_t
+
+  integer :: t
+  !$omp threadprivate (t)
+
+  integer :: f, l, ll, r, r2
+  !$omp declare target (f, l, ll, r, r2)
+
+contains
+
+subroutine foo (d, m, i1, i2, i3, p, idp, s, nte, tl, nth, g, nta, fi, pp, q, dd, ntm)
+  !$omp declare target (foo)
+  integer :: d, m, p, idp, s, nte, tl, nth, g, nta, pp, q, dd, ntm
+  logical :: i1, i2, i3, fi
+  pointer :: q
+  integer :: i
+
+  !$omp distribute parallel do &
+  !$omp&  private (p) firstprivate (f) collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) order(concurrent)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp distribute parallel do simd &
+  !$omp&  private (p) firstprivate (f) collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) if(simd: i1) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) nontemporal(ntm) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) order(concurrent)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp distribute simd &
+  !$omp&  private (p) firstprivate (f) collapse(1) dist_schedule(static, 16) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) reduction(+:r) if(i1) nontemporal(ntm) &
+  !$omp&  order(concurrent)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+end
+
+subroutine qux (p)
+  !$omp declare target (qux)
+  integer, value :: p
+
+  !$omp loop bind(teams) order(concurrent) &
+  !$omp&  private (p) lastprivate (l) collapse(1) reduction(+:r)
+  do l = 1, 64
+    ll = ll + 1
+  end do
+end
+
+subroutine baz (d, m, i1, i2, i3, p, idp, s, nte, tl, nth, g, nta, fi, pp, q, dd, ntm)
+  integer :: d, m, p, idp, s, nte, tl, nth, g, nta, pp, q, dd, ntm
+  logical :: i1, i2, i3, fi
+  pointer :: q
+  integer :: i
+  !$omp distribute parallel do &
+  !$omp&  private (p) firstprivate (f) collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) copyin(t)
+  ! FIXME/TODO: allocate (p)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp distribute parallel do &
+  !$omp&  private (p) firstprivate (f) collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) order(concurrent)
+  ! FIXME/TODO: allocate (p)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp distribute parallel do simd &
+  !$omp&  private (p) firstprivate (f) collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) if(simd: i1) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) nontemporal(ntm) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) copyin(t)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+
+  !$omp distribute parallel do simd &
+  !$omp&  private (p) firstprivate (f) collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) if(simd: i1) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) nontemporal(ntm) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+
+  !$omp distribute simd &
+  !$omp&  private (p) firstprivate (f) collapse(1) dist_schedule(static, 16) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) reduction(+:r) if(i1) nontemporal(ntm) &
+  !$omp&  order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+
+  !$omp loop bind(parallel) order(concurrent) &
+  !$omp&  private (p) lastprivate (l) collapse(1) reduction(+:r)
+  do l = 1, 64
+    ll = ll + 1
+  end do
+end
+
+subroutine bar (d, m, i1, i2, i3, p, idp, s, nte, tl, nth, g, nta, fi, pp, q, dd, ntm)
+  integer :: d, m, p, idp, s, nte, tl, nth, g, nta, pp, q, dd(0:5), ntm
+  logical :: i1, i2, i3, fi
+  pointer :: q
+  integer :: i
+
+  !$omp do simd &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) linear (ll:1) reduction(+:r) schedule(static, 4) collapse(1) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) nontemporal(ntm) if(i1) order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+  !$omp end do simd
+
+  !$omp parallel do &
+  !$omp&  private (p) firstprivate (f) if (parallel: i2) default(shared) shared(s) copyin(t) reduction(+:r) num_threads (nth) &
+  !$omp&  proc_bind(spread) lastprivate (l) linear (ll:1) ordered schedule(static, 4) collapse(1)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+
+  !$omp parallel do &
+  !$omp&  private (p) firstprivate (f) if (parallel: i2) default(shared) shared(s) copyin(t) reduction(+:r) num_threads (nth) &
+  !$omp&  proc_bind(spread) lastprivate (l) linear (ll:1) schedule(static, 4) collapse(1) order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+
+  !$omp parallel do simd &
+  !$omp&  private (p) firstprivate (f) if (i2) default(shared) shared(s) copyin(t) reduction(+:r) num_threads (nth) &
+  !$omp&  proc_bind(spread) lastprivate (l) linear (ll:1) schedule(static, 4) collapse(1) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) nontemporal(ntm) order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+
+  !$omp parallel sections &
+  !$omp&  private (p) firstprivate (f) if (parallel: i2) default(shared) shared(s) copyin(t) reduction(+:r) num_threads (nth) &
+  !$omp&  proc_bind(spread) lastprivate (l)
+  ! FIXME/TODO: allocate (f)
+    !$omp section
+      block; end block
+    !$omp section
+      block; end block
+  !$omp end parallel sections
+
+  !$omp target parallel &
+  !$omp&  device(d) map (tofrom: m) if (target: i1) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  if (parallel: i2) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) &
+  !$omp&  depend(inout: dd(0)) in_reduction(+:r2)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  !$omp end target parallel nowait
+
+  !$omp target parallel do &
+  !$omp&  device(d) map (tofrom: m) if (target: i1) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  if (parallel: i2) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) linear (ll:1) ordered schedule(static, 4) collapse(1) depend(inout: dd(0)) &
+  !$omp&  in_reduction(+:r2)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+  !$omp end target parallel do nowait
+
+  !$omp target parallel do &
+  !$omp&  device(d) map (tofrom: m) if (target: i1) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  if (parallel: i2) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) linear (ll:1) schedule(static, 4) collapse(1) depend(inout: dd(0)) order(concurrent) &
+  !$omp&  in_reduction(+:r2)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+  !$omp end target parallel do nowait
+
+  !$omp target parallel do simd &
+  !$omp&  device(d) map (tofrom: m) if (target: i1) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  if (parallel: i2) default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) linear (ll:1) schedule(static, 4) collapse(1) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) depend(inout: dd(0)) nontemporal(ntm) if (simd: i3) order(concurrent) &
+  !$omp&  in_reduction(+:r2)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+  !$omp end target parallel do simd nowait
+
+  !$omp target teams &
+  !$omp&  device(d) map (tofrom: m) if (target: i1) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  shared(s) default(shared) reduction(+:r) num_teams(nte - 1:nte) thread_limit(tl) depend(inout: dd(0)) &
+  !$omp&  in_reduction(+:r2)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  !$omp end target teams nowait
+
+  !$omp target teams distribute &
+  !$omp&  device(d) map (tofrom: m) if (target: i1) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) order(concurrent) &
+  !$omp&  collapse(1) dist_schedule(static, 16) depend(inout: dd(0)) in_reduction(+:r2)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  do i = 1, 64
+  end do
+  !$omp end target teams distribute nowait
+
+  !$omp target teams distribute parallel do &
+  !$omp&  device(d) map (tofrom: m) if (target: i1) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  shared(s) default(shared) reduction(+:r) num_teams(nte-1:nte) thread_limit(tl) &
+  !$omp&  collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) depend(inout: dd(0)) order(concurrent) &
+  !$omp&  in_reduction(+:r2)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+  !$omp end target teams distribute parallel do nowait
+
+  !$omp target teams distribute parallel do simd &
+  !$omp&  device(d) map (tofrom: m) if (target: i1) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) &
+  !$omp&  collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) order(concurrent) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) depend(inout: dd(0)) nontemporal(ntm) if (simd: i3) &
+  !$omp&  in_reduction(+:r2)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+  !$omp end target teams distribute parallel do simd
+
+  !$omp target teams distribute simd &
+  !$omp&  device(d) map (tofrom: m) if (i1) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  shared(s) default(shared) reduction(+:r) num_teams(nte-1:nte) thread_limit(tl) &
+  !$omp&  collapse(1) dist_schedule(static, 16) order(concurrent) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) depend(inout: dd(0)) nontemporal(ntm) &
+  !$omp&  in_reduction(+:r2)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+  !$omp end target teams distribute simd
+
+  !$omp target simd &
+  !$omp&  device(d) map (tofrom: m) if (target: i1) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  safelen(8) simdlen(4) lastprivate (l) linear(ll: 1) aligned(q: 32) reduction(+:r) &
+  !$omp&  depend(inout: dd(0)) nontemporal(ntm) if(simd:i3) order(concurrent) &
+  !$omp&  in_reduction(+:r2)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc:f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+  !$omp end target simd
+
+  !$omp taskgroup task_reduction(+:r2)
+  ! FIXME/TODO: allocate (r2)
+  !$omp taskloop simd &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) grainsize (g) collapse(1) untied &
+  !$omp&  if(taskloop: i1) if(simd: i2) final(fi) mergeable priority (pp) &
+  !$omp&  safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) reduction(default, +:r) in_reduction(+:r2) nontemporal(ntm) &
+  !$omp&  order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+  !$omp end taskgroup
+
+  !$omp taskgroup task_reduction(+:r)
+  ! FIXME/TODO: allocate (r)
+  !$omp taskloop simd &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) grainsize (g) collapse(1) untied if(i1) &
+  !$omp&  final(fi) mergeable nogroup priority (pp) &
+  !$omp&  safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) in_reduction(+:r) nontemporal(ntm) &
+  !$omp&  order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+  !$omp end taskgroup
+
+  !$omp taskwait
+  !$omp taskloop simd &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) num_tasks (nta) collapse(1) if(taskloop: i1) &
+  !$omp&  final(fi) priority (pp) safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) reduction(+:r) if (simd: i3) nontemporal(ntm) &
+  !$omp&  order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll + 1
+  end do
+
+  !$omp target depend(inout: dd(0)) in_reduction(+:r2)
+  !$omp teams distribute &
+  !$omp&  private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) &
+  !$omp&  collapse(1) dist_schedule(static, 16) order(concurrent)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc: f)
+  do i = 1, 64
+  end do
+  !$omp end target nowait
+
+  !$omp target
+  !$omp teams distribute parallel do &
+  !$omp&  private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte-1:nte) thread_limit(tl) &
+  !$omp&  collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) order(concurrent)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc: f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+  !$omp end target
+
+  !$omp target
+  !$omp teams distribute parallel do simd &
+  !$omp&  private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) &
+  !$omp&  collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) order(concurrent) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) if (simd: i3) nontemporal(ntm)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc: f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+  !$omp end target
+
+  !$omp target
+  !$omp teams distribute simd &
+  !$omp&  private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte-1:nte) thread_limit(tl) &
+  !$omp&  collapse(1) dist_schedule(static, 16) order(concurrent) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) if(i3) nontemporal(ntm)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc: f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+  !$omp end target
+
+  !$omp teams distribute parallel do &
+  !$omp&  private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) &
+  !$omp&  collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) copyin(t)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp teams distribute parallel do &
+  !$omp&  private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte-1:nte) thread_limit(tl) &
+  !$omp&  collapse(1) dist_schedule(static, 16) order(concurrent) &
+  !$omp&  if (parallel: i2) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp teams distribute parallel do simd &
+  !$omp&  private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) &
+  !$omp&  collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) if (simd: i3) nontemporal(ntm) copyin(t)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp teams distribute parallel do simd &
+  !$omp&  private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte-1:nte) thread_limit(tl) &
+  !$omp&  collapse(1) dist_schedule(static, 16) &
+  !$omp&  if (parallel: i2) num_threads (nth) proc_bind(spread) &
+  !$omp&  lastprivate (l) schedule(static, 4) order(concurrent) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) if (simd: i3) nontemporal(ntm)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp teams distribute simd &
+  !$omp&  private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) &
+  !$omp&  collapse(1) dist_schedule(static, 16) order(concurrent) &
+  !$omp&  safelen(8) simdlen(4) aligned(q: 32) if(i3) nontemporal(ntm)
+  ! FIXME/TODO: allocate(f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp parallel master &
+  !$omp&  private (p) firstprivate (f) if (parallel: i2) default(shared) shared(s) reduction(+:r) &
+  !$omp&  num_threads (nth) proc_bind(spread) copyin(t)
+  ! FIXME/TODO: allocate (f)
+  !$omp end parallel master
+
+  !$omp parallel masked &
+  !$omp&  private (p) firstprivate (f) if (parallel: i2) default(shared) shared(s) reduction(+:r) &
+  !$omp&  num_threads (nth) proc_bind(spread) copyin(t) filter (d)
+  ! FIXME/TODO: allocate (f)
+  !$omp end parallel masked
+
+  !$omp taskgroup task_reduction (+:r2)
+  ! FIXME/TODO: allocate (r2)
+  !$omp master taskloop &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) grainsize (g) collapse(1) untied &
+  !$omp&  if(taskloop: i1) final(fi) mergeable priority (pp) &
+  !$omp&  reduction(default, +:r) in_reduction(+:r2)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+  !$omp end taskgroup
+
+  !$omp taskgroup task_reduction (+:r2)
+  ! FIXME/TODO: allocate (r2)
+  !$omp masked taskloop &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) grainsize (g) collapse(1) untied &
+  !$omp&  if(taskloop: i1) final(fi) mergeable priority (pp) reduction(default, +:r) in_reduction(+:r2) filter (d)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+  !$omp end taskgroup
+
+  !$omp taskgroup task_reduction (+:r2)
+  ! FIXME/TODO: allocate (r2)
+  !$omp master taskloop simd &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) grainsize (g) collapse(1) untied &
+  !$omp&  if(taskloop: i1) if(simd: i2) final(fi) mergeable priority (pp) &
+  !$omp&  safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) reduction(default, +:r) in_reduction(+:r2) nontemporal(ntm) &
+  !$omp&  order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+  !$omp end taskgroup
+
+  !$omp taskgroup task_reduction (+:r2)
+  ! FIXME/TODO: allocate (r2)
+  !$omp masked taskloop simd &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) grainsize (g) collapse(1) untied &
+  !$omp&  if(taskloop: i1) if(simd: i2) final(fi) mergeable priority (pp) &
+  !$omp&  safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) reduction(default, +:r) in_reduction(+:r2) nontemporal(ntm) &
+  !$omp&  order(concurrent) filter (d)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+  !$omp end taskgroup
+
+  !$omp parallel master taskloop &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) grainsize (g) collapse(1) untied &
+  !$omp&  if(taskloop: i1) final(fi) mergeable priority (pp) &
+  !$omp&  reduction(default, +:r) if (parallel: i2) num_threads (nth) proc_bind(spread) copyin(t)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp parallel masked taskloop &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) grainsize (g) collapse(1) untied &
+  !$omp&  if(taskloop: i1) final(fi) mergeable priority (pp) &
+  !$omp&  reduction(default, +:r) if (parallel: i2) num_threads (nth) proc_bind(spread) copyin(t) filter (d)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp parallel master taskloop simd &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) grainsize (g) collapse(1) untied &
+  !$omp&  if(taskloop: i1) if(simd: i2) final(fi) mergeable priority (pp) &
+  !$omp&  safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) reduction(default, +:r) nontemporal(ntm) if (parallel: i2) &
+  !$omp&  num_threads (nth) proc_bind(spread) copyin(t) order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp parallel masked taskloop simd &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) grainsize (g) collapse(1) untied &
+  !$omp&  if(taskloop: i1) if(simd: i2) final(fi) mergeable priority (pp) &
+  !$omp&  safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) reduction(default, +:r) nontemporal(ntm) if (parallel: i2) &
+  !$omp&  num_threads (nth) proc_bind(spread) copyin(t) order(concurrent) filter (d)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp taskgroup task_reduction (+:r2)
+  ! FIXME/TODO: allocate (r2)
+  !$omp master taskloop &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) num_tasks (nta) collapse(1) &
+  !$omp&  untied if(i1) final(fi) mergeable priority (pp) reduction(default, +:r) in_reduction(+:r2)
+  do i = 1, 64
+    ll = ll +1
+  end do
+  !$omp end taskgroup
+
+  !$omp taskgroup task_reduction (+:r2)
+  ! FIXME/TODO: allocate (r2)
+  !$omp masked taskloop &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) num_tasks (nta) collapse(1) &
+  !$omp&  untied if(i1) final(fi) mergeable priority (pp) reduction(default, +:r) in_reduction(+:r2) filter (d)
+  do i = 1, 64
+    ll = ll +1
+  end do
+  !$omp end taskgroup
+
+  !$omp taskgroup task_reduction (+:r2)
+  ! FIXME/TODO: allocate (r2)
+  !$omp master taskloop simd &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) num_tasks (nta) collapse(1) untied if(i1) &
+  !$omp&  final(fi) mergeable priority (pp) safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) reduction(default, +:r) &
+  !$omp&  in_reduction(+:r2) nontemporal(ntm) order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+  !$omp end taskgroup
+
+  !$omp taskgroup task_reduction (+:r2)
+  ! FIXME/TODO: allocate (r2)
+  !$omp masked taskloop simd &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) num_tasks (nta) collapse(1) untied &
+  !$omp&  if(i1) final(fi) mergeable priority (pp) safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) reduction(default, +:r) &
+  !$omp&  in_reduction(+:r2) nontemporal(ntm) order(concurrent) filter (d)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+  !$omp end taskgroup
+
+  !$omp parallel master taskloop &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) num_tasks (nta) collapse(1) untied &
+  !$omp&  if(i1) final(fi) mergeable priority (pp) reduction(default, +:r) num_threads (nth) proc_bind(spread) copyin(t)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp parallel masked taskloop &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) num_tasks (nta) collapse(1) untied &
+  !$omp&  if(i1) final(fi) mergeable priority (pp) reduction(default, +:r) num_threads (nth) proc_bind(spread) &
+  !$omp&  copyin(t) filter (d)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp parallel master taskloop simd &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) num_tasks (nta) collapse(1) untied &
+  !$omp&  if(i1) final(fi) mergeable priority (pp) safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) reduction(default, +:r) &
+  !$omp&  nontemporal(ntm) num_threads (nth) proc_bind(spread) copyin(t) order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp parallel masked taskloop simd &
+  !$omp&  private (p) firstprivate (f) lastprivate (l) shared (s) default(shared) num_tasks (nta) collapse(1) untied if(i1) &
+  !$omp&  final(fi) mergeable priority (pp) safelen(8) simdlen(4) linear(ll: 1) aligned(q: 32) reduction(default, +:r) &
+  !$omp&  nontemporal(ntm) num_threads (nth) proc_bind(spread) copyin(t) order(concurrent) filter (d)
+  ! FIXME/TODO: allocate (f)
+  do i = 1, 64
+    ll = ll +1
+  end do
+
+  !$omp loop bind(thread) order(concurrent) &
+  !$omp&  private (p) lastprivate (l) collapse(1) reduction(+:r)
+  do l = 1, 64
+    ll = ll + 1
+  end do
+
+  !$omp parallel loop &
+  !$omp&  private (p) firstprivate (f) default(shared) shared(s) copyin(t) reduction(+:r) num_threads (nth) &
+  !$omp&  proc_bind(spread) lastprivate (l) collapse(1) bind(parallel) order(concurrent) if (parallel: i2)
+  ! FIXME/TODO: allocate (f)
+  do l = 1, 64
+    ll = ll + 1
+  end do
+
+  !$omp parallel loop &
+  !$omp&  private (p) firstprivate (f) default(shared) shared(s) copyin(t) reduction(+:r) num_threads (nth) &
+  !$omp&  proc_bind(spread) lastprivate (l) collapse(1) if (parallel: i2)
+  ! FIXME/TODO: allocate (f)
+  do l = 1, 64
+    ll = ll + 1
+  end do
+
+  !$omp teams loop &
+  !$omp&  private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte-1:nte) thread_limit(tl) &
+  !$omp&  collapse(1) lastprivate (l) bind(teams)
+  ! FIXME/TODO: allocate (f)
+  do l = 1, 64
+  end do
+
+  !$omp teams loop &
+  !$omp&  private(p) firstprivate (f) shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) &
+  !$omp&  collapse(1) lastprivate (l) order(concurrent)
+  ! FIXME/TODO: allocate (f)
+  do l = 1, 64
+  end do
+
+  !$omp target parallel loop &
+  !$omp&  device(d) map (tofrom: m) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  default(shared) shared(s) reduction(+:r) num_threads (nth) proc_bind(spread) &
+  !$omp&  depend(inout: dd(0)) lastprivate (l) order(concurrent) collapse(1) in_reduction(+:r2) &
+  !$omp&  if (target: i1) if (parallel: i2)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc: f)
+  do l = 1, 64
+  end do
+  !$omp end target parallel loop
+
+  !$omp target teams loop &
+  !$omp&  device(d) map (tofrom: m) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  shared(s) default(shared) reduction(+:r) num_teams(nte-1:nte) thread_limit(tl) depend(inout: dd(0)) &
+  !$omp&  lastprivate (l) bind(teams) collapse(1) in_reduction(+:r2) if (target: i1)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc: f)
+  do l = 1, 64
+  end do
+  !$omp end target teams loop
+
+  !$omp target teams loop &
+  !$omp&  device(d) map (tofrom: m) private (p) firstprivate (f) defaultmap(tofrom: scalar) is_device_ptr (idp) &
+  !$omp&  shared(s) default(shared) reduction(+:r) num_teams(nte) thread_limit(tl) depend(inout: dd(0)) &
+  !$omp&  lastprivate (l) order(concurrent) collapse(1) in_reduction(+:r2) if (target: i1)
+  ! FIXME/TODO: allocate (omp_default_mem_alloc: f)
+  do l = 1, 64
+  end do
+  !$omp end target teams loop
+
+end
+end module
diff --git a/gcc/testsuite/gfortran.dg/gomp/nowait-2.f90 b/gcc/testsuite/gfortran.dg/gomp/nowait-2.f90
new file mode 100644
index 00000000000..a1a3e86f6b0
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/nowait-2.f90
@@ -0,0 +1,240 @@ 
+! Cross check that it is accepted without nowait
+subroutine bar()
+implicit none
+integer :: i
+!$omp atomic write
+i = 5
+!$omp end atomic
+
+!$omp critical
+!$omp end critical
+
+!$omp distribute
+do i = 1, 5
+end do
+!$omp end distribute
+
+!$omp distribute simd
+do i = 1, 5
+end do
+!$omp end distribute simd
+
+!$omp masked
+!$omp end masked
+
+!$omp masked taskloop
+do i = 1, 5
+end do
+!$omp end masked taskloop
+
+!$omp masked taskloop simd
+do i = 1, 5
+end do
+!$omp end masked taskloop simd
+
+!$omp master
+!$omp end master
+
+!$omp master taskloop
+do i = 1, 5
+end do
+!$omp end master taskloop
+
+!$omp master taskloop simd
+do i = 1, 5
+end do
+!$omp end master taskloop simd
+
+!$omp ordered
+!$omp end ordered
+
+!$omp parallel
+!$omp end parallel
+
+!$omp parallel masked
+!$omp end parallel masked
+
+!$omp parallel masked taskloop
+do i = 1, 5
+end do
+!$omp end parallel masked taskloop
+
+!$omp parallel masked taskloop simd
+do i = 1, 5
+end do
+!$omp end parallel masked taskloop simd
+
+!$omp parallel master
+!$omp end parallel master
+
+!$omp parallel master taskloop
+do i = 1, 5
+end do
+!$omp end parallel master taskloop
+
+!$omp parallel master taskloop simd
+do i = 1, 5
+end do
+!$omp end parallel master taskloop simd
+
+!$omp parallel sections
+!$omp end parallel sections
+
+!$omp simd
+do i = 1, 5
+end do
+!$omp end simd
+
+!$omp task
+!$omp end task
+
+!$omp taskgroup
+!$omp end taskgroup
+
+!$omp taskloop
+do i = 1, 5
+end do
+!$omp end taskloop
+
+!$omp taskloop simd
+do i = 1, 5
+end do
+!$omp end taskloop simd
+
+!$omp teams
+!$omp end teams
+
+!$omp teams distribute
+do i = 1, 5
+end do
+!$omp end teams distribute
+
+!$omp teams distribute simd
+do i = 1, 5
+end do
+!$omp end teams distribute simd
+
+!$omp target data map(tofrom:i)
+!$omp end target data
+
+end
+
+! invalid nowait
+
+subroutine foo
+implicit none
+integer :: i
+!$omp atomic write
+i = 5
+!$omp end atomic nowait  ! { dg-error "Unexpected junk" }
+
+!$omp critical
+!$omp end critical nowait  ! { dg-error "Unexpected junk" }
+
+!$omp distribute
+do i = 1, 5
+end do
+!$omp end distribute nowait  ! { dg-error "Unexpected junk" }
+
+!$omp distribute simd
+do i = 1, 5
+end do
+!$omp end distribute simd nowait  ! { dg-error "Unexpected junk" }
+
+!$omp masked
+!$omp end masked nowait  ! { dg-error "Unexpected junk" }
+
+!$omp masked taskloop
+do i = 1, 5
+end do
+!$omp end masked taskloop nowait  ! { dg-error "Unexpected junk" }
+
+!$omp masked taskloop simd
+do i = 1, 5
+end do
+!$omp end masked taskloop simd nowait  ! { dg-error "Unexpected junk" }
+
+!$omp master
+!$omp end master nowait  ! { dg-error "Unexpected junk" }
+
+!$omp master taskloop
+do i = 1, 5
+end do
+!$omp end master taskloop nowait  ! { dg-error "Unexpected junk" }
+
+!$omp master taskloop simd
+do i = 1, 5
+end do
+!$omp end master taskloop simd nowait  ! { dg-error "Unexpected junk" }
+
+!$omp ordered
+!$omp end ordered nowait  ! { dg-error "Unexpected junk" }
+
+!$omp parallel
+!$omp end parallel nowait  ! { dg-error "Unexpected junk" }
+
+!$omp parallel masked
+!$omp end parallel masked nowait  ! { dg-error "Unexpected junk" }
+
+!$omp parallel masked taskloop
+do i = 1, 5
+end do
+!$omp end parallel masked taskloop nowait  ! { dg-error "Unexpected junk" }
+
+!$omp parallel masked taskloop simd
+do i = 1, 5
+end do
+!$omp end parallel masked taskloop simd nowait  ! { dg-error "Unexpected junk" }
+
+!$omp parallel master
+!$omp end parallel master nowait  ! { dg-error "Unexpected junk" }
+
+!$omp parallel master taskloop
+do i = 1, 5
+end do
+!$omp end parallel master taskloop nowait  ! { dg-error "Unexpected junk" }
+
+!$omp parallel master taskloop simd
+do i = 1, 5
+end do
+!$omp end parallel master taskloop simd nowait  ! { dg-error "Unexpected junk" }
+
+!$omp simd
+do i = 1, 5
+end do
+!$omp end simd nowait  ! { dg-error "Unexpected junk" }
+
+!$omp task
+!$omp end task nowait  ! { dg-error "Unexpected junk" }
+
+!$omp taskgroup
+!$omp end taskgroup nowait  ! { dg-error "Unexpected junk" }
+
+!$omp taskloop
+do i = 1, 5
+end do
+!$omp end taskloop nowait  ! { dg-error "Unexpected junk" }
+
+!$omp taskloop simd
+do i = 1, 5
+end do
+!$omp end taskloop simd nowait  ! { dg-error "Unexpected junk" }
+
+!$omp teams
+!$omp end teams nowait  ! { dg-error "Unexpected junk" }
+
+!$omp teams distribute
+do i = 1, 5
+end do
+!$omp end teams distribute nowait  ! { dg-error "Unexpected junk" }
+
+!$omp teams distribute simd
+do i = 1, 5
+end do
+!$omp end teams distribute simd nowait  ! { dg-error "Unexpected junk" }
+
+!$omp target data map(tofrom:i)
+!$omp end target data nowait  ! { dg-error "Unexpected junk" }
+
+end  ! { dg-error "Unexpected END statement" }
+! { dg-prune-output "Unexpected end of file" }
diff --git a/gcc/testsuite/gfortran.dg/gomp/nowait-3.f90 b/gcc/testsuite/gfortran.dg/gomp/nowait-3.f90
new file mode 100644
index 00000000000..94d95ba6dc9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/gomp/nowait-3.f90
@@ -0,0 +1,151 @@ 
+! { dg-additional-options "-fdump-tree-original" }
+
+subroutine foo
+implicit none
+integer :: i, a(5)
+
+!$omp distribute parallel do
+do i = 1, 5
+end do
+!$omp end distribute parallel do nowait  ! NOTE(review): OpenMP 5.1 [221:17-23] disallows 'nowait' on combined parallel worksharing-loop end directives - confirm
+
+!$omp distribute parallel do simd
+do i = 1, 5
+end do
+!$omp end distribute parallel do simd nowait  ! NOTE(review): OpenMP 5.1 [221:17-23] disallows 'nowait' on combined parallel worksharing-loop end directives - confirm
+
+!$omp do
+do i = 1, 5
+end do
+!$omp end do nowait
+
+!$omp do simd
+do i = 1, 5
+end do
+!$omp end do simd nowait
+
+!$omp parallel do
+do i = 1, 5
+end do
+!$omp end parallel do nowait
+
+!$omp parallel sections
+  !$omp section
+  block; end block
+!$omp end parallel sections nowait
+
+!$omp parallel do simd
+do i = 1, 5
+end do
+!$omp end parallel do simd nowait
+
+!$omp parallel workshare
+a(:) = 5
+!$omp end parallel workshare nowait
+
+!$omp scope
+!$omp end scope nowait
+
+!$omp sections
+  !$omp section
+  block; end block
+!$omp end sections nowait
+
+!$omp single
+!$omp end single nowait
+
+!$omp target
+!$omp end target nowait
+
+!$omp target parallel
+!$omp end target parallel nowait
+
+!$omp target parallel do
+do i = 1, 5
+end do
+!$omp end target parallel do  ! NOTE(review): only end directive in this file without 'nowait' - intentional? Adding it changes the dg-final scan counts below
+
+!$omp target parallel do simd
+do i = 1, 5
+end do
+!$omp end target parallel do simd nowait
+
+!$omp target parallel loop
+do i = 1, 5
+end do
+!$omp end target parallel loop nowait
+
+!$omp target simd
+do i = 1, 5
+end do
+!$omp end target simd nowait
+
+!$omp target teams
+!$omp end target teams nowait
+
+!$omp target teams distribute
+do i = 1, 5
+end do
+!$omp end target teams distribute nowait
+
+!$omp target teams distribute parallel do
+do i = 1, 5
+end do
+!$omp end target teams distribute parallel do nowait
+
+!$omp target teams distribute parallel do simd
+do i = 1, 5
+end do
+!$omp end target teams distribute parallel do simd nowait
+
+!$omp target teams distribute simd
+do i = 1, 5
+end do
+!$omp end target teams distribute simd nowait
+
+!$omp target teams loop
+do i = 1, 5
+end do
+!$omp end target teams loop nowait
+
+!$omp teams distribute parallel do
+do i = 1, 5
+end do
+!$omp end teams distribute parallel do nowait
+
+!$omp teams distribute parallel do simd
+do i = 1, 5
+end do
+!$omp end teams distribute parallel do simd nowait
+
+!$omp workshare
+A(:) = 5
+!$omp end workshare nowait
+end
+
+! Expected with 'nowait'
+
+! { dg-final { scan-tree-dump-times "#pragma omp for nowait" 12 "original" } }
+! { dg-final { scan-tree-dump-times "#pragma omp for schedule\\(static\\) nowait" 2 "original" } }
+! { dg-final { scan-tree-dump-times "#pragma omp sections nowait" 2 "original" } }
+! { dg-final { scan-tree-dump-times "#pragma omp single nowait" 1 "original" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target nowait" 7 "original" } }
+
+! Never:
+
+! { dg-final { scan-tree-dump-not "#pragma omp distribute\[^\n\r]*nowait" "original" } }
+! { dg-final { scan-tree-dump-not "#pragma omp loop\[^\n\r]*nowait" "original" } }
+! { dg-final { scan-tree-dump-not "#pragma omp parallel\[^\n\r]*nowait" "original" } }
+! { dg-final { scan-tree-dump-not "#pragma omp section\[^s\]\[^\n\r]*nowait" "original" } }
+! { dg-final { scan-tree-dump-not "#pragma omp simd\[^\n\r]*nowait" "original" } }
+! { dg-final { scan-tree-dump-not "#pragma omp teams\[^\n\r]*nowait" "original" } }
+
+! Sometimes or never with nowait:
+
+! { dg-final { scan-tree-dump-times "#pragma omp distribute\[\n\r]" 8 "original" } }
+! { dg-final { scan-tree-dump-times "#pragma omp loop\[\n\r]" 2 "original" } }
+! { dg-final { scan-tree-dump-times "#pragma omp parallel\[\n\r]" 14 "original" } }
+! { dg-final { scan-tree-dump-times "#pragma omp section\[\n\r]" 2 "original" } }
+! { dg-final { scan-tree-dump-times "#pragma omp simd linear\\(i:1\\)\[\n\r]" 8 "original" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target\[\n\r]" 5 "original" } }
+! { dg-final { scan-tree-dump-times "#pragma omp teams\[\n\r]" 8 "original" } }
diff --git a/gcc/testsuite/gfortran.dg/gomp/unexpected-end.f90 b/gcc/testsuite/gfortran.dg/gomp/unexpected-end.f90
index 96f10b594cf..70f54f9be5e 100644
--- a/gcc/testsuite/gfortran.dg/gomp/unexpected-end.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/unexpected-end.f90
@@ -16,12 +16,12 @@ 
 
 !$omp end DO SIMD  ! { dg-error "Unexpected !.OMP END DO SIMD" }
 
-!$omp end LOOP  ! { dg-error "Unclassifiable OpenMP directive" }
+!$omp end LOOP  ! { dg-error "Unexpected !.OMP END LOOP" }
 
 !$omp parallel loop
 do i = 1, 5
 end do
-!$omp end LOOP  ! { dg-error "Unclassifiable OpenMP directive" }
+!$omp end LOOP  ! { dg-error "Unexpected !.OMP END LOOP" }
 
 !$omp end MASKED  ! { dg-error "Unexpected !.OMP END MASKED" }
 
@@ -44,7 +44,7 @@  end do
 !$omp end PARALLEL DO SIMD  ! { dg-error "Unexpected !.OMP END PARALLEL DO SIMD" }
 
 !$omp loop
-!$omp end PARALLEL LOOP  ! { dg-error "Unexpected junk" }
+!$omp end PARALLEL LOOP  ! { dg-error "Unexpected !.OMP END PARALLEL LOOP" }
 
 !$omp end PARALLEL MASKED  ! { dg-error "Unexpected !.OMP END PARALLEL MASKED" }
 
@@ -80,7 +80,7 @@  end do
 
 !$omp end TARGET PARALLEL DO SIMD  ! { dg-error "Unexpected !.OMP END TARGET PARALLEL DO SIMD" }
 
-!$omp end TARGET PARALLEL LOOP  ! { dg-error "Unexpected junk" }
+!$omp end TARGET PARALLEL LOOP  ! { dg-error "Unexpected !.OMP END TARGET PARALLEL LOOP" }
 
 !$omp end TARGET SIMD  ! { dg-error "Unexpected !.OMP END TARGET SIMD" }
 
@@ -94,7 +94,7 @@  end do
 
 !$omp end TARGET TEAMS DISTRIBUTE SIMD  ! { dg-error "Unexpected !.OMP END TARGET TEAMS DISTRIBUTE SIMD" }
 
-!$omp end TARGET TEAMS LOOP  ! { dg-error "Unexpected junk" }
+!$omp end TARGET TEAMS LOOP  ! { dg-error "Unexpected !.OMP END TARGET TEAMS LOOP" }
 
 !$omp end TASK  ! { dg-error "Unexpected !.OMP END TASK" }
 
@@ -114,7 +114,7 @@  end do
 
 !$omp end TEAMS DISTRIBUTE SIMD  ! { dg-error "Unexpected !.OMP END TEAMS DISTRIBUTE SIMD" }
 
-!$omp end TEAMS LOOP  ! { dg-error "Unexpected junk" }
+!$omp end TEAMS LOOP  ! { dg-error "Unexpected !.OMP END TEAMS LOOP" }
 
 !$omp end WORKSHARE  ! { dg-error "Unexpected !.OMP END WORKSHARE" }