Enhance further testcases to verify handling of OpenACC privatization level [PR90115]

Message ID 87czirmibw.fsf@euler.schwinge.homeip.net
State New
Headers
Series Enhance further testcases to verify handling of OpenACC privatization level [PR90115]

Commit Message

Thomas Schwinge March 12, 2022, 1:05 p.m. UTC
  Hi!

On 2021-05-21T21:29:19+0200, I wrote:
> I've pushed "[OpenACC privatization] Largely extend diagnostics and
> corresponding testsuite coverage [PR90115]" to master branch in commit
> 11b8286a83289f5b54e813f14ff56d730c3f3185

To demonstrate what later changes leave unchanged versus how they do
change things, I've pushed to master branch commit
2e53fa7bb2ae9fe1152c27e423be9e261da82ddc "Enhance further testcases to
verify handling of OpenACC privatization level [PR90115]", see attached.


Grüße
 Thomas


-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
  

Patch

From 2e53fa7bb2ae9fe1152c27e423be9e261da82ddc Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Fri, 11 Mar 2022 15:10:59 +0100
Subject: [PATCH] Enhance further testcases to verify handling of OpenACC
 privatization level [PR90115]

As originally introduced in commit 11b8286a83289f5b54e813f14ff56d730c3f3185
"[OpenACC privatization] Largely extend diagnostics and corresponding testsuite
coverage [PR90115]".

	PR middle-end/90115
	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/default-1.c: Enhance.
	* testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
	* testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90: Likewise.
---
 .../libgomp.oacc-c-c++-common/default-1.c     |  32 ++-
 .../kernels-reduction-1.c                     |  14 +-
 .../libgomp.oacc-c-c++-common/parallel-dims.c | 261 +++++++++++++++---
 .../kernels-reduction-1.f90                   |  14 +-
 4 files changed, 266 insertions(+), 55 deletions(-)

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/default-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/default-1.c
index 1ac0b9587b9..0ac8d7132d4 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/default-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/default-1.c
@@ -1,4 +1,18 @@ 
-/* { dg-do run } */
+/* { dg-additional-options "-fopt-info-all-omp" }
+   { dg-additional-options "-foffload=-fopt-info-all-omp" } */
+
+/* { dg-additional-options "--param=openacc-privatization=noisy" }
+   { dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
+   Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
+   { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
+
+/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
+   passed to 'incr' may be unset, and in that case, it will be set to [...]",
+   so to maintain compatibility with earlier Tcl releases, we manually
+   initialize counter variables:
+   { dg-line l_dummy[variable c_compute 0 c_loop_i 0] }
+   { dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid
+   "WARNING: dg-line var l_dummy defined, but not used".  */
 
 #include  <openacc.h>
 
@@ -13,10 +27,15 @@  int test_parallel ()
     ary[i] = ~0;
 
   /* val defaults to firstprivate, ary defaults to copy.  */
-#pragma acc parallel num_gangs (32) copy (ok) copy(ondev)
+#pragma acc parallel num_gangs (32) copy (ok) copy(ondev) /* { dg-line l_compute[incr c_compute] } */
+  /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
   {
     ondev = acc_on_device (acc_device_not_host);
-#pragma acc loop gang(static:1)
+    /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+       ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
+#pragma acc loop gang(static:1) /* { dg-line l_loop_i[incr c_loop_i] } */
+    /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+    /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
     for (unsigned i = 0; i < 32; i++)
       {
 	if (val != 2)
@@ -51,10 +70,13 @@  int test_kernels ()
     ary[i] = ~0;
 
   /* val defaults to copy, ary defaults to copy.  */
-#pragma acc kernels copy(ondev)
+#pragma acc kernels copy(ondev) /* { dg-line l_compute[incr c_compute] } */
+  /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+  /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */
   {
     ondev = acc_on_device (acc_device_not_host);
-#pragma acc loop 
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
+    /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
     for (unsigned i = 0; i < 32; i++)
       {
 	ary[i] = val;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c
index 95f1b77986c..fbd9815f683 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c
@@ -1,6 +1,14 @@ 
 /* Verify that a simple, explicit acc loop reduction works inside
  a kernels region.  */
 
+/* { dg-additional-options "-fopt-info-all-omp" }
+   { dg-additional-options "-foffload=-fopt-info-all-omp" } */
+
+/* { dg-additional-options "--param=openacc-privatization=noisy" }
+   { dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
+   Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
+   { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
+
 #include <stdlib.h>
 
 #define N 100
@@ -10,9 +18,11 @@  main ()
 {
   int i, red = 0;
 
-#pragma acc kernels
+#pragma acc kernels /* { dg-line l_compute1 } */
+  /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 } */
   {
-#pragma acc loop reduction (+:red)
+#pragma acc loop reduction (+:red) /* { dg-line l_loop_i1 } */
+    /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i1 } */
   for (i = 0; i < N; i++)
     red++;
   }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index c2f264a1ec8..f9c7aed3a56 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -1,6 +1,22 @@ 
 /* OpenACC parallelism dimensions clauses: num_gangs, num_workers,
    vector_length.  */
 
+/* { dg-additional-options "-fopt-info-all-omp" }
+   { dg-additional-options "-foffload=-fopt-info-all-omp" } */
+
+/* { dg-additional-options "--param=openacc-privatization=noisy" }
+   { dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
+   Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
+   { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} } */
+
+/* It's only with Tcl 8.5 (released in 2007) that "the variable 'varName'
+   passed to 'incr' may be unset, and in that case, it will be set to [...]",
+   so to maintain compatibility with earlier Tcl releases, we manually
+   initialize counter variables:
+   { dg-line l_dummy[variable c_compute 0 c_loop_i 0 c_loop_j 0 c_loop_k 0] }
+   { dg-message dummy {} { target iN-VAl-Id } l_dummy } to avoid
+   "WARNING: dg-line var l_dummy defined, but not used".  */
+
 /* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
    aspects of that functionality.  */
 
@@ -11,18 +27,21 @@ 
 #include <gomp-constants.h>
 
 #pragma acc routine seq
+inline __attribute__ ((always_inline))
 static int acc_gang ()
 {
   return __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
 }
 
 #pragma acc routine seq
+inline __attribute__ ((always_inline))
 static int acc_worker ()
 {
   return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
 }
 
 #pragma acc routine seq
+inline __attribute__ ((always_inline))
 static int acc_vector ()
 {
   return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
@@ -39,14 +58,19 @@  int main ()
 
   /* GR, WS, VS.  */
   {
-#define GANGS 0 /* { dg-warning "'num_gangs' value must be positive" "" { target c } } */
+#define GANGS 0
+    /* { dg-warning {'num_gangs' value must be positive} {} { target c } .-1 } */
     int gangs_actual = GANGS;
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (gangs_actual) \
+#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
+  copy (gangs_actual) \
   reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
-  num_gangs (GANGS) /* { dg-warning "'num_gangs' value must be positive" "" { target c++ } } */
+  num_gangs (GANGS)
+    /* { dg-note {in expansion of macro 'GANGS'} {} { target c } .-1 } */
+    /* { dg-warning {'num_gangs' value must be positive} {} { target c++ } .-2 } */
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
     {
       /* We're actually executing with num_gangs (1).  */
       gangs_actual = 1;
@@ -68,18 +92,27 @@  int main ()
 
   /* GP, WS, VS.  */
   {
-#define GANGS 0 /* { dg-warning "'num_gangs' value must be positive" "" { target c } } */
+#define GANGS 0
+    /* { dg-warning {'num_gangs' value must be positive} {} { target c } .-1 } */
     int gangs_actual = GANGS;
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (gangs_actual) \
-  num_gangs (GANGS) /* { dg-warning "'num_gangs' value must be positive" "" { target c++ } } */
-    /* { dg-warning "region contains gang partitioned code but is not gang partitioned" "" { target *-*-* } .-2 } */
+#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
+  copy (gangs_actual) \
+  num_gangs (GANGS)
+    /* { dg-note {in expansion of macro 'GANGS'} {} { target c } .-1 } */
+    /* { dg-warning {'num_gangs' value must be positive} {} { target c++ } .-2 } */
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-warning {region contains gang partitioned code but is not gang partitioned} {} { target *-*-* } l_compute$c_compute } */
     {
       /* We're actually executing with num_gangs (1).  */
       gangs_actual = 1;
-#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
+  gang \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+      /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
       for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
 	{
 	  gangs_min = gangs_max = acc_gang ();
@@ -98,18 +131,27 @@  int main ()
 
   /* GR, WP, VS.  */
   {
-#define WORKERS 0 /* { dg-warning "'num_workers' value must be positive" "" { target c } } */
+#define WORKERS 0
+    /* { dg-warning {'num_workers' value must be positive} {} { target c } .-1 } */
     int workers_actual = WORKERS;
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (workers_actual) \
-  num_workers (WORKERS) /* { dg-warning "'num_workers' value must be positive" "" { target c++ } } */
-    /* { dg-warning "region contains worker partitioned code but is not worker partitioned" "" { target *-*-* } .-2 } */
+#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
+  copy (workers_actual) \
+  num_workers (WORKERS)
+    /* { dg-note {in expansion of macro 'WORKERS'} {} { target c } .-1 } */
+    /* { dg-warning {'num_workers' value must be positive} {} { target c++ } .-2 } */
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-warning {region contains worker partitioned code but is not worker partitioned} {} { target *-*-* } l_compute$c_compute } */
     {
       /* We're actually executing with num_workers (1).  */
       workers_actual = 1;
-#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
+  worker \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+      /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
       for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
 	{
 	  gangs_min = gangs_max = acc_gang ();
@@ -128,22 +170,34 @@  int main ()
 
   /* GR, WS, VP.  */
   {
-#define VECTORS 0 /* { dg-warning "'vector_length' value must be positive" "" { target c } } */
+#define VECTORS 0
+    /* { dg-warning {'vector_length' value must be positive} {} { target c } .-1 } */
     int vectors_actual = VECTORS;
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (vectors_actual) /* { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } } */ \
-  vector_length (VECTORS) /* { dg-warning "'vector_length' value must be positive" "" { target c++ } } */
-    /* { dg-warning "region contains vector partitioned code but is not vector partitioned" "" { target *-*-* } .-2 } */
+#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
+  copy (vectors_actual) \
+  vector_length (VECTORS)
+    /* { dg-note {in expansion of macro 'VECTORS'} {} { target c } .-1 } */
+    /* { dg-warning {'vector_length' value must be positive} {} { target c++ } .-2 } */
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-warning {region contains vector partitioned code but is not vector partitioned} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
     {
       /* We're actually executing with vector_length (1), just the GCC nvptx
 	 back end enforces vector_length (32).  */
       if (acc_on_device (acc_device_nvidia))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	vectors_actual = 32;
       else
 	vectors_actual = 1;
-#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
+  vector \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+      /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
       for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
 	{
 	  gangs_min = gangs_max = acc_gang ();
@@ -178,12 +232,16 @@  int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (gangs_actual) \
+#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
+  copy (gangs_actual) \
   reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) \
   num_gangs (gangs)
-    /* { dg-bogus "warning: region is gang partitioned but does not contain gang partitioned code" "TODO 'reduction'" { xfail *-*-* } .-3 } */
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-bogus {warning: region is gang partitioned but does not contain gang partitioned code} {TODO 'reduction'} { xfail *-*-* } l_compute$c_compute } */
     {
       if (acc_on_device (acc_device_host))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* We're actually executing with num_gangs (1).  */
 	  gangs_actual = 1;
@@ -214,15 +272,23 @@  int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (gangs_actual) \
+#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
+  copy (gangs_actual) \
   num_gangs (gangs)
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
     {
       if (acc_on_device (acc_device_host))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* We're actually executing with num_gangs (1).  */
 	  gangs_actual = 1;
 	}
-#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
+  gang \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+      /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
       for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
 	{
 	  gangs_min = gangs_max = acc_gang ();
@@ -246,27 +312,40 @@  int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (workers_actual) /* { dg-warning "using .num_workers \\(32\\)., ignoring 2097152" "" { target openacc_nvidia_accel_selected } } */ \
+#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
+  copy (workers_actual) \
   num_workers (WORKERS)
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-warning {using 'num_workers \(32\)', ignoring 2097152} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
     {
       if (acc_on_device (acc_device_host))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* We're actually executing with num_workers (1).  */
 	  workers_actual = 1;
 	}
       else if (acc_on_device (acc_device_nvidia))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* The GCC nvptx back end enforces num_workers (32).  */
 	  workers_actual = 32;
 	}
       else if (acc_on_device (acc_device_radeon))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* The GCC GCN back end is limited to num_workers (16).  */
 	  workers_actual = 16;
 	}
       else
 	__builtin_abort ();
-#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
+  worker \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+      /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
       for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
 	{
 	  gangs_min = gangs_max = acc_gang ();
@@ -297,27 +376,39 @@  int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (workers_actual) \
+#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
+  copy (workers_actual) \
   num_workers (workers)
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
     {
       if (acc_on_device (acc_device_host))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* We're actually executing with num_workers (1).  */
 	  workers_actual = 1;
 	}
       else if (acc_on_device (acc_device_nvidia))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* We're actually executing with num_workers (32).  */
 	  /* workers_actual = 32; */
 	}
       else if (acc_on_device (acc_device_radeon))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* The GCC GCN back end is limited to num_workers (16).  */
 	  workers_actual = 16;
 	}
       else
 	__builtin_abort ();
-#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
+  worker \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+      /* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
       for (int i = 100 * workers_actual; i > -100 * workers_actual; --i)
 	{
 	  gangs_min = gangs_max = acc_gang ();
@@ -341,27 +432,40 @@  int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (vectors_actual) /* { dg-warning "using .vector_length \\(1024\\)., ignoring 2097152" "" { target openacc_nvidia_accel_selected } } */ \
+#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
+  copy (vectors_actual) \
   vector_length (VECTORS)
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-warning {using 'vector_length \(1024\)', ignoring 2097152} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
     {
       if (acc_on_device (acc_device_host))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* We're actually executing with vector_length (1).  */
 	  vectors_actual = 1;
 	}
       else if (acc_on_device (acc_device_nvidia))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* The GCC nvptx back end reduces to vector_length (1024).  */
 	  vectors_actual = 1024;
 	}
       else if (acc_on_device (acc_device_radeon))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* The GCC GCN back end enforces vector_length (1): autovectorize. */
 	  vectors_actual = 1;
 	}
       else
 	__builtin_abort ();
-#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
+  vector \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+      /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
       for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
 	{
 	  gangs_min = gangs_max = acc_gang ();
@@ -386,20 +490,29 @@  int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (vectors_actual) /* { dg-warning "using .vector_length \\(32\\)., ignoring runtime setting" "" { target openacc_nvidia_accel_selected } } */ \
+#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
+  copy (vectors_actual) \
   vector_length (vectors)
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-warning {using 'vector_length \(32\)', ignoring runtime setting} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
     {
       if (acc_on_device (acc_device_host))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* We're actually executing with vector_length (1).  */
 	  vectors_actual = 1;
 	}
       else if (acc_on_device (acc_device_nvidia))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* The GCC nvptx back end enforces vector_length (32).  */
 	  vectors_actual = 32;
 	}
       else if (acc_on_device (acc_device_radeon))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* Because of the way vectors are implemented for GCN, a vector loop
 	     containing a seq routine call will not vectorize calls to that
@@ -408,7 +521,11 @@  int main ()
 	}
       else
 	__builtin_abort ();
-#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
+  vector \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+      /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
       for (int i = 100 * vectors_actual; i > -100 * vectors_actual; --i)
 	{
 	  gangs_min = gangs_max = acc_gang ();
@@ -443,12 +560,17 @@  int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (gangs_actual, workers_actual, vectors_actual) /* { dg-warning "using .vector_length \\(32\\)., ignoring 11" "" { target openacc_nvidia_accel_selected } } */ \
+#pragma acc parallel /* { dg-line l_compute[incr c_compute] } */ \
+  copy (gangs_actual, workers_actual, vectors_actual) \
   num_gangs (gangs) \
   num_workers (WORKERS) \
   vector_length (VECTORS)
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-warning {using 'vector_length \(32\)', ignoring 11} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
     {
       if (acc_on_device (acc_device_host))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* We're actually executing with num_gangs (1), num_workers (1),
 	     vector_length (1).  */
@@ -457,22 +579,40 @@  int main ()
 	  vectors_actual = 1;
 	}
       else if (acc_on_device (acc_device_nvidia))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* The GCC nvptx back end enforces vector_length (32).  */
 	  vectors_actual = 32;
 	}
       else if (acc_on_device (acc_device_radeon))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* See above comments about GCN vectors_actual.  */
 	  vectors_actual = 1;
 	}
       else
 	__builtin_abort ();
-#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
+  gang \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+      /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+      /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
       for (int i = 100 * gangs_actual; i > -100 * gangs_actual; --i)
-#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_j[incr c_loop_j] } */ \
+  worker \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+	/* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
+	/* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
+	/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_j$c_loop_j } */
 	for (int j = 100 * workers_actual; j > -100 * workers_actual; --j)
-#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_k[incr c_loop_k] } */ \
+  vector \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+	  /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_k$c_loop_k } */
+	  /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_k$c_loop_k } */
 	  for (int k = 100 * vectors_actual; k > -100 * vectors_actual; --k)
 	    {
 	      gangs_min = gangs_max = acc_gang ();
@@ -502,12 +642,16 @@  int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc kernels
+#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */
     {
       /* This is to make the OpenACC kernels construct unparallelizable.  */
       asm volatile ("" : : : "memory");
 
-#pragma acc loop reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
       for (int i = 100; i > -100; --i)
 	{
 	  gangs_min = gangs_max = acc_gang ();
@@ -532,15 +676,19 @@  int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc kernels \
+#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
   num_gangs (gangs) \
   num_workers (WORKERS) \
   vector_length (VECTORS)
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute$c_compute } */
     {
       /* This is to make the OpenACC kernels construct unparallelizable.  */
       asm volatile ("" : : : "memory");
 
-#pragma acc loop reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
       for (int i = 100; i > -100; --i)
 	{
 	  gangs_min = gangs_max = acc_gang ();
@@ -564,8 +712,10 @@  int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc serial /* { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } } */ \
+#pragma acc serial /* { dg-line l_compute[incr c_compute] } */ \
   reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
     {
       for (int i = 100; i > -100; i--)
 	{
@@ -586,13 +736,18 @@  int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc serial copy (vectors_actual) /* { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } } */ \
+#pragma acc serial /* { dg-line l_compute[incr c_compute] } */ \
+  copy (vectors_actual) \
   copy (gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max)
-    /* { dg-bogus "warning: region contains gang partitioned code but is not gang partitioned" "TODO 'serial'" { xfail *-*-* } .-2 }
-       { dg-bogus "warning: region contains worker partitioned code but is not worker partitioned" "TODO 'serial'" { xfail *-*-* } .-3 }
-       { dg-bogus "warning: region contains vector partitioned code but is not vector partitioned" "TODO 'serial'" { xfail *-*-* } .-4 } */
+    /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_compute$c_compute } */
+    /* { dg-bogus {warning: region contains gang partitioned code but is not gang partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute }
+       { dg-bogus {warning: region contains worker partitioned code but is not worker partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute }
+       { dg-bogus {warning: region contains vector partitioned code but is not vector partitioned} {TODO 'serial'} { xfail *-*-* } l_compute$c_compute } */
+    /* { dg-warning {using 'vector_length \(32\)', ignoring 1} {} { target openacc_nvidia_accel_selected } l_compute$c_compute } */
     {
       if (acc_on_device (acc_device_nvidia))
+	/* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target { c++ && { ! __OPTIMIZE__ } } } .-1 }
+	   ..., as without optimizations, we're not inlining the C++ 'acc_on_device' wrapper.  */
 	{
 	  /* The GCC nvptx back end enforces vector_length (32).  */
 	  /* It's unclear if that's actually permissible here;
@@ -600,11 +755,25 @@  int main ()
 	     'serial' construct might not actually be serial".  */
 	  vectors_actual = 32;
 	}
-#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ \
+  gang \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+      /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+      /* { dg-note {variable 'j' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
+      /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_loop_i$c_loop_i } */
       for (int i = 100; i > -100; i--)
-#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_j[incr c_loop_j] } */ \
+  worker \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+	/* { dg-note {variable 'j' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
+	/* { dg-note {variable 'k' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_j$c_loop_j } */
+	/* { dg-optimized {assigned OpenACC worker loop parallelism} {} { target *-*-* } l_loop_j$c_loop_j } */
 	for (int j = 100; j > -100; j--)
-#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+#pragma acc loop /* { dg-line l_loop_k[incr c_loop_k] } */ \
+  vector \
+  reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
+	  /* { dg-note {variable 'k' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_k$c_loop_k } */
+	  /* { dg-optimized {assigned OpenACC vector loop parallelism} {} { target *-*-* } l_loop_k$c_loop_k } */
 	  for (int k = 100 * vectors_actual; k > -100 * vectors_actual; k--)
 	    {
 	      gangs_min = gangs_max = acc_gang ();
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90
index 4b85608f0de..6ff740efc32 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90
@@ -2,14 +2,24 @@ 
 
 ! { dg-do run }
 
+! { dg-additional-options "-fopt-info-all-omp" }
+! { dg-additional-options "-foffload=-fopt-info-all-omp" }
+
+! { dg-additional-options "--param=openacc-privatization=noisy" }
+! { dg-additional-options "-foffload=--param=openacc-privatization=noisy" }
+! Prune a few: uninteresting, and potentially varying depending on GCC configuration (data types):
+! { dg-prune-output {note: variable 'D\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} }
+
 program reduction
   integer, parameter     :: n = 20
   integer                :: i, red
 
   red = 0
 
-  !$acc kernels
-  !$acc loop reduction (+:red)
+  !$acc kernels ! { dg-line l_compute1 }
+  ! { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_compute1 }
+  !$acc loop reduction (+:red) ! { dg-line l_loop_i1 }
+  ! { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i1 }
   do i = 1, n
      red = red + 1
   end do
-- 
2.34.1