On Fri, Jul 01, 2022 at 03:06:05PM +0200, Tobias Burnus wrote:
> --- a/gcc/fortran/parse.cc
> +++ b/gcc/fortran/parse.cc
> @@ -1168,7 +1168,8 @@ decode_omp_directive (void)
> }
> switch (ret)
> {
> - case ST_OMP_DECLARE_TARGET:
> + /* Set omp_target_seen; exclude ST_OMP_DECLARE_TARGET.
> + FIXME: Get clarification, cf. OpenMP Spec Issue #3240. */
> case ST_OMP_TARGET:
> case ST_OMP_TARGET_DATA:
> case ST_OMP_TARGET_ENTER_DATA:
> @@ -6879,11 +6880,14 @@ done:
>
> /* Fixup for external procedures and resolve 'omp requires'. */
> int omp_requires;
> + bool omp_target_seen;
> omp_requires = 0;
> + omp_target_seen = false;
> for (gfc_current_ns = gfc_global_ns_list; gfc_current_ns;
> gfc_current_ns = gfc_current_ns->sibling)
> {
> omp_requires |= gfc_current_ns->omp_requires;
> + omp_target_seen |= gfc_current_ns->omp_target_seen;
> gfc_check_externals (gfc_current_ns);
> }
> for (gfc_current_ns = gfc_global_ns_list; gfc_current_ns;
> @@ -6908,6 +6912,22 @@ done:
> break;
> }
>
> + if (omp_target_seen)
> + omp_requires_mask = (enum omp_requires) (omp_requires_mask
> + | OMP_REQUIRES_TARGET_USED);
> + if (omp_requires & OMP_REQ_REVERSE_OFFLOAD)
> + omp_requires_mask = (enum omp_requires) (omp_requires_mask
> + | OMP_REQUIRES_REVERSE_OFFLOAD);
> + if (omp_requires & OMP_REQ_UNIFIED_ADDRESS)
> + omp_requires_mask = (enum omp_requires) (omp_requires_mask
> + | OMP_REQUIRES_UNIFIED_ADDRESS);
> + if (omp_requires & OMP_REQ_UNIFIED_SHARED_MEMORY)
> + omp_requires_mask
> + = (enum omp_requires) (omp_requires_mask
> + | OMP_REQUIRES_UNIFIED_SHARED_MEMORY);
> + if (omp_requires & OMP_REQ_DYNAMIC_ALLOCATORS)
> + omp_requires_mask = (enum omp_requires) (omp_requires_mask
> + | OMP_REQUIRES_DYNAMIC_ALLOCATORS);
> /* Do the parse tree dump. */
> gfc_current_ns = flag_dump_fortran_original ? gfc_global_ns_list : NULL;
Will Fortran diagnose:
subroutine foo
!$omp requires unified_shared_memory
!$omp target
!$omp end target
end subroutine foo
subroutine bar
!$omp requires reverse_offload
!$omp target
!$omp end target
end subroutine bar
or just merge it from the different namespaces?
This is something that can be handled separately if it isn't resolved
and might need clarification from omp-lang.
> @@ -1764,6 +1781,20 @@ input_symtab (void)
> }
> }
>
> +static void
> +omp_requires_to_name (char *buf, size_t size, unsigned int requires_mask)
> +{
> + char *end = buf + size, *p = buf;
> + if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
> + p += snprintf (p, end - p, "unified_address");
> + if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
> + p += snprintf (p, end - p, "%sunified_shared_memory",
> + (p == buf ? "" : ", "));
> + if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
> + p += snprintf (p, end - p, "%sreverse_offload",
> + (p == buf ? "" : ", "));
So, what does this print if requires_mask is 0 (or just the target used bit
set but not unified_address, unified_shared_memory nor reverse_offload)?
Say in case of:
a.c
#pragma omp requires unified_address
void foo (void) {
#pragma omp target
;
}
b.c:
void bar (void) {
#pragma omp target
;
}
gcc -fopenmp -shared -o a.so a.c b.c
?
> @@ -1810,6 +1847,54 @@ input_offload_tables (bool do_force_output)
> may be no refs to var_decl in offload LTO mode. */
> if (do_force_output)
> varpool_node::get (var_decl)->force_output = 1;
> + tmp_decl = var_decl;
> + }
> + else if (tag == LTO_symtab_edge)
> + {
> + static bool error_emitted = false;
> + HOST_WIDE_INT val = streamer_read_hwi (ib);
> +
> + if (omp_requires_mask == 0)
> + {
> + omp_requires_mask = (omp_requires) val;
> + requires_decl = tmp_decl;
> + requires_fn = file_data->file_name;
And similarly here, if some device construct is seen but requires
directive isn't, not sure if in this version val would be 0 or something
with the TARGET_USED bit set. In the latter case, only what is printed
for no requires or just atomic related requires is a problem, in the former
case due to the == 0 check mixing of 0 with non-zero would be ignored
but mixing of non-zero with 0 wouldn't be.
> + }
> + else if (omp_requires_mask != val && !error_emitted)
> + {
> + char buf[64], buf2[64];
Perhaps cleaner would be to size the buffers as
sizeof ("unified_address,unified_shared_memory,reverse_offload")
64 is more, but just a wild guess and if further clauses are added later,
it might be too small.
> + (p == buf ? "" : ", "));
> + if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
> + p += snprintf (p, end - p, "%sreverse_offload
> + omp_requires_to_name (buf, sizeof (buf), omp_requires_mask);
> + omp_requires_to_name (buf2, sizeof (buf2), val);
> + error ("OpenMP %<requires%> directive with non-identical "
> + "clauses in multiple compilation units: %qs vs. %qs",
> + buf, buf2);
> @@ -1821,6 +1906,18 @@ input_offload_tables (bool do_force_output)
> lto_destroy_simple_input_block (file_data, LTO_section_offload_table,
> ib, data, len);
> }
> +#ifdef ACCEL_COMPILER
> + char *omp_requires_file = getenv ("GCC_OFFLOAD_OMP_REQUIRES_FILE");
> + if (omp_requires_file == NULL || omp_requires_file[0] == '\0')
> + fatal_error (input_location, "GCC_OFFLOAD_OMP_REQUIRES_FILE unset");
> + FILE *f = fopen (omp_requires_file, "wb");
> + if (!f)
> + fatal_error (input_location, "Cannot open omp_requires file %qs",
> + omp_requires_file);
> + uint32_t req_mask = omp_requires_mask & ~OMP_REQUIRES_TARGET_USED;
Perhaps it is better to also store the TARGET_USED bit and on the library
side completely ignore values of 0.
> --- a/gcc/omp-low.cc
> +++ b/gcc/omp-low.cc
> @@ -12701,6 +12701,11 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
> gcc_unreachable ();
> }
>
> + /* Ensure that requires map is written via output_offload_tables, even if only
> + 'target (enter/exit) data' is used in the translation unit. */
> + if (ENABLE_OFFLOADING && (omp_requires_mask & OMP_REQUIRES_TARGET_USED))
> + g->have_offload = true;
Is
c.c:
#pragma omp requires unified_shared_memory
d.c:
void baz (void) {
#pragma omp target
;
}
ok? Pedantically reading current standard probably yes, but perhaps again
something to be discussed. The question is what the requires directive
in that case would do, nothing at all as there are no device constructs
etc.? In that case omp_requires_mask & OMP_REQUIRES_TARGET_USED is right.
But if it should influence the behavior anyway, the restriction should be
Either all compilation units of a program that contain ... device
constructs ... should include also requires directive with one of the
unified_shared_memory, unified_address or reverse_offload clauses.
In that case the test would be
omp_requires_mask & (OMP_REQUIRES_TARGET_USED | OMP_REQUIRES_UNIFIED* | OMP_REQUIRES_REV*)
> +static void
> +gomp_requires_to_name (char *buf, size_t size, int requires_mask)
> +{
> + char *end = buf + size, *p = buf;
> + if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
> + p += snprintf (p, end - p, "unified_address");
> + if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
> + p += snprintf (p, end - p, "%sunified_shared_memory",
> + (p == buf ? "" : ", "));
> + if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
> + p += snprintf (p, end - p, "%sreverse_offload",
> + (p == buf ? "" : ", "));
> +}
Same question as earlier.
> /* This function should be called from every offload image while loading.
> It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
> the target, and TARGET_DATA needed by target plugin. */
> @@ -2323,11 +2341,29 @@ GOMP_offload_register_ver (unsigned version, const void *host_table,
> int target_type, const void *target_data)
> {
> int i;
> + int omp_req = omp_requires_mask;
>
> if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
> gomp_fatal ("Library too old for offload (version %u < %u)",
> GOMP_VERSION, GOMP_VERSION_LIB (version));
> -
> +
> + if (GOMP_VERSION_LIB (version) > 1)
> + {
> + omp_req = (int) (size_t) ((void **) target_data)[0];
> + target_data = &((void **) target_data)[1];
> + if (num_devices && (omp_req & ~omp_requires_mask))
> + {
> + char buf[64];
> + gomp_requires_to_name (buf, sizeof (buf),
> + omp_req & ~omp_requires_mask);
> + gomp_error ("devices already initialized when registering additional "
> + "offload images that use the additional OpenMP 'requires'"
> + " directive clauses %s. Therefore, the program might not "
> + "run correctly", buf);
> + }
> + omp_requires_mask |= omp_req;
> + }
Both omp_requires_mask and num_devices are global vars that would be
modified concurrently in some other thread, so the above is racy.
What I'd do is int omp_req = 0; early, just the omp_req + target_data in
if (GOMP_VERSION_LIB (version) > 1) otherwise. That computes
the local omp_req only.
> +
> gomp_mutex_lock (®ister_lock);
Then under the lock, you can do the merging.
But, IMHO the runtime library should repeat what is done in the offloading
lto1, diagnose if there are differences between the masks in between
different TUs, here at runtime on the program/shared library level.
And IMHO the error you emit above is unnecessary, because (at least
hopefully) the num_devices computation / device initialization should
only happen on behalf of some device construct or device related OpenMP API
routine, so at that point the shared library or program that does that
should have its own mask and if something is dlopened later, it should
either have compatible mask (nothing is diagnosed) or incompatible, but then
it should be diagnosed like any other incompatibilities.
If you want further diagnostics after devices are initialized, it could be
just a note only in case there would be some extra devices available that
don't match it. If all available devices satisfy it, the extra message
wouldn't tell user anything interesting.
> @@ -4125,8 +4161,30 @@ gomp_target_init (void)
>
> if (gomp_load_plugin_for_device (¤t_device, plugin_name))
> {
> - new_num_devs = current_device.get_num_devices_func ();
> - if (new_num_devs >= 1)
> + new_num_devs
> + = current_device.get_num_devices_func (omp_requires_mask);
> + if (new_num_devs < 0)
> + {
> + bool found = false;
> + int type = current_device.get_type_func ();
> + for (int img = 0; img < num_offload_images; img++)
> + if (type == offload_images[img].type)
> + found = true;
> + if (found)
> + {
> + char buf[64];
> + gomp_requires_to_name (buf, sizeof (buf),
> + omp_requires_mask);
> + char *name = (char *) malloc (cur_len + 1);
> + memcpy (name, cur, cur_len);
> + name[cur_len] = '\0';
> + GOMP_PLUGIN_error ("note: %s devices present but 'omp "
> + "requires %s' cannot be fulfilled",
> + name, buf);
> + free (name);
> + }
This isn't an error, so IMNSHO it should be at least guarded with
GOMP_DEBUG=true in the environment, not all programs want the library to be
talkative and break its standard error...
Why do you need the malloc? Can't you just use %.*s ... cur_len, cur
? If malloc would be necessary, it would need to be gomp_malloc, so that
the program doesn't silently crash if malloc fails, or should handle malloc
failure itself.
> --- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
> +++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
> @@ -168,8 +168,12 @@ GOMP_OFFLOAD_get_type (void)
> }
>
> extern "C" int
> -GOMP_OFFLOAD_get_num_devices (void)
> +GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
> {
> + /* Return -1 if no omp_requires_mask cannot be fulfilled but
> + devices were present. */
> + if (num_devices > 0 && omp_requires_mask != 0)
> + return -1;
> TRACE ("(): return %d", num_devices);
> return num_devices;
> }
I thought I've mentioned earlier it would be nice to rename the
get_num_devices plugin hook because its API has changed, so that
if one mixes old plugin with new libgomp or vice versa it doesn't
break silently.
Jakub
On 01.07.22 16:34, Jakub Jelinek wrote:
> On Fri, Jul 01, 2022 at 03:06:05PM +0200, Tobias Burnus wrote:
> [...]
> Will Fortran diagnose:
> subroutine foo
> !$omp requires unified_shared_memory
> !$omp target
> !$omp end target
> end subroutine foo
> subroutine bar
> !$omp requires reverse_offload
> !$omp target
> !$omp end target
> end subroutine bar
>
> or just merge it from the different namespaces?
This is done in openmp.cc during parsing. The merging you quoted (in parse.cc) happens
after the whole input file has been parsed and resolved. For your test case, the
following error is shown:
test.f90:1:15:
1 | subroutine foo
| 1
Error: Program unit at (1) has OpenMP device constructs/routines but does not set !$OMP REQUIRES REVERSE_OFFLOAD but other program units do
test.f90:6:14:
6 | subroutine bar
| 1
Error: Program unit at (1) has OpenMP device constructs/routines but does not set !$OMP REQUIRES UNIFIED_SHARED_MEMORY but other program units do
> @@ -1764,6 +1781,20 @@ input_symtab (void)
>> }
>> }
>>
>> +static void
>> +omp_requires_to_name (char *buf, size_t size, unsigned int requires_mask)
>> +{
>> + char *end = buf + size, *p = buf;
>> + if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
>> + p += snprintf (p, end - p, "unified_address");
>> + if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
>> + p += snprintf (p, end - p, "%sunified_shared_memory",
>> + (p == buf ? "" : ", "));
>> + if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
>> + p += snprintf (p, end - p, "%sreverse_offload",
>> + (p == buf ? "" : ", "));
> So, what does this print if requires_mask is 0 (or just the target used bit
> set but not unified_address, unified_shared_memory nor reverse_offload)?
Well, that's what libgomp/testsuite/libgomp.c-c++-common/requires-2.c (+ *-2-aux.c)
tests:
/* { dg-error "OpenMP 'requires' directive with non-identical clauses in multiple compilation units: 'unified_shared_memory' vs. ''" "" { target *-*-* } 0 } */
I hope the '' vs. 'unified_shared_memory' is clear - but if you have a better wording.
Note that both:
no 'omp requires'
and
'omp requires' with other clauses (such as the atomic ones or dynamic_allocators)
will lead to 0. Thus, if the wording is changed, it should fit for both cases.
>> @@ -1810,6 +1847,54 @@ input_offload_tables (bool do_force_output)
>> may be no refs to var_decl in offload LTO mode. */
>> if (do_force_output)
>> varpool_node::get (var_decl)->force_output = 1;
>> + tmp_decl = var_decl;
>> + }
>> + else if (tag == LTO_symtab_edge)
>> + {
>> + static bool error_emitted = false;
>> + HOST_WIDE_INT val = streamer_read_hwi (ib);
>> +
>> + if (omp_requires_mask == 0)
>> + {
>> + omp_requires_mask = (omp_requires) val;
>> + requires_decl = tmp_decl;
>> + requires_fn = file_data->file_name;
> And similarly here, if some device construct is seen but requires
> directive isn't, not sure if in this version val would be 0 or something
> with the TARGET_USED bit set. In the latter case, only what is printed
> for no requires or just atomic related requires is a problem, in the former
> case due to the == 0 check mixing of 0 with non-zero would be ignored
> but mixing of non-zero with 0 wouldn't be.
Here: 0 = "unset" in the sense that either TARGET_USE nor USM/UA/RO was
specified. If any of those is set, we get != 0.
For mkoffload, the single results are merged - and TARGET_USE is stripped,
such that it is either 0 or a combination of USM/UA/RO
>> + }
>> + else if (omp_requires_mask != val && !error_emitted)
>> + {
>> + char buf[64], buf2[64];
> Perhaps cleaner would be to size the buffers as
> sizeof ("unified_address,unified_shared_memory,reverse_offload")
> 64 is more, but just a wild guess and if further clauses are added later,
> it might be too small.
I concur – except that ',' should be ', '.
(Likewise in libgomp/target.c)
> @@ -1821,6 +1906,18 @@ input_offload_tables (bool do_force_output)
>> lto_destroy_simple_input_block (file_data, LTO_section_offload_table,
>> ib, data, len);
>> }
>> +#ifdef ACCEL_COMPILER
>> + char *omp_requires_file = getenv ("GCC_OFFLOAD_OMP_REQUIRES_FILE");
>> + if (omp_requires_file == NULL || omp_requires_file[0] == '\0')
>> + fatal_error (input_location, "GCC_OFFLOAD_OMP_REQUIRES_FILE unset");
>> + FILE *f = fopen (omp_requires_file, "wb");
>> + if (!f)
>> + fatal_error (input_location, "Cannot open omp_requires file %qs",
>> + omp_requires_file);
>> + uint32_t req_mask = omp_requires_mask & ~OMP_REQUIRES_TARGET_USED;
> Perhaps it is better to also store the TARGET_USED bit and on the library
> side completely ignore values of 0.
For the compiler side, we need to distinguish no requires vs. some
requires when checking multiple TU (to distinguish it from TU which do
not use target constructs).
But for libgomp only the result counts: no requires or some requires.
Thus, passing 0 if there are no USM/UA/RO should be fine – and the code
does so. This 0 is then passed on to the plugin to check against it.
If we pass target_used to libgomp, we need to filter it out at some point.
>> --- a/gcc/omp-low.cc
>> +++ b/gcc/omp-low.cc
>> @@ -12701,6 +12701,11 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
>> gcc_unreachable ();
>> }
>>
>> + /* Ensure that requires map is written via output_offload_tables, even if only
>> + 'target (enter/exit) data' is used in the translation unit. */
>> + if (ENABLE_OFFLOADING && (omp_requires_mask & OMP_REQUIRES_TARGET_USED))
>> + g->have_offload = true;
> Is
> c.c:
> #pragma omp requires unified_shared_memory
> d.c:
> void baz (void) {
> #pragma omp target
> ;
> }
> ok?
This one is *already* streamed out as it creates a symbol and entry in
in offload_functions (baz.omp_fn.0).
The code is rather for '#pragma omp target enter data map(x)' as this
only adds a library call and no symbol.
> Pedantically reading current standard probably yes, but perhaps again
> something to be discussed. The question is what the requires directive
> in that case would do, nothing at all as there are no device constructs
> etc.?
Isn't there a device construct – which happens to be empty?
With 'omp target map(always, to: x)' it would be even observable that
the code is run.
> In that case omp_requires_mask & OMP_REQUIRES_TARGET_USED is right.
> But if it should influence the behavior anyway, the restriction should be
> Either all compilation units of a program that contain ... device
> constructs ... should include also requires directive with one of the
> unified_shared_memory, unified_address or reverse_offload clauses.
> In that case the test would be
> omp_requires_mask & (OMP_REQUIRES_TARGET_USED | OMP_REQUIRES_UNIFIED* | OMP_REQUIRES_REV*)
I think I am lost – don't we effectively test this? We filter out
everything else in output_offload_tables. Thus, in input_offload_tables,
a single '==' will do. (We additionally know that TARGET_USED is set -
as otherwise there wouldn't be a symbol in the offload table.)
Thus, it is unclear to me what you propose here.
>> +static void
>> +gomp_requires_to_name (char *buf, size_t size, int requires_mask)
>> +{
>> + char *end = buf + size, *p = buf;
>> + if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
>> + p += snprintf (p, end - p, "unified_address");
>> + if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
>> + p += snprintf (p, end - p, "%sunified_shared_memory",
>> + (p == buf ? "" : ", "));
>> + if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
>> + p += snprintf (p, end - p, "%sreverse_offload",
>> + (p == buf ? "" : ", "));
>> +}
> Same question as earlier.
Same answer, except that in libgomp, this code is effectively only
reachable when omp_requires_mask != 0 as it reaches this code only if
either some additional flag was added (in register_ver) or when devices
were available, but those do not support a flag.
We just have to remember to update this, if we ever add additional flags.
>> /* This function should be called from every offload image while loading.
>> It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
>> the target, and TARGET_DATA needed by target plugin. */
>> @@ -2323,11 +2341,29 @@ GOMP_offload_register_ver (unsigned version, const void *host_table,
>> int target_type, const void *target_data)
>> {
>> int i;
>> + int omp_req = omp_requires_mask;
>>
>> if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
>> gomp_fatal ("Library too old for offload (version %u < %u)",
>> GOMP_VERSION, GOMP_VERSION_LIB (version));
>> -
>> +
>> + if (GOMP_VERSION_LIB (version) > 1)
>> + {
>> + omp_req = (int) (size_t) ((void **) target_data)[0];
>> + target_data = &((void **) target_data)[1];
>> + if (num_devices && (omp_req & ~omp_requires_mask))
>> + {
>> + char buf[64];
>> + gomp_requires_to_name (buf, sizeof (buf),
>> + omp_req & ~omp_requires_mask);
>> + gomp_error ("devices already initialized when registering additional "
>> + "offload images that use the additional OpenMP 'requires'"
>> + " directive clauses %s. Therefore, the program might not "
>> + "run correctly", buf);
>> + }
>> + omp_requires_mask |= omp_req;
>> + }
> Both omp_requires_mask and num_devices are global vars that would be
> modified concurrently in some other thread, so the above is racy.
>
> What I'd do is int omp_req = 0; early, just the omp_req + target_data in
> if (GOMP_VERSION_LIB (version) > 1) otherwise. That computes
> the local omp_req only.
>
>> +
>> gomp_mutex_lock (®ister_lock);
> Then under the lock, you can do the merging.
> But, IMHO the runtime library should repeat what is done in the offloading
> lto1, diagnose if there are differences between the masks in between
> different TUs, here at runtime on the program/shared library level.
> And IMHO the error you emit above is unnecessary, because (at least
> hopefully) the num_devices computation / device initialization should
> only happen on behalf of some device construct or device related OpenMP API
> routine, so at that point the shared library or program that does that
> should have its own mask and if something is dlopened later, it should
> either have compatible mask (nothing is diagnosed) or incompatible, but then
> it should be diagnosed like any other incompatibilities.
OK – I will diagnose it always.
Question: If it is not the same, should there just be a message to
stderr (gomp_error) or should libgomp abort (gomp_fatal)?
Downside is that I cannot really provide much data where it fails. But
on the other hand, it will probably only rarely occur.
> I thought I've mentioned earlier it would be nice to rename the
> get_num_devices plugin hook because its API has changed, so that
> if one mixes old plugin with new libgomp or vice versa it doesn't
> break silently.
As discussed off list, gomp_load_plugin_for_device calls if
(device->version_func () != GOMP_VERSION) and we did bump the GOMP_VERSION.
Tobias
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
OpenMP: Move omp requires checks to libgomp
Handle reverse_offload, unified_address, and unified_shared_memory
requirements in libgomp by saving them alongside the offload table.
When the device lto1 runs, it extracts the data for mkoffload. The
latter than passes the value on to GOMP_offload_register_ver.
lto1 (either the host one, with -flto [+ ENABLE_OFFLOADING], or in the
offload-device lto1) also does the the consistency check is done,
erroring out when the 'omp requires' clause use is inconsistent.
For all in-principle supported devices, if a requirement cannot be fulfilled,
the device is excluded from the (supported) devices list. Currently, none of
those requirements are marked as supported for any of the non-host devices.
gcc/c/ChangeLog:
* c-parser.cc (c_parser_omp_target_data, c_parser_omp_target_update,
c_parser_omp_target_enter_data, c_parser_omp_target_exit_data): Set
OMP_REQUIRES_TARGET_USED.
(c_parser_omp_requires): Remove sorry.
gcc/ChangeLog:
* config/gcn/mkoffload.cc (process_asm): Write '#include <stdint.h>'.
(process_obj): Pass omp_requires_mask to GOMP_offload_register_ver.
(main): Ask lto1 to obtain omp_requires_mask and pass it on.
* config/nvptx/mkoffload.cc (process, main): Likewise.
* lto-cgraph.cc (omp_requires_to_name): New.
(input_offload_tables): Save omp_requires_mask.
(output_offload_tables): Read it, check for consistency,
save value for mkoffload.
* omp-low.cc (lower_omp_target): Force output_offloadtables
call for OMP_REQUIRES_TARGET_USED.
gcc/cp/ChangeLog:
* parser.cc (cp_parser_omp_target_data,
cp_parser_omp_target_enter_data, cp_parser_omp_target_exit_data,
cp_parser_omp_target_update): Set OMP_REQUIRES_TARGET_USED.
(cp_parser_omp_requires): Remove sorry.
gcc/fortran/ChangeLog:
* openmp.cc (gfc_match_omp_requires): Remove sorry.
* parse.cc (decode_omp_directive): Don't regard 'declare target'
as target usage for 'omp requires'; add more flags to
omp_requires_mask.
include/ChangeLog:
* gomp-constants.h (GOMP_VERSION): Bump to 2.
(GOMP_REQUIRES_UNIFIED_ADDRESS, GOMP_REQUIRES_UNIFIED_SHARED_MEMORY,
GOMP_REQUIRES_REVERSE_OFFLOAD): New defines.
libgomp/ChangeLog:
* libgomp-plugin.h (GOMP_OFFLOAD_get_num_devices): Add
omp_requires_mask arg.
* plugin/plugin-gcn.c (GOMP_OFFLOAD_get_num_devices): Likewise;
return -1 when device available but omp_requires_mask != 0.
* plugin/plugin-nvptx.c (GOMP_OFFLOAD_get_num_devices): Likewise.
* oacc-host.c (host_get_num_devices, host_openacc_get_property):
Update call.
* oacc-init.c (resolve_device, acc_init_1, acc_shutdown_1,
goacc_attach_host_thread_to_device, acc_get_num_devices,
acc_set_device_num, get_property_any): Likewise.
* target.c (omp_requires_mask): New global var.
(gomp_requires_to_name): New.
(GOMP_offload_register_ver): Handle passed omp_requires_mask.
(gomp_target_init): Handle omp_requires_mask.
* libgomp.texi (OpenMP 5.0): Update requires impl. status.
(OpenMP 5.1): Add a missed item.
(OpenMP 5.2): Mark linear-clause change as supported in C/C++.
* testsuite/libgomp.c-c++-common/requires-1-aux.c: New test.
* testsuite/libgomp.c-c++-common/requires-1.c: New test.
* testsuite/libgomp.c-c++-common/requires-2-aux.c: New test.
* testsuite/libgomp.c-c++-common/requires-2.c: New test.
* testsuite/libgomp.c-c++-common/requires-3-aux.c: New test.
* testsuite/libgomp.c-c++-common/requires-3.c: New test.
* testsuite/libgomp.c-c++-common/requires-4-aux.c: New test.
* testsuite/libgomp.c-c++-common/requires-4.c: New test.
* testsuite/libgomp.c-c++-common/requires-5-aux.c: New test.
* testsuite/libgomp.c-c++-common/requires-5.c: New test.
* testsuite/libgomp.c-c++-common/requires-6.c: New test.
* testsuite/libgomp.c-c++-common/requires-7-aux.c: New test.
* testsuite/libgomp.c-c++-common/requires-7.c: New test.
* testsuite/libgomp.fortran/requires-1-aux.f90: New test.
* testsuite/libgomp.fortran/requires-1.f90: New test.
liboffloadmic/ChangeLog:
* plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_get_num_devices):
Return -1 when device available but omp_requires_mask != 0.
gcc/testsuite/ChangeLog:
* c-c++-common/gomp/requires-4.c: Update dg-*.
* c-c++-common/gomp/reverse-offload-1.c: Likewise.
* c-c++-common/gomp/target-device-ancestor-2.c: Likewise.
* c-c++-common/gomp/target-device-ancestor-3.c: Likewise.
* c-c++-common/gomp/target-device-ancestor-4.c: Likewise.
* c-c++-common/gomp/target-device-ancestor-5.c: Likewise.
* gfortran.dg/gomp/target-device-ancestor-3.f90: Likewise.
* gfortran.dg/gomp/target-device-ancestor-4.f90: Likewise.
* gfortran.dg/gomp/target-device-ancestor-5.f90: Likewise.
* gfortran.dg/gomp/target-device-ancestor-2.f90: Likewise. Move
post-FE checks to ...
* gfortran.dg/gomp/target-device-ancestor-2a.f90: ... this new file.
* gfortran.dg/gomp/requires-8.f90: Update as we don't regard
'declare target' for the 'requires' usage requirement.
Co-authored-by: Chung-Lin Tang <cltang@codesourcery.com>
Co-authored-by: Thomas Schwinge <thomas@codesourcery.com>
gcc/c/c-parser.cc | 19 ++++-
gcc/config/gcn/mkoffload.cc | 27 +++++-
gcc/config/nvptx/mkoffload.cc | 29 ++++++-
gcc/cp/parser.cc | 19 ++++-
gcc/fortran/openmp.cc | 4 -
gcc/fortran/parse.cc | 22 ++++-
gcc/lto-cgraph.cc | 99 +++++++++++++++++++++-
gcc/omp-low.cc | 5 ++
gcc/testsuite/c-c++-common/gomp/requires-4.c | 2 -
.../c-c++-common/gomp/reverse-offload-1.c | 2 +-
.../c-c++-common/gomp/target-device-ancestor-2.c | 10 +--
.../c-c++-common/gomp/target-device-ancestor-3.c | 2 +-
.../c-c++-common/gomp/target-device-ancestor-4.c | 4 +-
.../c-c++-common/gomp/target-device-ancestor-5.c | 2 +-
gcc/testsuite/gfortran.dg/gomp/requires-8.f90 | 14 ++-
.../gfortran.dg/gomp/target-device-ancestor-2.f90 | 70 ++-------------
.../gfortran.dg/gomp/target-device-ancestor-2a.f90 | 80 +++++++++++++++++
.../gfortran.dg/gomp/target-device-ancestor-3.f90 | 6 +-
.../gfortran.dg/gomp/target-device-ancestor-4.f90 | 6 +-
.../gfortran.dg/gomp/target-device-ancestor-5.f90 | 8 +-
include/gomp-constants.h | 8 +-
libgomp/libgomp-plugin.h | 2 +-
libgomp/libgomp.texi | 8 +-
libgomp/oacc-host.c | 4 +-
libgomp/oacc-init.c | 16 ++--
libgomp/plugin/plugin-gcn.c | 6 +-
libgomp/plugin/plugin-nvptx.c | 9 +-
libgomp/target.c | 64 +++++++++++++-
.../libgomp.c-c++-common/requires-1-aux.c | 11 +++
.../testsuite/libgomp.c-c++-common/requires-1.c | 24 ++++++
.../libgomp.c-c++-common/requires-2-aux.c | 9 ++
.../testsuite/libgomp.c-c++-common/requires-2.c | 25 ++++++
.../libgomp.c-c++-common/requires-3-aux.c | 11 +++
.../testsuite/libgomp.c-c++-common/requires-3.c | 24 ++++++
.../libgomp.c-c++-common/requires-4-aux.c | 13 +++
.../testsuite/libgomp.c-c++-common/requires-4.c | 23 +++++
.../libgomp.c-c++-common/requires-5-aux.c | 11 +++
.../testsuite/libgomp.c-c++-common/requires-5.c | 20 +++++
.../testsuite/libgomp.c-c++-common/requires-6.c | 17 ++++
.../libgomp.c-c++-common/requires-7-aux.c | 11 +++
.../testsuite/libgomp.c-c++-common/requires-7.c | 24 ++++++
.../testsuite/libgomp.fortran/requires-1-aux.f90 | 14 +++
libgomp/testsuite/libgomp.fortran/requires-1.f90 | 26 ++++++
liboffloadmic/plugin/libgomp-plugin-intelmic.cpp | 6 +-
44 files changed, 684 insertions(+), 132 deletions(-)
@@ -20915,6 +20915,10 @@ c_parser_omp_teams (location_t loc, c_parser *parser,
static tree
c_parser_omp_target_data (location_t loc, c_parser *parser, bool *if_p)
{
+ if (flag_openmp)
+ omp_requires_mask
+ = (enum omp_requires) (omp_requires_mask | OMP_REQUIRES_TARGET_USED);
+
tree clauses
= c_parser_omp_all_clauses (parser, OMP_TARGET_DATA_CLAUSE_MASK,
"#pragma omp target data");
@@ -21010,6 +21014,10 @@ c_parser_omp_target_update (location_t loc, c_parser *parser,
return false;
}
+ if (flag_openmp)
+ omp_requires_mask
+ = (enum omp_requires) (omp_requires_mask | OMP_REQUIRES_TARGET_USED);
+
tree stmt = make_node (OMP_TARGET_UPDATE);
TREE_TYPE (stmt) = void_type_node;
OMP_TARGET_UPDATE_CLAUSES (stmt) = clauses;
@@ -21057,6 +21065,10 @@ c_parser_omp_target_enter_data (location_t loc, c_parser *parser,
return true;
}
+ if (flag_openmp)
+ omp_requires_mask
+ = (enum omp_requires) (omp_requires_mask | OMP_REQUIRES_TARGET_USED);
+
tree clauses
= c_parser_omp_all_clauses (parser, OMP_TARGET_ENTER_DATA_CLAUSE_MASK,
"#pragma omp target enter data");
@@ -21143,6 +21155,10 @@ c_parser_omp_target_exit_data (location_t loc, c_parser *parser,
return true;
}
+ if (flag_openmp)
+ omp_requires_mask
+ = (enum omp_requires) (omp_requires_mask | OMP_REQUIRES_TARGET_USED);
+
tree clauses
= c_parser_omp_all_clauses (parser, OMP_TARGET_EXIT_DATA_CLAUSE_MASK,
"#pragma omp target exit data");
@@ -22763,9 +22779,6 @@ c_parser_omp_requires (c_parser *parser)
c_parser_skip_to_pragma_eol (parser, false);
return;
}
- if (p && this_req != OMP_REQUIRES_DYNAMIC_ALLOCATORS)
- sorry_at (cloc, "%qs clause on %<requires%> directive not "
- "supported yet", p);
if (p)
c_parser_consume_token (parser);
if (this_req)
@@ -611,6 +611,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
struct regcount *regcounts = XOBFINISH (®counts_os, struct regcount *);
fprintf (cfile, "#include <stdlib.h>\n");
+ fprintf (cfile, "#include <stdint.h>\n");
fprintf (cfile, "#include <stdbool.h>\n\n");
fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count);
@@ -664,7 +665,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
/* Embed an object file into a C source file. */
static void
-process_obj (FILE *in, FILE *cfile)
+process_obj (FILE *in, FILE *cfile, uint32_t omp_requires)
{
size_t len = 0;
const char *input = read_file (in, &len);
@@ -692,16 +693,18 @@ process_obj (FILE *in, FILE *cfile)
fprintf (cfile,
"static const struct gcn_image_desc {\n"
+ " uintptr_t omp_requires_mask;\n"
" const struct gcn_image *gcn_image;\n"
" unsigned kernel_count;\n"
" const struct hsa_kernel_description *kernel_infos;\n"
" unsigned global_variable_count;\n"
"} target_data = {\n"
+ " %d,\n"
" &gcn_image,\n"
" sizeof (gcn_kernels) / sizeof (gcn_kernels[0]),\n"
" gcn_kernels,\n"
" gcn_num_vars\n"
- "};\n\n");
+ "};\n\n", omp_requires);
fprintf (cfile,
"#ifdef __cplusplus\n"
@@ -1077,9 +1080,27 @@ main (int argc, char **argv)
unsetenv ("COMPILER_PATH");
unsetenv ("LIBRARY_PATH");
+ char *omp_requires_file;
+ if (save_temps)
+ omp_requires_file = concat (dumppfx, ".mkoffload.omp_requires", NULL);
+ else
+ omp_requires_file = make_temp_file (".mkoffload.omp_requires");
+
/* Run the compiler pass. */
+ xputenv (concat ("GCC_OFFLOAD_OMP_REQUIRES_FILE=", omp_requires_file, NULL));
fork_execute (cc_argv[0], CONST_CAST (char **, cc_argv), true, ".gcc_args");
obstack_free (&cc_argv_obstack, NULL);
+ unsetenv("GCC_OFFLOAD_OMP_REQUIRES_FILE");
+
+ in = fopen (omp_requires_file, "rb");
+ if (!in)
+ fatal_error (input_location, "cannot open omp_requires file %qs",
+ omp_requires_file);
+ uint32_t omp_requires;
+ if (fread (&omp_requires, sizeof (omp_requires), 1, in) != 1)
+ fatal_error (input_location, "cannot read omp_requires file %qs",
+ omp_requires_file);
+ fclose (in);
in = fopen (gcn_s1_name, "r");
if (!in)
@@ -1102,7 +1123,7 @@ main (int argc, char **argv)
if (!in)
fatal_error (input_location, "cannot open intermediate gcn obj file");
- process_obj (in, cfile);
+ process_obj (in, cfile, omp_requires);
fclose (in);
@@ -231,7 +231,7 @@ access_check (const char *name, int mode)
}
static void
-process (FILE *in, FILE *out)
+process (FILE *in, FILE *out, uint32_t omp_requires)
{
size_t len = 0;
const char *input = read_file (in, &len);
@@ -240,6 +240,8 @@ process (FILE *in, FILE *out)
unsigned obj_count = 0;
unsigned ix;
+ fprintf (out, "#include <stdint.h>\n\n");
+
/* Dump out char arrays for each PTX object file. These are
terminated by a NUL. */
for (size_t i = 0; i != len;)
@@ -309,6 +311,7 @@ process (FILE *in, FILE *out)
fprintf (out,
"static const struct nvptx_tdata {\n"
+ " uintptr_t omp_requires_mask;\n"
" const struct ptx_obj *ptx_objs;\n"
" unsigned ptx_num;\n"
" const char *const *var_names;\n"
@@ -316,12 +319,12 @@ process (FILE *in, FILE *out)
" const struct nvptx_fn *fn_names;\n"
" unsigned fn_num;\n"
"} target_data = {\n"
- " ptx_objs, sizeof (ptx_objs) / sizeof (ptx_objs[0]),\n"
+ " %d, ptx_objs, sizeof (ptx_objs) / sizeof (ptx_objs[0]),\n"
" var_mappings,"
" sizeof (var_mappings) / sizeof (var_mappings[0]),\n"
" func_mappings,"
" sizeof (func_mappings) / sizeof (func_mappings[0])\n"
- "};\n\n");
+ "};\n\n", omp_requires);
fprintf (out, "#ifdef __cplusplus\n"
"extern \"C\" {\n"
@@ -583,19 +586,37 @@ main (int argc, char **argv)
unsetenv ("COMPILER_PATH");
unsetenv ("LIBRARY_PATH");
+ char *omp_requires_file;
+ if (save_temps)
+ omp_requires_file = concat (dumppfx, ".mkoffload.omp_requires", NULL);
+ else
+ omp_requires_file = make_temp_file (".mkoffload.omp_requires");
+
+ xputenv (concat ("GCC_OFFLOAD_OMP_REQUIRES_FILE=", omp_requires_file, NULL));
fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true,
".gcc_args");
obstack_free (&argv_obstack, NULL);
+ unsetenv("GCC_OFFLOAD_OMP_REQUIRES_FILE");
xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
xputenv (concat ("COMPILER_PATH=", cpath, NULL));
xputenv (concat ("LIBRARY_PATH=", lpath, NULL));
+ in = fopen (omp_requires_file, "rb");
+ if (!in)
+ fatal_error (input_location, "cannot open omp_requires file %qs",
+ omp_requires_file);
+ uint32_t omp_requires;
+ if (fread (&omp_requires, sizeof (omp_requires), 1, in) != 1)
+ fatal_error (input_location, "cannot read omp_requires file %qs",
+ omp_requires_file);
+ fclose (in);
+
in = fopen (ptx_name, "r");
if (!in)
fatal_error (input_location, "cannot open intermediate ptx file");
- process (in, out);
+ process (in, out, omp_requires);
fclose (in);
}
@@ -44287,6 +44287,10 @@ cp_parser_omp_teams (cp_parser *parser, cp_token *pragma_tok,
static tree
cp_parser_omp_target_data (cp_parser *parser, cp_token *pragma_tok, bool *if_p)
{
+ if (flag_openmp)
+ omp_requires_mask
+ = (enum omp_requires) (omp_requires_mask | OMP_REQUIRES_TARGET_USED);
+
tree clauses
= cp_parser_omp_all_clauses (parser, OMP_TARGET_DATA_CLAUSE_MASK,
"#pragma omp target data", pragma_tok);
@@ -44390,6 +44394,10 @@ cp_parser_omp_target_enter_data (cp_parser *parser, cp_token *pragma_tok,
return true;
}
+ if (flag_openmp)
+ omp_requires_mask
+ = (enum omp_requires) (omp_requires_mask | OMP_REQUIRES_TARGET_USED);
+
tree clauses
= cp_parser_omp_all_clauses (parser, OMP_TARGET_ENTER_DATA_CLAUSE_MASK,
"#pragma omp target enter data", pragma_tok);
@@ -44481,6 +44489,10 @@ cp_parser_omp_target_exit_data (cp_parser *parser, cp_token *pragma_tok,
return true;
}
+ if (flag_openmp)
+ omp_requires_mask
+ = (enum omp_requires) (omp_requires_mask | OMP_REQUIRES_TARGET_USED);
+
tree clauses
= cp_parser_omp_all_clauses (parser, OMP_TARGET_EXIT_DATA_CLAUSE_MASK,
"#pragma omp target exit data", pragma_tok);
@@ -44567,6 +44579,10 @@ cp_parser_omp_target_update (cp_parser *parser, cp_token *pragma_tok,
return true;
}
+ if (flag_openmp)
+ omp_requires_mask
+ = (enum omp_requires) (omp_requires_mask | OMP_REQUIRES_TARGET_USED);
+
tree stmt = make_node (OMP_TARGET_UPDATE);
TREE_TYPE (stmt) = void_type_node;
OMP_TARGET_UPDATE_CLAUSES (stmt) = clauses;
@@ -46861,9 +46877,6 @@ cp_parser_omp_requires (cp_parser *parser, cp_token *pragma_tok)
cp_parser_skip_to_pragma_eol (parser, pragma_tok);
return false;
}
- if (p && this_req != OMP_REQUIRES_DYNAMIC_ALLOCATORS)
- sorry_at (cloc, "%qs clause on %<requires%> directive not "
- "supported yet", p);
if (p)
cp_lexer_consume_token (parser->lexer);
if (this_req)
@@ -5488,10 +5488,6 @@ gfc_match_omp_requires (void)
else
goto error;
- if (requires_clause & ~(OMP_REQ_ATOMIC_MEM_ORDER_MASK
- | OMP_REQ_DYNAMIC_ALLOCATORS))
- gfc_error_now ("Sorry, %qs clause at %L on REQUIRES directive is not "
- "yet supported", clause, &old_loc);
if (!gfc_omp_requires_add_clause (requires_clause, clause, &old_loc, NULL))
goto error;
requires_clauses |= requires_clause;
@@ -1168,7 +1168,8 @@ decode_omp_directive (void)
}
switch (ret)
{
- case ST_OMP_DECLARE_TARGET:
+ /* Set omp_target_seen; exclude ST_OMP_DECLARE_TARGET.
+ FIXME: Get clarification, cf. OpenMP Spec Issue #3240. */
case ST_OMP_TARGET:
case ST_OMP_TARGET_DATA:
case ST_OMP_TARGET_ENTER_DATA:
@@ -6879,11 +6880,14 @@ done:
/* Fixup for external procedures and resolve 'omp requires'. */
int omp_requires;
+ bool omp_target_seen;
omp_requires = 0;
+ omp_target_seen = false;
for (gfc_current_ns = gfc_global_ns_list; gfc_current_ns;
gfc_current_ns = gfc_current_ns->sibling)
{
omp_requires |= gfc_current_ns->omp_requires;
+ omp_target_seen |= gfc_current_ns->omp_target_seen;
gfc_check_externals (gfc_current_ns);
}
for (gfc_current_ns = gfc_global_ns_list; gfc_current_ns;
@@ -6908,6 +6912,22 @@ done:
break;
}
+ if (omp_target_seen)
+ omp_requires_mask = (enum omp_requires) (omp_requires_mask
+ | OMP_REQUIRES_TARGET_USED);
+ if (omp_requires & OMP_REQ_REVERSE_OFFLOAD)
+ omp_requires_mask = (enum omp_requires) (omp_requires_mask
+ | OMP_REQUIRES_REVERSE_OFFLOAD);
+ if (omp_requires & OMP_REQ_UNIFIED_ADDRESS)
+ omp_requires_mask = (enum omp_requires) (omp_requires_mask
+ | OMP_REQUIRES_UNIFIED_ADDRESS);
+ if (omp_requires & OMP_REQ_UNIFIED_SHARED_MEMORY)
+ omp_requires_mask
+ = (enum omp_requires) (omp_requires_mask
+ | OMP_REQUIRES_UNIFIED_SHARED_MEMORY);
+ if (omp_requires & OMP_REQ_DYNAMIC_ALLOCATORS)
+ omp_requires_mask = (enum omp_requires) (omp_requires_mask
+ | OMP_REQUIRES_DYNAMIC_ALLOCATORS);
/* Do the parse tree dump. */
gfc_current_ns = flag_dump_fortran_original ? gfc_global_ns_list : NULL;
@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see
#include "pass_manager.h"
#include "ipa-utils.h"
#include "omp-offload.h"
+#include "omp-general.h"
#include "stringpool.h"
#include "attribs.h"
#include "alloc-pool.h"
@@ -1068,7 +1069,10 @@ read_string (class lto_input_block *ib)
void
output_offload_tables (void)
{
- if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars))
+ bool output_requires = (flag_openmp
+ && (omp_requires_mask & OMP_REQUIRES_TARGET_USED) != 0);
+ if (vec_safe_is_empty (offload_funcs) && vec_safe_is_empty (offload_vars)
+ && !output_requires)
return;
struct lto_simple_output_block *ob
@@ -1098,6 +1102,19 @@ output_offload_tables (void)
(*offload_vars)[i]);
}
+ if (output_requires)
+ {
+ HOST_WIDE_INT val = ((HOST_WIDE_INT) omp_requires_mask
+ & (OMP_REQUIRES_UNIFIED_ADDRESS
+ | OMP_REQUIRES_UNIFIED_SHARED_MEMORY
+ | OMP_REQUIRES_REVERSE_OFFLOAD
+ | OMP_REQUIRES_TARGET_USED));
+ /* (Mis)use LTO_symtab_edge for this variable. */
+ streamer_write_enum (ob->main_stream, LTO_symtab_tags,
+ LTO_symtab_last_tag, LTO_symtab_edge);
+ streamer_write_hwi_stream (ob->main_stream, val);
+ }
+
streamer_write_uhwi_stream (ob->main_stream, 0);
lto_destroy_simple_output_block (ob);
@@ -1764,6 +1781,20 @@ input_symtab (void)
}
}
+static void
+omp_requires_to_name (char *buf, size_t size, unsigned int requires_mask)
+{
+ char *end = buf + size, *p = buf;
+ if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
+ p += snprintf (p, end - p, "unified_address");
+ if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
+ p += snprintf (p, end - p, "%sunified_shared_memory",
+ (p == buf ? "" : ", "));
+ if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
+ p += snprintf (p, end - p, "%sreverse_offload",
+ (p == buf ? "" : ", "));
+}
+
/* Input function/variable tables that will allow libgomp to look up offload
target code, and store them into OFFLOAD_FUNCS and OFFLOAD_VARS. */
@@ -1773,6 +1804,10 @@ input_offload_tables (bool do_force_output)
struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
struct lto_file_decl_data *file_data;
unsigned int j = 0;
+ const char *requires_fn = NULL;
+ tree requires_decl = NULL_TREE;
+
+ omp_requires_mask = (omp_requires) 0;
while ((file_data = file_data_vec[j++]))
{
@@ -1784,6 +1819,7 @@ input_offload_tables (bool do_force_output)
if (!ib)
continue;
+ tree tmp_decl = NULL_TREE;
enum LTO_symtab_tags tag
= streamer_read_enum (ib, LTO_symtab_tags, LTO_symtab_last_tag);
while (tag)
@@ -1799,6 +1835,7 @@ input_offload_tables (bool do_force_output)
LTO mode. */
if (do_force_output)
cgraph_node::get (fn_decl)->mark_force_output ();
+ tmp_decl = fn_decl;
}
else if (tag == LTO_symtab_variable)
{
@@ -1810,6 +1847,54 @@ input_offload_tables (bool do_force_output)
may be no refs to var_decl in offload LTO mode. */
if (do_force_output)
varpool_node::get (var_decl)->force_output = 1;
+ tmp_decl = var_decl;
+ }
+ else if (tag == LTO_symtab_edge)
+ {
+ static bool error_emitted = false;
+ HOST_WIDE_INT val = streamer_read_hwi (ib);
+
+ if (omp_requires_mask == 0)
+ {
+ omp_requires_mask = (omp_requires) val;
+ requires_decl = tmp_decl;
+ requires_fn = file_data->file_name;
+ }
+ else if (omp_requires_mask != val && !error_emitted)
+ {
+ char buf[64], buf2[64];
+ omp_requires_to_name (buf, sizeof (buf), omp_requires_mask);
+ omp_requires_to_name (buf2, sizeof (buf2), val);
+ error ("OpenMP %<requires%> directive with non-identical "
+ "clauses in multiple compilation units: %qs vs. %qs",
+ buf, buf2);
+ if (requires_decl != NULL_TREE)
+ {
+ while (DECL_CONTEXT (requires_decl) != NULL_TREE
+ && TREE_CODE (requires_decl) != TRANSLATION_UNIT_DECL)
+ requires_decl = DECL_CONTEXT (requires_decl);
+ if (requires_decl != NULL_TREE)
+ inform (UNKNOWN_LOCATION, "%qs has %qs",
+ IDENTIFIER_POINTER (DECL_NAME (requires_decl)),
+ buf);
+ }
+ else
+ inform (UNKNOWN_LOCATION, "%qs has %qs", requires_fn, buf);
+ if (tmp_decl != NULL_TREE)
+ {
+ while (DECL_CONTEXT (tmp_decl) != NULL_TREE
+ && TREE_CODE (tmp_decl) != TRANSLATION_UNIT_DECL)
+ tmp_decl = DECL_CONTEXT (tmp_decl);
+ if (tmp_decl != NULL_TREE)
+ inform (UNKNOWN_LOCATION, "%qs has %qs",
+ IDENTIFIER_POINTER (DECL_NAME (tmp_decl)),
+ buf2);
+ }
+ else
+ inform (UNKNOWN_LOCATION, "%qs has %qs",
+ file_data->file_name, buf2);
+ error_emitted = true;
+ }
}
else
fatal_error (input_location,
@@ -1821,6 +1906,18 @@ input_offload_tables (bool do_force_output)
lto_destroy_simple_input_block (file_data, LTO_section_offload_table,
ib, data, len);
}
+#ifdef ACCEL_COMPILER
+ char *omp_requires_file = getenv ("GCC_OFFLOAD_OMP_REQUIRES_FILE");
+ if (omp_requires_file == NULL || omp_requires_file[0] == '\0')
+ fatal_error (input_location, "GCC_OFFLOAD_OMP_REQUIRES_FILE unset");
+ FILE *f = fopen (omp_requires_file, "wb");
+ if (!f)
+ fatal_error (input_location, "Cannot open omp_requires file %qs",
+ omp_requires_file);
+ uint32_t req_mask = omp_requires_mask & ~OMP_REQUIRES_TARGET_USED;
+ fwrite (&req_mask, sizeof (req_mask), 1, f);
+ fclose (f);
+#endif
}
/* True when we need optimization summary for NODE. */
@@ -12701,6 +12701,11 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gcc_unreachable ();
}
+ /* Ensure that requires map is written via output_offload_tables, even if only
+ 'target (enter/exit) data' is used in the translation unit. */
+ if (ENABLE_OFFLOADING && (omp_requires_mask & OMP_REQUIRES_TARGET_USED))
+ g->have_offload = true;
+
clauses = gimple_omp_target_clauses (stmt);
gimple_seq dep_ilist = NULL;
@@ -9,5 +9,3 @@ foo (void)
#pragma omp requires unified_shared_memory /* { dg-error "'unified_shared_memory' clause used lexically after first target construct or offloading API" } */
#pragma omp requires unified_address /* { dg-error "'unified_address' clause used lexically after first target construct or offloading API" } */
#pragma omp requires reverse_offload /* { dg-error "'reverse_offload' clause used lexically after first target construct or offloading API" } */
-
-/* { dg-prune-output "not supported yet" } */
@@ -43,7 +43,7 @@ tg_fn (int *x, int *y)
x2 = x2 + 2 + called_in_target1 ();
y2 = y2 + 7;
- #pragma omp target device(ancestor : 1) map(tofrom: x2)
+ #pragma omp target device(ancestor : 1) map(tofrom: x2) /* { dg-message "sorry, unimplemented: 'ancestor' not yet supported" } */
check_offload(&x2, &y2);
if (x2 != 2+2+3+42 || y2 != 3 + 7)
@@ -1,13 +1,11 @@
/* { dg-do compile } */
-#pragma omp requires reverse_offload /* { dg-message "sorry, unimplemented: 'reverse_offload' clause on 'requires' directive not supported yet" } */
+#pragma omp requires reverse_offload
void
foo (int n)
{
- /* The following test is marked with 'xfail' because a previous 'sorry' from
- 'reverse_offload' suppresses the 'sorry' for 'ancestor'. */
- #pragma omp target device (ancestor: 1) /* { dg-message "" "sorry, unimplemented: 'ancestor' not yet supported" { xfail *-*-* } } */
+ #pragma omp target device (ancestor: 1)
;
@@ -19,9 +17,9 @@ foo (int n)
#pragma omp target device (ancestor : 42) /* { dg-error "the 'device' clause expression must evaluate to '1'" } */
;
- #pragma omp target device (ancestor : n) /* { dg-message "" "sorry, unimplemented: 'ancestor' not yet supported" { xfail *-*-* } } */
+ #pragma omp target device (ancestor : n)
;
- #pragma omp target device (ancestor : n + 1) /* { dg-message "" "sorry, unimplemented: 'ancestor' not yet supported" { xfail *-*-* } } */
+ #pragma omp target device (ancestor : n + 1)
;
@@ -11,7 +11,7 @@ int bar (void);
/* { dg-do compile } */
-#pragma omp requires reverse_offload /* { dg-message "sorry, unimplemented: 'reverse_offload' clause on 'requires' directive not supported yet" } */
+#pragma omp requires reverse_offload
void
foo (void)
@@ -4,12 +4,12 @@
/* Test to ensure that device-modifier 'ancestor' is parsed correctly in
device clauses. */
-#pragma omp requires reverse_offload /* { dg-message "sorry, unimplemented: 'reverse_offload' clause on 'requires' directive not supported yet" } */
+#pragma omp requires reverse_offload
void
foo (void)
{
- #pragma omp target device (ancestor: 1) /* { dg-message "" "sorry, unimplemented: 'ancestor' not yet supported" { xfail *-*-* } } */
+ #pragma omp target device (ancestor: 1) /* { dg-message "sorry, unimplemented: 'ancestor' not yet supported" } */
;
}
@@ -1,4 +1,4 @@
-#pragma omp requires reverse_offload /* { dg-message "sorry, unimplemented: 'reverse_offload' clause on 'requires' directive not supported yet" } */
+#pragma omp requires reverse_offload
void
foo ()
@@ -1,3 +1,7 @@
+module m0
+ integer :: x
+end module m0
+
module m ! { dg-error "has OpenMP device constructs/routines but does not set !.OMP REQUIRES UNIFIED_SHARED_MEMORY but other program units do" }
!$omp requires reverse_offload
contains
@@ -13,10 +17,14 @@ contains
end subroutine foo
end module m
-subroutine bar ! { dg-error "has OpenMP device constructs/routines but does not set !.OMP REQUIRES REVERSE_OFFLOAD but other program units do" }
+subroutine bar
!use m
- !$omp requires unified_shared_memory
+ !$omp requires unified_shared_memory ! Possibly OK - needs OpenMP Lang Spec clarification (-> #3240)
!$omp declare target
end subroutine bar
-! { dg-prune-output "not yet supported" }
+subroutine foobar ! { dg-error "has OpenMP device constructs/routines but does not set !.OMP REQUIRES REVERSE_OFFLOAD but other program units do" }
+ use m0
+ !$omp requires unified_shared_memory
+ !$omp target enter data map(to:x)
+end subroutine foobar
@@ -4,19 +4,16 @@ implicit none
integer :: a, b, c
-!$omp requires reverse_offload ! { dg-error "Sorry, 'reverse_offload' clause at \\(1\\) on REQUIRES directive is not yet supported" }
+!$omp requires reverse_offload
-! The following test case is marked with 'xfail' because a previous 'sorry' from
-! 'reverse_offload' suppresses the 'sorry' for 'ancestor'.
-
-!$omp target device (ancestor: 1) ! { dg-message "" "sorry, unimplemented: 'ancestor' not yet supported" { xfail *-*-* } }
+!$omp target device (ancestor: 1)
!$omp end target
-!$omp target device (ancestor : a) ! { dg-message "" "sorry, unimplemented: 'ancestor' not yet supported" { xfail *-*-* } }
+!$omp target device (ancestor : a)
!$omp end target
-!$omp target device (ancestor : a + 1) ! { dg-message "" "sorry, unimplemented: 'ancestor' not yet supported" { xfail *-*-* } }
+!$omp target device (ancestor : a + 1)
!$omp end target
@@ -32,61 +29,4 @@ integer :: a, b, c
!$omp target device (42)
!$omp end target
-
-! Ensure that no OpenMP constructs appear inside target regions with 'ancestor'.
-! The following test case is marked with 'xfail' because a previous 'sorry' from
-! 'reverse_offload' suppresses the 'sorry' for 'ancestor'.
-
-!$omp target device (ancestor: 1)
- !$omp teams ! { dg-error "" "OpenMP constructs are not allowed in target region with 'ancestor'" { xfail *-*-* } }
- !$omp end teams
-!$omp end target
-
-!$omp target device (device_num: 1)
- !$omp teams
- !$omp end teams
-!$omp end target
-
-!$omp target device (1)
- !$omp teams
- !$omp end teams
-!$omp end target
-
-
-! Ensure that with 'ancestor' only the 'device', 'firstprivate', 'private',
-! 'defaultmap', and 'map' clauses appear on the construct.
-! The following test case is marked with 'xfail' because a previous 'sorry' from
-! 'reverse_offload' suppresses the 'sorry' for 'ancestor'.
-
-!$omp target nowait device (ancestor: 1) ! { dg-error "" "with 'ancestor', only the 'device', 'firstprivate', 'private', 'defaultmap', and 'map' clauses may appear on the construct" { xfail *-*-* } }
-!$omp end target
-
-!$omp target device (ancestor: 1) nowait ! { dg-error "" "with 'ancestor', only the 'device', 'firstprivate', 'private', 'defaultmap', and 'map' clauses may appear on the construct" { xfail *-*-* } }
-!$omp end target
-
-!$omp target nowait device (device_num: 1)
-!$omp end target
-
-!$omp target nowait device (1)
-!$omp end target
-
-!$omp target device (ancestor: 1) firstprivate (a) private (b) defaultmap (none) map (c)
-!$omp end target
-
-
-! Ensure that 'ancestor' is only used with 'target' constructs (not with
-! 'target data', 'target update' etc.).
-! The following test case is marked with 'xfail' because a previous 'sorry' from
-! 'reverse_offload' suppresses the 'sorry' for 'ancestor'.
-
-!$omp target data map (a) device (ancestor: 1) ! { dg-error "" "'device' clause with 'ancestor' is only allowed on 'target' construct" { xfail *-*-* } }
-!$omp end target data
-
-!$omp target enter data map (to: a) device (ancestor: 1) ! { dg-error "" "'device' clause with 'ancestor' is only allowed on 'target' construct" { xfail *-*-* } }
-!$omp target exit data map (from: a) device (ancestor: 1) ! { dg-error "" "'device' clause with 'ancestor' is only allowed on 'target' construct" { xfail *-*-* } }
-
-!$omp target update to (a) device (ancestor: 1) ! { dg-error "'device' clause with 'ancestor' is only allowed on 'target' construct" "" { xfail *-*-* } }
-! { dg-error "with 'ancestor', only the 'device', 'firstprivate', 'private', 'defaultmap', and 'map' clauses may appear on the construct" "" { xfail *-*-* } .-1 }
-
-
-end
\ No newline at end of file
+end
new file mode 100644
@@ -0,0 +1,80 @@
+! { dg-do compile }
+
+implicit none
+
+integer :: a, b, c
+
+!$omp requires reverse_offload
+
+!$omp target device (ancestor: 1)
+!$omp end target
+
+!$omp target device (ancestor : a)
+!$omp end target
+
+!$omp target device (ancestor : a + 1)
+!$omp end target
+
+
+!$omp target device (device_num:42)
+!$omp end target
+
+!$omp target device (42)
+!$omp end target
+
+
+! Ensure that no OpenMP constructs appear inside target regions with 'ancestor'.
+
+!$omp target device (ancestor: 1)
+ !$omp teams ! { dg-error "OpenMP constructs are not allowed in target region with 'ancestor'" }
+ !$omp end teams
+!$omp end target
+
+!$omp target device (device_num: 1)
+ !$omp teams
+ !$omp end teams
+!$omp end target
+
+!$omp target device (1)
+ !$omp teams
+ !$omp end teams
+!$omp end target
+
+
+! Ensure that with 'ancestor' only the 'device', 'firstprivate', 'private',
+! 'defaultmap', and 'map' clauses appear on the construct.
+
+!$omp target nowait device (ancestor: 1) ! { dg-error "with 'ancestor', only the 'device', 'firstprivate', 'private', 'defaultmap', and 'map' clauses may appear on the construct" }
+!$omp end target
+
+!$omp target device (ancestor: 1) nowait ! { dg-error "with 'ancestor', only the 'device', 'firstprivate', 'private', 'defaultmap', and 'map' clauses may appear on the construct" }
+!$omp end target
+
+!$omp target nowait device (device_num: 1)
+!$omp end target
+
+!$omp target nowait device (1)
+!$omp end target
+
+!$omp target device (ancestor: 1) firstprivate (a) private (b) defaultmap (none) map (c)
+!$omp end target
+
+
+! Ensure that 'ancestor' is only used with 'target' constructs (not with
+! 'target data', 'target update' etc.).
+! The following test case is marked with 'xfail' because a previous 'sorry' from
+! 'reverse_offload' suppresses the 'sorry' for 'ancestor'.
+
+!$omp target data map (a) device (ancestor: 1) ! { dg-error "'device' clause with 'ancestor' is only allowed on 'target' construct" }
+!$omp end target data
+
+!$omp target enter data map (to: a) device (ancestor: 1) ! { dg-error "'device' clause with 'ancestor' is only allowed on 'target' construct" }
+!$omp target exit data map (from: a) device (ancestor: 1) ! { dg-error "'device' clause with 'ancestor' is only allowed on 'target' construct" }
+
+!$omp target update to (a) device (ancestor: 1) ! { dg-error "'device' clause with 'ancestor' is only allowed on 'target' construct" }
+
+!$omp target device (ancestor: 1) if(.false.)
+! { dg-error "with 'ancestor', only the 'device', 'firstprivate', 'private', 'defaultmap', and 'map' clauses may appear on the construct" "" { target *-*-* } .-1 }
+!$omp end target
+
+end
@@ -16,10 +16,10 @@ subroutine f1 ()
implicit none
integer :: n
- !$omp requires reverse_offload ! { dg-error "Sorry, 'reverse_offload' clause at \\(1\\) on REQUIRES directive is not yet supported" }
+ !$omp requires reverse_offload
!$omp target device (ancestor : 1)
- n = omp_get_thread_num () ! { dg-error "" "OpenMP runtime API call 'omp_get_thread_num' in a region with 'device\\(ancestor\\)' clause" { xfail *-*-* } }
+ n = omp_get_thread_num () ! { dg-error "OpenMP runtime API call 'omp_get_thread_num' in a region with 'device\\(ancestor\\)' clause" }
!$omp end target
!$omp target device (device_num : 1)
@@ -30,4 +30,4 @@ subroutine f1 ()
n = omp_get_thread_num ()
!$omp end target
-end
\ No newline at end of file
+end
@@ -4,11 +4,11 @@
! Test to ensure that device-modifier 'ancestor' is parsed correctly in
! device clauses.
-!$omp requires reverse_offload ! { dg-error "Sorry, 'reverse_offload' clause at \\(1\\) on REQUIRES directive is not yet supported" }
+!$omp requires reverse_offload
-!$omp target device (ancestor : 1) ! { dg-message "" "sorry, unimplemented: 'ancestor' not yet supported" { xfail *-*-* } }
+!$omp target device (ancestor : 1) ! { dg-message "sorry, unimplemented: 'ancestor' not yet supported" }
!$omp end target
end
-! TODO: dg-final { scan-tree-dump-times "pragma omp target \[^\n\r)]*device\\(ancestor:1\\)" 1 "original" } }
+! { dg-final { scan-tree-dump-times "pragma omp target \[^\n\r)]*device\\(ancestor:1\\)" 1 "original" } }
@@ -6,7 +6,7 @@
!
module m
- !$omp requires reverse_offload ! { dg-error "REQUIRES directive is not yet supported" }
+ !$omp requires reverse_offload
contains
subroutine foo()
!$omp target device(ancestor:1)
@@ -17,7 +17,7 @@ contains
block
block
block
- !$omp target device(ancestor:1)
+ !$omp target device(ancestor:1) ! { dg-message "sorry, unimplemented: 'ancestor' not yet supported" }
!$omp end target
end block
end block
@@ -26,7 +26,7 @@ contains
end module m
subroutine foo()
- !$omp requires reverse_offload ! { dg-error "REQUIRES directive is not yet supported" }
+ !$omp requires reverse_offload
block
block
block
@@ -49,7 +49,7 @@ contains
end subroutine foo
program main
- !$omp requires reverse_offload ! { dg-error "REQUIRES directive is not yet supported" }
+ !$omp requires reverse_offload
contains
subroutine foo()
!$omp target device(ancestor:1)
@@ -282,7 +282,7 @@ enum gomp_map_kind
/* Versions of libgomp and device-specific plugins. GOMP_VERSION
should be incremented whenever an ABI-incompatible change is introduced
to the plugin interface defined in libgomp/libgomp.h. */
-#define GOMP_VERSION 1
+#define GOMP_VERSION 2
#define GOMP_VERSION_NVIDIA_PTX 1
#define GOMP_VERSION_INTEL_MIC 0
#define GOMP_VERSION_GCN 2
@@ -341,6 +341,12 @@ enum gomp_map_kind
#define GOMP_DEPEND_MUTEXINOUTSET 4
#define GOMP_DEPEND_INOUTSET 5
+/* Flag values for requires-directive features, must match corresponding
+ OMP_REQUIRES_* values in gcc/omp-general.h. */
+#define GOMP_REQUIRES_UNIFIED_ADDRESS 0x10
+#define GOMP_REQUIRES_UNIFIED_SHARED_MEMORY 0x20
+#define GOMP_REQUIRES_REVERSE_OFFLOAD 0x80
+
/* HSA specific data structures. */
/* Identifiers of device-specific target arguments. */
@@ -125,7 +125,7 @@ extern void GOMP_PLUGIN_fatal (const char *, ...)
extern const char *GOMP_OFFLOAD_get_name (void);
extern unsigned int GOMP_OFFLOAD_get_caps (void);
extern int GOMP_OFFLOAD_get_type (void);
-extern int GOMP_OFFLOAD_get_num_devices (void);
+extern int GOMP_OFFLOAD_get_num_devices (unsigned int);
extern bool GOMP_OFFLOAD_init_device (int);
extern bool GOMP_OFFLOAD_fini_device (int);
extern unsigned GOMP_OFFLOAD_version (void);
@@ -189,8 +189,8 @@ The OpenMP 4.5 specification is fully supported.
env variable @tab Y @tab
@item Nested-parallel changes to @emph{max-active-levels-var} ICV @tab Y @tab
@item @code{requires} directive @tab P
- @tab Only fulfillable requirement are @code{atomic_default_mem_order}
- and @code{dynamic_allocators}
+ @tab complete but no non-host devices provides @code{unified_address},
+ @code{unified_shared_memory} or @code{reverse_offload}
@item @code{teams} construct outside an enclosing target region @tab Y @tab
@item Non-rectangular loop nests @tab Y @tab
@item @code{!=} as relational-op in canonical loop form for C/C++ @tab Y @tab
@@ -344,6 +344,8 @@ The OpenMP 4.5 specification is fully supported.
@item @code{unconstrained} and @code{reproducible} modifiers on @code{order}
clause @tab Y @tab
@item Support @code{begin/end declare target} syntax in C/C++ @tab N @tab
+@item Pointer predetermined firstprivate getting initialized
+to address of matching mapped list item per 5.1, Sect. 2.21.7.2 @tab N @tab
@end multitable
@@ -361,7 +363,7 @@ The OpenMP 4.5 specification is fully supported.
@item Clauses on @code{end} directive can be on directive @tab N @tab
@item Deprecation of no-argument @code{destroy} clause on @code{depobj}
@tab N @tab
-@item @code{linear} clause syntax changes and @code{step} modifier @tab N @tab
+@item @code{linear} clause syntax changes and @code{step} modifier @tab P @tab only C/C++
@item Deprecation of minus operator for reductions @tab N @tab
@item Deprecation of separating @code{map} modifiers without comma @tab N @tab
@item @code{declare mapper} with iterator and @code{present} modifiers
@@ -54,7 +54,7 @@ host_get_type (void)
}
static int
-host_get_num_devices (void)
+host_get_num_devices (unsigned int omp_requires_mask __attribute__((unused)))
{
return 1;
}
@@ -229,7 +229,7 @@ host_openacc_get_property (int n, enum goacc_property prop)
{
union goacc_property_value nullval = { .val = 0 };
- if (n >= host_get_num_devices ())
+ if (n >= host_get_num_devices (0))
return nullval;
switch (prop)
@@ -148,7 +148,7 @@ resolve_device (acc_device_t d, bool fail_is_error)
if (dispatchers[d]
&& !strcasecmp (goacc_device_type,
get_openacc_name (dispatchers[d]->name))
- && dispatchers[d]->get_num_devices_func () > 0)
+ && dispatchers[d]->get_num_devices_func (0) > 0)
goto found;
if (fail_is_error)
@@ -169,7 +169,7 @@ resolve_device (acc_device_t d, bool fail_is_error)
case acc_device_not_host:
/* Find the first available device after acc_device_not_host. */
while (known_device_type_p (++d))
- if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
+ if (dispatchers[d] && dispatchers[d]->get_num_devices_func (0) > 0)
goto found;
if (d_arg == acc_device_default)
{
@@ -302,7 +302,7 @@ acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit)
base_dev = resolve_device (d, true);
- ndevs = base_dev->get_num_devices_func ();
+ ndevs = base_dev->get_num_devices_func (0);
if (ndevs <= 0 || goacc_device_num >= ndevs)
acc_dev_num_out_of_range (d, goacc_device_num, ndevs);
@@ -351,7 +351,7 @@ acc_shutdown_1 (acc_device_t d)
/* Get the base device for this device type. */
base_dev = resolve_device (d, true);
- ndevs = base_dev->get_num_devices_func ();
+ ndevs = base_dev->get_num_devices_func (0);
/* Unload all the devices of this type that have been opened. */
for (i = 0; i < ndevs; i++)
@@ -520,7 +520,7 @@ goacc_attach_host_thread_to_device (int ord)
base_dev = cached_base_dev;
}
- num_devices = base_dev->get_num_devices_func ();
+ num_devices = base_dev->get_num_devices_func (0);
if (num_devices <= 0 || ord >= num_devices)
acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord,
num_devices);
@@ -599,7 +599,7 @@ acc_get_num_devices (acc_device_t d)
if (!acc_dev)
return 0;
- n = acc_dev->get_num_devices_func ();
+ n = acc_dev->get_num_devices_func (0);
if (n < 0)
n = 0;
@@ -779,7 +779,7 @@ acc_set_device_num (int ord, acc_device_t d)
cached_base_dev = base_dev = resolve_device (d, true);
- num_devices = base_dev->get_num_devices_func ();
+ num_devices = base_dev->get_num_devices_func (0);
if (num_devices <= 0 || ord >= num_devices)
acc_dev_num_out_of_range (d, ord, num_devices);
@@ -814,7 +814,7 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
struct gomp_device_descr *dev = resolve_device (d, true);
- int num_devices = dev->get_num_devices_func ();
+ int num_devices = dev->get_num_devices_func (0);
if (num_devices <= 0 || ord >= num_devices)
acc_dev_num_out_of_range (d, ord, num_devices);
@@ -3221,10 +3221,14 @@ GOMP_OFFLOAD_version (void)
/* Return the number of GCN devices on the system. */
int
-GOMP_OFFLOAD_get_num_devices (void)
+GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
{
if (!init_hsa_context ())
return 0;
+ /* Return -1 if no omp_requires_mask cannot be fulfilled but
+ devices were present. */
+ if (hsa_context.agent_count > 0 && omp_requires_mask != 0)
+ return -1;
return hsa_context.agent_count;
}
@@ -1175,9 +1175,14 @@ GOMP_OFFLOAD_get_type (void)
}
int
-GOMP_OFFLOAD_get_num_devices (void)
+GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
{
- return nvptx_get_num_devices ();
+ int num_devices = nvptx_get_num_devices ();
+ /* Return -1 if no omp_requires_mask cannot be fulfilled but
+ devices were present. */
+ if (num_devices > 0 && omp_requires_mask != 0)
+ return -1;
+ return num_devices;
}
bool
@@ -36,6 +36,7 @@
# include <inttypes.h> /* For PRIu64. */
#endif
#include <string.h>
+#include <stdio.h> /* For snprintf. */
#include <assert.h>
#include <errno.h>
@@ -98,6 +99,9 @@ static int num_devices;
/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
static int num_devices_openmp;
+/* OpenMP requires mask. */
+static int omp_requires_mask;
+
/* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
static void *
@@ -2314,6 +2318,20 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep,
}
}
+static void
+gomp_requires_to_name (char *buf, size_t size, int requires_mask)
+{
+ char *end = buf + size, *p = buf;
+ if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS)
+ p += snprintf (p, end - p, "unified_address");
+ if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY)
+ p += snprintf (p, end - p, "%sunified_shared_memory",
+ (p == buf ? "" : ", "));
+ if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD)
+ p += snprintf (p, end - p, "%sreverse_offload",
+ (p == buf ? "" : ", "));
+}
+
/* This function should be called from every offload image while loading.
It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
the target, and TARGET_DATA needed by target plugin. */
@@ -2323,11 +2341,29 @@ GOMP_offload_register_ver (unsigned version, const void *host_table,
int target_type, const void *target_data)
{
int i;
+ int omp_req = omp_requires_mask;
if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
gomp_fatal ("Library too old for offload (version %u < %u)",
GOMP_VERSION, GOMP_VERSION_LIB (version));
-
+
+ if (GOMP_VERSION_LIB (version) > 1)
+ {
+ omp_req = (int) (size_t) ((void **) target_data)[0];
+ target_data = &((void **) target_data)[1];
+ if (num_devices && (omp_req & ~omp_requires_mask))
+ {
+ char buf[64];
+ gomp_requires_to_name (buf, sizeof (buf),
+ omp_req & ~omp_requires_mask);
+ gomp_error ("devices already initialized when registering additional "
+ "offload images that use the additional OpenMP 'requires'"
+ " directive clauses %s. Therefore, the program might not "
+ "run correctly", buf);
+ }
+ omp_requires_mask |= omp_req;
+ }
+
gomp_mutex_lock (®ister_lock);
/* Load image to all initialized devices. */
@@ -4125,8 +4161,30 @@ gomp_target_init (void)
if (gomp_load_plugin_for_device (¤t_device, plugin_name))
{
- new_num_devs = current_device.get_num_devices_func ();
- if (new_num_devs >= 1)
+ new_num_devs
+ = current_device.get_num_devices_func (omp_requires_mask);
+ if (new_num_devs < 0)
+ {
+ bool found = false;
+ int type = current_device.get_type_func ();
+ for (int img = 0; img < num_offload_images; img++)
+ if (type == offload_images[img].type)
+ found = true;
+ if (found)
+ {
+ char buf[64];
+ gomp_requires_to_name (buf, sizeof (buf),
+ omp_requires_mask);
+ char *name = (char *) malloc (cur_len + 1);
+ memcpy (name, cur, cur_len);
+ name[cur_len] = '\0';
+ GOMP_PLUGIN_error ("note: %s devices present but 'omp "
+ "requires %s' cannot be fulfilled",
+ name, buf);
+ free (name);
+ }
+ }
+ else if (new_num_devs >= 1)
{
/* Augment DEVICES and NUM_DEVICES. */
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-skip-if "" { *-*-* } } */
+
+#pragma omp requires unified_address
+
+int x;
+
+void foo (void)
+{
+ #pragma omp target
+ x = 1;
+}
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-do link { target { offload_target_nvptx || offload_target_amdgcn } } } */
+/* { dg-additional-sources requires-1-aux.c } */
+
+/* Check diagnostic by device-compiler's lto1.
+ Other file uses: 'requires unified_address'. */
+
+#pragma omp requires unified_shared_memory
+
+int a[10];
+extern void foo (void);
+
+int
+main (void)
+{
+ #pragma omp target
+ for (int i = 0; i < 10; i++)
+ a[i] = 0;
+
+ foo ();
+ return 0;
+}
+
+/* { dg-error "OpenMP 'requires' directive with non-identical clauses in multiple compilation units: 'unified_shared_memory' vs. 'unified_address'" "" { target *-*-* } 0 } */
+/* { dg-excess-errors "Ignore messages like: errors during merging of translation units|mkoffload returned 1 exit status" } */
new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-skip-if "" { *-*-* } } */
+
+int x;
+
+void foo (void)
+{
+ #pragma omp target
+ x = 1;
+}
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do link { target offloading_enabled } } */
+/* { dg-additional-options "-foffload=disable -flto" } */
+/* { dg-additional-sources requires-2-aux.c } */
+
+/* Check diagnostic by host's lto1.
+ Other file does not have any 'omp requires'. */
+
+#pragma omp requires unified_shared_memory
+
+int a[10];
+extern void foo (void);
+
+int
+main (void)
+{
+ #pragma omp target
+ for (int i = 0; i < 10; i++)
+ a[i] = 0;
+
+ foo ();
+ return 0;
+}
+
+/* { dg-error "OpenMP 'requires' directive with non-identical clauses in multiple compilation units: 'unified_shared_memory' vs. ''" "" { target *-*-* } 0 } */
+/* { dg-excess-errors "Ignore messages like: errors during merging of translation units|mkoffload returned 1 exit status" } */
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-skip-if "" { *-*-* } } */
+
+#pragma omp requires unified_address
+
+int x;
+
+void foo (void)
+{
+ #pragma omp target
+ x = 1;
+}
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-do link { target offloading_enabled } } */
+/* { dg-additional-sources requires-3-aux.c } */
+
+/* Check diagnostic by device-compiler's lto1.
+ Other file uses: 'requires unified_address'. */
+
+#pragma omp requires unified_address,unified_shared_memory
+
+int a[10];
+extern void foo (void);
+
+int
+main (void)
+{
+ #pragma omp target
+ for (int i = 0; i < 10; i++)
+ a[i] = 0;
+
+ foo ();
+ return 0;
+}
+
+/* { dg-error "OpenMP 'requires' directive with non-identical clauses in multiple compilation units: 'unified_address, unified_shared_memory' vs. 'unified_address'" "" { target *-*-* } 0 } */
+/* { dg-excess-errors "Ignore messages like: errors during merging of translation units|mkoffload returned 1 exit status" } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-skip-if "" { *-*-* } } */
+
+#pragma omp requires reverse_offload
+
+/* Note: The file does not have neither of:
+ declare target directives, device constructs or device routines. */
+
+int x;
+
+void foo (void)
+{
+ x = 1;
+}
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do link { target offloading_enabled } } */
+/* { dg-additional-options "-flto" } */
+/* { dg-additional-sources requires-4-aux.c } */
+
+/* Check diagnostic by device-compiler's or host compiler's lto1.
+ Other file uses: 'requires reverse_offload', but that's inactive as
+ there are no declare target directives, device constructs nor device routines */
+
+#pragma omp requires unified_address,unified_shared_memory
+
+int a[10];
+extern void foo (void);
+
+int
+main (void)
+{
+ #pragma omp target
+ for (int i = 0; i < 10; i++)
+ a[i] = 0;
+
+ foo ();
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-skip-if "" { *-*-* } } */
+
+#pragma omp requires unified_shared_memory, unified_address, reverse_offload
+
+int x;
+
+void foo (void)
+{
+ #pragma omp target
+ x = 1;
+}
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do run { target { offload_target_nvptx || offload_target_amdgcn } } } */
+/* { dg-additional-sources requires-5-aux.c } */
+
+#pragma omp requires unified_shared_memory, unified_address, reverse_offload
+
+int a[10];
+extern void foo (void);
+
+int
+main (void)
+{
+ #pragma omp target
+ for (int i = 0; i < 10; i++)
+ a[i] = 0;
+
+ foo ();
+ return 0;
+}
+
+/* { dg-output "devices present but 'omp requires unified_address, unified_shared_memory, reverse_offload' cannot be fulfilled" } */
new file mode 100644
@@ -0,0 +1,17 @@
+#pragma omp requires unified_shared_memory, unified_address, reverse_offload
+
+/* The requires line is not active as there is none of:
+ declare target directives, device constructs or device routines.
+ Thus, this code is expected to work everywhere. */
+
+int a[10];
+extern void foo (void);
+
+int
+main (void)
+{
+ for (int i = 0; i < 10; i++)
+ a[i] = 0;
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-skip-if "" { *-*-* } } */
+
+#pragma omp requires unified_address
+
+int x;
+
+void foo (void)
+{
+ x = 1;
+ #pragma omp target enter data map(always,to: x)
+}
new file mode 100644
@@ -0,0 +1,24 @@
+/* { dg-do link { target { offload_target_nvptx || offload_target_amdgcn } } } */
+/* { dg-additional-sources requires-7-aux.c } */
+
+/* Check diagnostic by device-compiler's lto1.
+ Other file uses: 'requires unified_address'. */
+
+#pragma omp requires unified_shared_memory
+
+int a[10];
+extern void foo (void);
+
+int
+main (void)
+{
+ #pragma omp target
+ for (int i = 0; i < 10; i++)
+ a[i] = 0;
+
+ foo ();
+ return 0;
+}
+
+/* { dg-error "OpenMP 'requires' directive with non-identical clauses in multiple compilation units: 'unified_shared_memory' vs. 'unified_address'" "" { target *-*-* } 0 } */
+/* { dg-excess-errors "Ignore messages like: errors during merging of translation units|mkoffload returned 1 exit status" } */
new file mode 100644
@@ -0,0 +1,14 @@
+! { dg-skip-if "" { *-*-* } }
+
+module m
+ integer x
+end module m
+
+subroutine foo
+ use m
+ implicit none
+ !$omp requires unified_address
+
+ x = 1
+ !$omp target enter data map(always,to: x)
+end
new file mode 100644
@@ -0,0 +1,26 @@
+! { dg-do link { target { offload_target_nvptx || offload_target_amdgcn } } }
+! { dg-additional-sources requires-1-aux.f90 }
+
+! Check diagnostic by device-compiler's lto1.
+! Other file uses: 'requires unified_address'.
+
+module m
+ integer :: a(10)
+ interface
+ subroutine foo
+ end
+ end interface
+end
+
+program main
+ !$omp requires unified_shared_memory
+
+ !$omp target
+ a = 0
+ !$omp end target
+
+ call foo ()
+end
+
+! { dg-error "OpenMP 'requires' directive with non-identical clauses in multiple compilation units: 'unified_shared_memory' vs. 'unified_address'" "" { target *-*-* } 0 }
+! { dg-excess-errors "Ignore messages like: errors during merging of translation units|mkoffload returned 1 exit status" }
@@ -168,8 +168,12 @@ GOMP_OFFLOAD_get_type (void)
}
extern "C" int
-GOMP_OFFLOAD_get_num_devices (void)
+GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
{
+ /* Return -1 if no omp_requires_mask cannot be fulfilled but
+ devices were present. */
+ if (num_devices > 0 && omp_requires_mask != 0)
+ return -1;
TRACE ("(): return %d", num_devices);
return num_devices;
}