libgomp: Fix declare target link with offset array-section mapping [PR116107]
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
fail
|
Test failed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
fail
|
Test failed
|
Commit Message
The main idea of 'link' is to permit putting only a subset of a
huge array on the device. Well, in order to make this work properly,
it requires that one can map an array section, which does not
start with the first element.
This patch adjusts the pointers such, that this actually works.
(Tested on x86-64-gnu-linux with Nvptx offloading.)
Comments, suggestions, remarks before I commit it?
Tobias
Comments
On Fri, Jul 26, 2024 at 08:05:43PM +0200, Tobias Burnus wrote:
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -1820,8 +1820,11 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
> if (k->aux && k->aux->link_key)
> {
> /* Set link pointer on target to the device address of the
> - mapped object. */
> - void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
> + mapped object. Also deal with offsets due to
> + array-section mapping. */
Formatting. Two spaces after . in both spots.
> + void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset
> + - (k->host_start
> + - k->aux->link_key->host_start));
Otherwise LGTM.
Jakub
Hi Tobias!
On 2024-07-26T20:05:43+0200, Tobias Burnus <tburnus@baylibre.com> wrote:
> The main idea of 'link' is to permit putting only a subset of a
> huge array on the device. Well, in order to make this work properly,
> it requires that one can map an array section, which does not
> start with the first element.
>
> This patch adjusts the pointers such, that this actually works.
>
> (Tested on x86-64-gnu-linux with Nvptx offloading.)
> Comments, suggestions, remarks before I commit it?
> libgomp: Fix declare target link with offset array-section mapping [PR116107]
>
> Assume that 'int var[100]' is 'omp declare target link(var)'. When now
> mapping an array section with offset such as 'map(to:var[20:10])',
> the device-side link pointer has to store &<device-storage-data>[0] minus
> the offset such that var[20] will access <device-storage-data>[0]. But
> the offset calculation was missed such that the device-side 'var' pointed
> to the first element of the mapped data - and var[20] points beyond at
> some invalid memory.
>
> PR middle-end/116107
>
> libgomp/ChangeLog:
>
> * target.c (gomp_map_vars_internal): Honor array mapping offsets
> with declare-target 'link' variables.
> * testsuite/libgomp.c-c++-common/target-link-2.c: New test.
>
> libgomp/target.c | 7 ++-
> .../testsuite/libgomp.c-c++-common/target-link-2.c | 59 ++++++++++++++++++++++
> 2 files changed, 64 insertions(+), 2 deletions(-)
The new test case 'libgomp.c-c++-common/target-link-2.c' generally PASSes
on one-GPU systems, but on a multi-GPU system (tested nvidia5):
$ nvidia-smi -L
GPU 0: Tesla K80 (UUID: [...])
GPU 1: Tesla K80 (UUID: [...])
..., I see:
+PASS: libgomp.c/../libgomp.c-c++-common/target-link-2.c (test for excess errors)
+FAIL: libgomp.c/../libgomp.c-c++-common/target-link-2.c execution test
+PASS: libgomp.c++/../libgomp.c-c++-common/target-link-2.c (test for excess errors)
+FAIL: libgomp.c++/../libgomp.c-c++-common/target-link-2.c execution test
[...]
#2 0x00007ffff7b548fc in __GI_abort () at abort.c:79
#3 0x0000000010000bd4 in main () at [...]/libgomp.c-c++-common/target-link-2.c:38
(gdb) frame 3
#3 0x0000000010000bd4 in main () at [...]/libgomp.c-c++-common/target-link-2.c:38
38 __builtin_abort ();
(gdb) list
33
34 #pragma omp target map(from: res2) device(dev)
35 res2 = arr[5];
36
37 if (res2 != 6)
38 __builtin_abort ();
[...]
(gdb) print res2
$1 = 60
I first thought that maybe just:
--- libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
+++ libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
@@ -54,6 +54,8 @@ int main()
for (int i = 0; i < 10; i++)
if (res[i] != (4 + i)*10)
__builtin_abort ();
+
+ #pragma omp target exit data map(release:arr[3:10]) device(dev)
}
return 0;
}
... was missing, but that doesn't resolve the issue: same error state.
Could you please have a look what other state needs to be reset, in which
way?
Grüße
Thomas
> diff --git a/libgomp/target.c b/libgomp/target.c
> index aa01c1367b9..e3e648f5443 100644
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -1820,8 +1820,11 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
> if (k->aux && k->aux->link_key)
> {
> /* Set link pointer on target to the device address of the
> - mapped object. */
> - void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
> + mapped object. Also deal with offsets due to
> + array-section mapping. */
> + void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset
> + - (k->host_start
> + - k->aux->link_key->host_start));
> /* We intentionally do not use coalescing here, as it's not
> data allocated by the current call to this function. */
> gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
> diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-link-2.c b/libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
> new file mode 100644
> index 00000000000..4ff4080da76
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c-c++-common/target-link-2.c
> @@ -0,0 +1,59 @@
> +/* PR middle-end/116107 */
> +
> +#include <omp.h>
> +
> +int arr[15] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
> +#pragma omp declare target link(arr)
> +
> +#pragma omp begin declare target
> +void f(int *res)
> +{
> + __builtin_memcpy (res, &arr[5], sizeof(int)*10);
> +}
> +
> +void g(int *res)
> +{
> + __builtin_memcpy (res, &arr[3], sizeof(int)*10);
> +}
> +#pragma omp end declare target
> +
> +int main()
> +{
> + int res[10], res2;
> + for (int dev = 0; dev < omp_get_num_devices(); dev++)
> + {
> + __builtin_memset (res, 0, sizeof (res));
> + res2 = 99;
> +
> + #pragma omp target enter data map(arr[5:10]) device(dev)
> +
> + #pragma omp target map(from: res) device(dev)
> + f (res);
> +
> + #pragma omp target map(from: res2) device(dev)
> + res2 = arr[5];
> +
> + if (res2 != 6)
> + __builtin_abort ();
> + for (int i = 0; i < 10; i++)
> + if (res[i] != 6 + i)
> + __builtin_abort ();
> +
> + #pragma omp target exit data map(release:arr[5:10]) device(dev)
> +
> + for (int i = 0; i < 15; i++)
> + res[i] *= 10;
> + __builtin_abort ();
> +
> + #pragma omp target enter data map(arr[3:10]) device(dev)
> + __builtin_memset (res, 0, sizeof (res));
> +
> + #pragma omp target map(from: res) device(dev)
> + g (res);
> +
> + for (int i = 0; i < 10; i++)
> + if (res[i] != (4 + i)*10)
> + __builtin_abort ();
> + }
> + return 0;
> +}
libgomp: Fix declare target link with offset array-section mapping [PR116107]
Assume that 'int var[100]' is 'omp declare target link(var)'. When now
mapping an array section with offset such as 'map(to:var[20:10])',
the device-side link pointer has to store &<device-storage-data>[0] minus
the offset such that var[20] will access <device-storage-data>[0]. But
the offset calculation was missed such that the device-side 'var' pointed
to the first element of the mapped data - and var[20] points beyond at
some invalid memory.
PR middle-end/116107
libgomp/ChangeLog:
* target.c (gomp_map_vars_internal): Honor array mapping offsets
with declare-target 'link' variables.
* testsuite/libgomp.c-c++-common/target-link-2.c: New test.
libgomp/target.c | 7 ++-
.../testsuite/libgomp.c-c++-common/target-link-2.c | 59 ++++++++++++++++++++++
2 files changed, 64 insertions(+), 2 deletions(-)
@@ -1820,8 +1820,11 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
if (k->aux && k->aux->link_key)
{
/* Set link pointer on target to the device address of the
- mapped object. */
- void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
+ mapped object. Also deal with offsets due to
+ array-section mapping. */
+ void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset
+ - (k->host_start
+ - k->aux->link_key->host_start));
/* We intentionally do not use coalescing here, as it's not
data allocated by the current call to this function. */
gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
new file mode 100644
@@ -0,0 +1,59 @@
+/* PR middle-end/116107 */
+
+#include <omp.h>
+
+int arr[15] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+#pragma omp declare target link(arr)
+
+#pragma omp begin declare target
+void f(int *res)
+{
+ __builtin_memcpy (res, &arr[5], sizeof(int)*10);
+}
+
+void g(int *res)
+{
+ __builtin_memcpy (res, &arr[3], sizeof(int)*10);
+}
+#pragma omp end declare target
+
+int main()
+{
+ int res[10], res2;
+ for (int dev = 0; dev < omp_get_num_devices(); dev++)
+ {
+ __builtin_memset (res, 0, sizeof (res));
+ res2 = 99;
+
+ #pragma omp target enter data map(arr[5:10]) device(dev)
+
+ #pragma omp target map(from: res) device(dev)
+ f (res);
+
+ #pragma omp target map(from: res2) device(dev)
+ res2 = arr[5];
+
+ if (res2 != 6)
+ __builtin_abort ();
+ for (int i = 0; i < 10; i++)
+ if (res[i] != 6 + i)
+ __builtin_abort ();
+
+ #pragma omp target exit data map(release:arr[5:10]) device(dev)
+
+ for (int i = 0; i < 15; i++)
+ res[i] *= 10;
+ __builtin_abort ();
+
+ #pragma omp target enter data map(arr[3:10]) device(dev)
+ __builtin_memset (res, 0, sizeof (res));
+
+ #pragma omp target map(from: res) device(dev)
+ g (res);
+
+ for (int i = 0; i < 10; i++)
+ if (res[i] != (4 + i)*10)
+ __builtin_abort ();
+ }
+ return 0;
+}