libgomp/gcn: fix/improve struct output (was: [Patch] libgomp/gcn: Prepare for reverse-offload callback handling)

Message ID 23880d62-9f02-8073-a8ea-52032f979089@codesourcery.com
State Committed
Headers
Series libgomp/gcn: fix/improve struct output (was: [Patch] libgomp/gcn: Prepare for reverse-offload callback handling) |

Commit Message

Tobias Burnus Nov. 21, 2022, 1:40 p.m. UTC
  Working on the builtins, I realized that I mixed up (again) bits and byes.
While 'uint64_t var[2]' has a size of 128 bits, 'char var[128]' has a size of 128 bytes.
Thus, there is sufficient space for 16 pointer-size/uin64_t values but I only need 6.

This patch now makes use of the available space, avoiding one device-to-host memory copy;
additionally, it avoids a 32bit vs 64bit alignment issue which I somehow missed :-(

Tested with libgomp on gfx908 offloading and getting only the known fails:
(libgomp.c-c++-common/teams-2.c, libgomp.fortran/async_io_*.f90,
libgomp.oacc-c-c++-common/{deep-copy-10.c,static-variable-1.c,vprop.c})

OK for mainline?

Tobias
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
  

Comments

Andrew Stubbs Nov. 21, 2022, 2:19 p.m. UTC | #1
On 21/11/2022 13:40, Tobias Burnus wrote:
> Working on the builtins, I realized that I mixed up (again) bits and byes.
> While 'uint64_t var[2]' has a size of 128 bits, 'char var[128]' has a 
> size of 128 bytes.
> Thus, there is sufficient space for 16 pointer-size/uin64_t values but I 
> only need 6.
> 
> This patch now makes use of the available space, avoiding one 
> device-to-host memory copy;
> additionally, it avoids a 32bit vs 64bit alignment issue which I somehow 
> missed :-(
> 
> Tested with libgomp on gfx908 offloading and getting only the known fails:
> (libgomp.c-c++-common/teams-2.c, libgomp.fortran/async_io_*.f90,
> libgomp.oacc-c-c++-common/{deep-copy-10.c,static-variable-1.c,vprop.c})
> 
> OK for mainline?

OK, although why not set value64 to 16 entries, even though reverse 
offload only uses 6?

Andrew
  

Patch

libgomp/gcn: fix/improve struct output

output.printf_data.(value union) contains text[128], which has the size
of 128 bytes, sufficient for 16 uint64_t variables; hence value_u64[2]
could be extended to value_u64[6] - sufficient for all required arguments
to gomp_target_rev.  Additionally, next_output.printf_data.(msg union)
contained msg_u64 which then is no longer needed and also caused 32bit
vs 64bit alignment issues.

libgomp/
	* config/gcn/libgomp-gcn.h (struct output):
	Remove 'msg_u64' from the union, change
	value_u64[2] to value_u64[6].
	* config/gcn/target.c (GOMP_target_ext): Update accordingly.
	* plugin/plugin-gcn.c (process_reverse_offload, console_output):
	Likewise.

 libgomp/config/gcn/libgomp-gcn.h |  7 ++-----
 libgomp/config/gcn/target.c      | 12 ++++++------
 libgomp/plugin/plugin-gcn.c      | 17 +++++++----------
 3 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/libgomp/config/gcn/libgomp-gcn.h b/libgomp/config/gcn/libgomp-gcn.h
index 91560be787f..3933e846a86 100644
--- a/libgomp/config/gcn/libgomp-gcn.h
+++ b/libgomp/config/gcn/libgomp-gcn.h
@@ -37,16 +37,13 @@  struct output
   unsigned int next_output;
   struct printf_data {
     int written;
-    union {
-      char msg[128];
-      uint64_t msg_u64[2];
-    };
+    char msg[128];
     int type;
     union {
       int64_t ivalue;
       double dvalue;
       char text[128];
-      uint64_t value_u64[2];
+      uint64_t value_u64[6];
     };
   } queue[1024];
   unsigned int consumed;
diff --git a/libgomp/config/gcn/target.c b/libgomp/config/gcn/target.c
index 27854565d40..11ae6ec9833 100644
--- a/libgomp/config/gcn/target.c
+++ b/libgomp/config/gcn/target.c
@@ -102,12 +102,12 @@  GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
       asm ("s_sleep 64");
 
   unsigned int slot = index % 1024;
-  uint64_t addrs_sizes_kind[3] = {(uint64_t) hostaddrs, (uint64_t) sizes,
-				  (uint64_t) kinds};
-  data->queue[slot].msg_u64[0] = (uint64_t) fn;
-  data->queue[slot].msg_u64[1] = (uint64_t) mapnum;
-  data->queue[slot].value_u64[0] = (uint64_t) &addrs_sizes_kind[0];
-  data->queue[slot].value_u64[1] = (uint64_t) GOMP_ADDITIONAL_ICVS.device_num;
+  data->queue[slot].value_u64[0] = (uint64_t) fn;
+  data->queue[slot].value_u64[1] = (uint64_t) mapnum;
+  data->queue[slot].value_u64[2] = (uint64_t) hostaddrs;
+  data->queue[slot].value_u64[3] = (uint64_t) sizes;
+  data->queue[slot].value_u64[4] = (uint64_t) kinds;
+  data->queue[slot].value_u64[5] = (uint64_t) GOMP_ADDITIONAL_ICVS.device_num;
 
   data->queue[slot].type = 4; /* Reverse offload.  */
   __atomic_store_n (&data->queue[slot].written, 1, __ATOMIC_RELEASE);
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index ffe5cf5af2c..388e87b7765 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -1919,16 +1919,12 @@  create_kernel_dispatch (struct kernel_info *kernel, int num_teams)
 }
 
 static void
-process_reverse_offload (uint64_t fn, uint64_t mapnum, uint64_t rev_data,
-			 uint64_t dev_num64)
+process_reverse_offload (uint64_t fn, uint64_t mapnum, uint64_t hostaddrs,
+			 uint64_t sizes, uint64_t kinds, uint64_t dev_num64)
 {
   int dev_num = dev_num64;
-  uint64_t addrs_sizes_kinds[3];
-  GOMP_OFFLOAD_host2dev (dev_num, &addrs_sizes_kinds, (void *) rev_data,
-			 sizeof (addrs_sizes_kinds));
-  GOMP_PLUGIN_target_rev (fn, mapnum, addrs_sizes_kinds[0],
-			  addrs_sizes_kinds[1], addrs_sizes_kinds[2],
-			  dev_num, NULL, NULL, NULL);
+  GOMP_PLUGIN_target_rev (fn, mapnum, hostaddrs, sizes, kinds, dev_num,
+			  NULL, NULL, NULL);
 }
 
 /* Output any data written to console output from the kernel.  It is expected
@@ -1976,8 +1972,9 @@  console_output (struct kernel_info *kernel, struct kernargs *kernargs,
 	case 2: printf ("%.128s%.128s\n", data->msg, data->text); break;
 	case 3: printf ("%.128s%.128s", data->msg, data->text); break;
 	case 4:
-	  process_reverse_offload (data->msg_u64[0], data->msg_u64[1],
-				   data->value_u64[0],data->value_u64[1]);
+	  process_reverse_offload (data->value_u64[0], data->value_u64[1],
+				   data->value_u64[2], data->value_u64[3],
+				   data->value_u64[4], data->value_u64[5]);
 	  break;
 	default: printf ("GCN print buffer error!\n"); break;
 	}