openmp: Add support for thread_limit clause on target

Message ID 20211115120540.GV2710@tucnak
State Committed
Headers
Series openmp: Add support for thread_limit clause on target |

Commit Message

Jakub Jelinek Nov. 15, 2021, 12:05 p.m. UTC
  Hi!

OpenMP 5.1 says that thread_limit clause can also appear on target,
and similarly to teams should affect the thread-limit-var ICV.
On combined target teams, the clause goes to both.

We actually passed thread_limit internally on target already before,
but only used it for gcn/ptx offloading to hint how many threads should be
created and for ptx didn't set thread_limit_var in that case.
Similarly for host fallback.
Also, I found that we weren't copying the args array that contains encoded
thread_limit and num_teams clause for target (etc.) for async target.

Will commit to trunk once testing finishes.

2021-11-15  Jakub Jelinek  <jakub@redhat.com>

gcc/
	* gimplify.c (optimize_target_teams): Only add OMP_CLAUSE_THREAD_LIMIT
	to OMP_TARGET_CLAUSES if it isn't there already.
gcc/c-family/
	* c-omp.c (c_omp_split_clauses) <case OMP_CLAUSE_THREAD_LIMIT>:
	Duplicate to both OMP_TARGET and OMP_TEAMS.
gcc/c/
	* c-parser.c (OMP_TARGET_CLAUSE_MASK): Add
	PRAGMA_OMP_CLAUSE_THREAD_LIMIT.
gcc/cp/
	* parser.c (OMP_TARGET_CLAUSE_MASK): Add
	PRAGMA_OMP_CLAUSE_THREAD_LIMIT.
libgomp/
	* task.c (gomp_create_target_task): Copy args array as well.
	* target.c (gomp_target_fallback): Add args argument.
	Set gomp_icv (true)->thread_limit_var if thread_limit is present.
	(GOMP_target): Adjust gomp_target_fallback caller.
	(GOMP_target_ext): Likewise.
	(gomp_target_task_fn): Likewise.
	* config/nvptx/team.c (gomp_nvptx_main): Set
	gomp_global_icv.thread_limit_var.
	* testsuite/libgomp.c-c++-common/thread-limit-1.c: New test.



	Jakub
  

Comments

Tobias Burnus Nov. 15, 2021, 1 p.m. UTC | #1
Hi,

On 15.11.21 13:05, Jakub Jelinek wrote:
> OpenMP 5.1 says that thread_limit clause can also appear on target,
> and similarly to teams should affect the thread-limit-var ICV.
> On combined target teams, the clause goes to both.

This patch does this also for Fortran.

OK, once the post-bootstap testing finished successfully?

Tobias
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
  
Jakub Jelinek Nov. 15, 2021, 1:01 p.m. UTC | #2
On Mon, Nov 15, 2021 at 02:00:42PM +0100, Tobias Burnus wrote:
> Hi,
> 
> On 15.11.21 13:05, Jakub Jelinek wrote:
> > OpenMP 5.1 says that thread_limit clause can also appear on target,
> > and similarly to teams should affect the thread-limit-var ICV.
> > On combined target teams, the clause goes to both.
> 
> This patch does this also for Fortran.
> 
> OK, once the post-bootstap testing finished successfully?

Ok, thanks.

> gcc/fortran/ChangeLog:
> 
> 	* openmp.c (OMP_TARGET_CLAUSES): Add thread_limit.
> 	* trans-openmp.c (gfc_split_omp_clauses): Add thread_limit also to
> 	teams.
> 
> libgomp/ChangeLog:
> 
> 	* testsuite/libgomp.fortran/thread-limit-1.f90: New test.

	Jakub
  

Patch

--- gcc/gimplify.c.jj	2021-11-12 15:13:09.030919433 +0100
+++ gcc/gimplify.c	2021-11-15 11:06:20.021516251 +0100
@@ -13637,10 +13637,13 @@  optimize_target_teams (tree target, gimp
 	if (!DECL_P (expr) && TREE_CODE (expr) != TARGET_EXPR)
 	  OMP_CLAUSE_OPERAND (c, 0) = *p;
       }
-  c = build_omp_clause (thread_limit_loc, OMP_CLAUSE_THREAD_LIMIT);
-  OMP_CLAUSE_THREAD_LIMIT_EXPR (c) = thread_limit;
-  OMP_CLAUSE_CHAIN (c) = OMP_TARGET_CLAUSES (target);
-  OMP_TARGET_CLAUSES (target) = c;
+  if (!omp_find_clause (OMP_TARGET_CLAUSES (target), OMP_CLAUSE_THREAD_LIMIT))
+    {
+      c = build_omp_clause (thread_limit_loc, OMP_CLAUSE_THREAD_LIMIT);
+      OMP_CLAUSE_THREAD_LIMIT_EXPR (c) = thread_limit;
+      OMP_CLAUSE_CHAIN (c) = OMP_TARGET_CLAUSES (target);
+      OMP_TARGET_CLAUSES (target) = c;
+    }
   c = build_omp_clause (num_teams_loc, OMP_CLAUSE_NUM_TEAMS);
   OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c) = num_teams_upper;
   OMP_CLAUSE_NUM_TEAMS_LOWER_EXPR (c) = num_teams_lower;
--- gcc/c-family/c-omp.c.jj	2021-10-27 09:21:50.756247410 +0200
+++ gcc/c-family/c-omp.c	2021-11-15 11:03:28.400918573 +0100
@@ -1867,7 +1867,6 @@  c_omp_split_clauses (location_t loc, enu
 	  s = C_OMP_CLAUSE_SPLIT_TARGET;
 	  break;
 	case OMP_CLAUSE_NUM_TEAMS:
-	case OMP_CLAUSE_THREAD_LIMIT:
 	  s = C_OMP_CLAUSE_SPLIT_TEAMS;
 	  break;
 	case OMP_CLAUSE_DIST_SCHEDULE:
@@ -2531,6 +2530,30 @@  c_omp_split_clauses (location_t loc, enu
 	  else
 	    s = C_OMP_CLAUSE_SPLIT_FOR;
 	  break;
+	  /* thread_limit is allowed on target and teams.  Distribute it
+	     to all.  */
+	case OMP_CLAUSE_THREAD_LIMIT:
+	  if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_MAP))
+	      != 0)
+	    {
+	      if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_TEAMS))
+		  != 0)
+		{
+		  c = build_omp_clause (OMP_CLAUSE_LOCATION (clauses),
+					OMP_CLAUSE_THREAD_LIMIT);
+		  OMP_CLAUSE_THREAD_LIMIT_EXPR (c)
+		    = OMP_CLAUSE_THREAD_LIMIT_EXPR (clauses);
+		  OMP_CLAUSE_CHAIN (c) = cclauses[C_OMP_CLAUSE_SPLIT_TARGET];
+		  cclauses[C_OMP_CLAUSE_SPLIT_TARGET] = c;
+		}
+	      else
+		{
+		  s = C_OMP_CLAUSE_SPLIT_TARGET;
+		  break;
+		}
+	    }
+	  s = C_OMP_CLAUSE_SPLIT_TEAMS;
+	  break;
 	/* Allocate clause is allowed on target, teams, distribute, parallel,
 	   for, sections and taskloop.  Distribute it to all.  */
 	case OMP_CLAUSE_ALLOCATE:
--- gcc/c/c-parser.c.jj	2021-11-11 14:35:37.465350510 +0100
+++ gcc/c/c-parser.c	2021-11-15 10:51:27.257024830 +0100
@@ -20963,6 +20963,7 @@  c_parser_omp_target_exit_data (location_
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE)	\
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP)	\
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION)	\
+	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_THREAD_LIMIT)	\
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR))
 
 static bool
--- gcc/cp/parser.c.jj	2021-11-11 14:35:37.550349286 +0100
+++ gcc/cp/parser.c	2021-11-15 10:52:00.026564979 +0100
@@ -44015,6 +44015,7 @@  cp_parser_omp_target_update (cp_parser *
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP)	\
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE)	\
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION)	\
+	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_THREAD_LIMIT)	\
 	| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR))
 
 static bool
--- libgomp/task.c.jj	2021-05-18 10:04:31.543435200 +0200
+++ libgomp/task.c	2021-11-15 12:02:38.336697281 +0100
@@ -745,6 +745,7 @@  gomp_create_target_task (struct gomp_dev
   size_t depend_size = 0;
   uintptr_t depend_cnt = 0;
   size_t tgt_align = 0, tgt_size = 0;
+  uintptr_t args_cnt = 0;
 
   if (depend != NULL)
     {
@@ -769,10 +770,22 @@  gomp_create_target_task (struct gomp_dev
 	tgt_size += tgt_align - 1;
       else
 	tgt_size = 0;
+      if (args)
+	{
+	  void **cargs = args;
+	  while (*cargs)
+	    {
+	      intptr_t id = (intptr_t) *cargs++;
+	      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+		cargs++;
+	    }
+	  args_cnt = cargs + 1 - args;
+	}
     }
 
   task = gomp_malloc (sizeof (*task) + depend_size
 		      + sizeof (*ttask)
+		      + args_cnt * sizeof (void *)
 		      + mapnum * (sizeof (void *) + sizeof (size_t)
 				  + sizeof (unsigned short))
 		      + tgt_size);
@@ -785,9 +798,18 @@  gomp_create_target_task (struct gomp_dev
   ttask->devicep = devicep;
   ttask->fn = fn;
   ttask->mapnum = mapnum;
-  ttask->args = args;
   memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
-  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
+  if (args_cnt)
+    {
+      ttask->args = (void **) &ttask->hostaddrs[mapnum];
+      memcpy (ttask->args, args, args_cnt * sizeof (void *));
+      ttask->sizes = (size_t *) &ttask->args[args_cnt];
+    }
+  else
+    {
+      ttask->args = args;
+      ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
+    }
   memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
   ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
   memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
--- libgomp/target.c.jj	2021-11-12 15:13:09.113918200 +0100
+++ libgomp/target.c	2021-11-15 11:43:42.358162423 +0100
@@ -2362,7 +2362,7 @@  gomp_unload_device (struct gomp_device_d
 
 static void
 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
-		      struct gomp_device_descr *devicep)
+		      struct gomp_device_descr *devicep, void **args)
 {
   struct gomp_thread old_thr, *thr = gomp_thread ();
 
@@ -2378,6 +2378,25 @@  gomp_target_fallback (void (*fn) (void *
       thr->place = old_thr.place;
       thr->ts.place_partition_len = gomp_places_list_len;
     }
+  if (args)
+    while (*args)
+      {
+	intptr_t id = (intptr_t) *args++, val;
+	if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+	  val = (intptr_t) *args++;
+	else
+	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
+	if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
+	  continue;
+	id &= GOMP_TARGET_ARG_ID_MASK;
+	if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
+	  continue;
+	val = val > INT_MAX ? INT_MAX : val;
+	if (val)
+	  gomp_icv (true)->thread_limit_var = val;
+	break;
+      }
+
   fn (hostaddrs);
   gomp_free_thread (thr);
   *thr = old_thr;
@@ -2478,7 +2497,7 @@  GOMP_target (int device, void (*fn) (voi
       /* All shared memory devices should use the GOMP_target_ext function.  */
       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
-    return gomp_target_fallback (fn, hostaddrs, devicep);
+    return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
 
   htab_t refcount_set = htab_create (mapnum);
   struct target_mem_desc *tgt_vars
@@ -2617,7 +2636,7 @@  GOMP_target_ext (int device, void (*fn)
 				      tgt_align, tgt_size);
 	    }
 	}
-      gomp_target_fallback (fn, hostaddrs, devicep);
+      gomp_target_fallback (fn, hostaddrs, devicep, args);
       return;
     }
 
@@ -3052,7 +3071,8 @@  gomp_target_task_fn (void *data)
 	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
 	{
 	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
-	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep);
+	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
+				ttask->args);
 	  return false;
 	}
 
--- libgomp/config/nvptx/team.c.jj	2021-11-15 09:20:47.966837531 +0100
+++ libgomp/config/nvptx/team.c	2021-11-15 11:14:36.477567443 +0100
@@ -55,6 +55,7 @@  gomp_nvptx_main (void (*fn) (void *), vo
   if (tid == 0)
     {
       gomp_global_icv.nthreads_var = ntids;
+      gomp_global_icv.thread_limit_var = ntids;
       /* Starting additional threads is not supported.  */
       gomp_global_icv.dyn_var = true;
 
--- libgomp/testsuite/libgomp.c-c++-common/thread-limit-1.c.jj	2021-11-15 12:24:59.643001103 +0100
+++ libgomp/testsuite/libgomp.c-c++-common/thread-limit-1.c	2021-11-15 12:24:52.865095292 +0100
@@ -0,0 +1,23 @@ 
+#include <omp.h>
+#include <stdlib.h>
+
+void
+foo ()
+{
+  {
+    #pragma omp target parallel nowait thread_limit (4) num_threads (1)
+    if (omp_get_thread_limit () > 4)
+      abort ();
+  }
+  #pragma omp taskwait
+}
+
+int
+main ()
+{
+  #pragma omp target thread_limit (6)
+  if (omp_get_thread_limit () > 6)
+    abort ();
+  foo ();
+  return 0;
+}