[libgomp,openacc] Add terminating spinlock test-cases

Message ID 20220210091452.GA20962@delia.home
State New
Headers
Series [libgomp,openacc] Add terminating spinlock test-cases |

Commit Message

Tom de Vries Feb. 10, 2022, 9:14 a.m. UTC
  Hi,

The OpenACC execution model states that implementing a critical
section across workers using atomic operations and a busy-wait loop may never
succeed, since the scheduler may suspend the worker that owns the lock, in
which case the worker waiting on the lock can never complete.

Add a test-case that implements the next best thing: a spinlock using a
busy-wait loop that gives up after a certain number of tries.

This ensures termination, and makes the test-case a valid one, while still
excercising atomic exchange and atomic store.

OK for trunk?

Thanks,
- Tom

[libgomp, openacc] Add terminating spinlock test-cases

libgomp/ChangeLog:

2022-02-02  Tom de Vries  <tdevries@suse.de>

	* testsuite/libgomp.oacc-c/spin-lock-global.c: New test.
	* testsuite/libgomp.oacc-c/spin-lock-global.h: New test.
	* testsuite/libgomp.oacc-c/spin-lock-shared.c: New test.
	* testsuite/libgomp.oacc-c/spin-lock-shared.h: New test.

---
 .../testsuite/libgomp.oacc-c/spin-lock-global.c    |  43 ++++++
 .../testsuite/libgomp.oacc-c/spin-lock-global.h    | 169 +++++++++++++++++++++
 .../testsuite/libgomp.oacc-c/spin-lock-shared.c    |  35 +++++
 .../testsuite/libgomp.oacc-c/spin-lock-shared.h    | 135 ++++++++++++++++
 4 files changed, 382 insertions(+)
  

Patch

diff --git a/libgomp/testsuite/libgomp.oacc-c/spin-lock-global.c b/libgomp/testsuite/libgomp.oacc-c/spin-lock-global.c
new file mode 100644
index 00000000000..0c1da9e842f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/spin-lock-global.c
@@ -0,0 +1,43 @@ 
+#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
+#include <assert.h>
+
+enum memmodel
+  {
+    MEMMODEL_RELAXED = 0,
+    MEMMODEL_ACQUIRE = 2,
+    MEMMODEL_RELEASE = 3,
+    MEMMODEL_SEQ_CST = 5,
+  };
+
+#define TYPE unsigned int
+#define LOCKVAR1 lock_32_1
+#define LOCKVAR2 lock_32_2
+#define TESTS tests_32
+#include "spin-lock-global.h"
+#undef TYPE
+#undef LOCKVAR1
+#undef LOCKVAR2
+#undef TESTS
+
+#define TYPE unsigned long long int
+#define LOCKVAR1 lock_64_1
+#define LOCKVAR2 lock_64_2
+#define TESTS tests_64
+#include "spin-lock-global.h"
+#undef TYPE
+#undef LOCKVAR1
+#undef LOCKVAR2
+#undef TESTS
+
+#define N (7 * 1000)
+
+int
+main (void)
+{
+  tests_32 (N);
+  tests_64 (N);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/spin-lock-global.h b/libgomp/testsuite/libgomp.oacc-c/spin-lock-global.h
new file mode 100644
index 00000000000..ea63fafccb9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/spin-lock-global.h
@@ -0,0 +1,169 @@ 
+#define XSTR(S) STR (S)
+#define STR(S) #S
+
+#define PRINTF(...)				\
+  {						\
+    printf (__VA_ARGS__);			\
+    fflush (NULL);				\
+  }
+
+#define DO_PRAGMA(x) _Pragma (#x)
+
+#ifndef SPIN_CNT_MAX
+/* Define to have limited-spin spinlock.
+   Ensures that the program will terminate.  */
+#define SPIN_CNT_MAX 0x8000U
+#endif
+
+#define TEST_1(N, LOCKVAR, VERIFY, N_GANGS, N_WORKERS)			\
+  assert (N % N_GANGS == 0);						\
+									\
+  DO_PRAGMA (acc parallel						\
+	     num_gangs(N_GANGS)						\
+	     num_workers(N_WORKERS)					\
+	     copy (lock_cnt)						\
+	     copy (spin_cnt_max_hit)					\
+	     present (LOCKVAR))						\
+  {									\
+    TYPE unlocked = (TYPE)0;						\
+    TYPE locked = ~unlocked;						\
+									\
+    LOCKVAR = unlocked;							\
+									\
+    unsigned int n_gangs						\
+      = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);			\
+									\
+    DO_PRAGMA (acc loop worker)						\
+      for (unsigned int i = 0; i < N / n_gangs; i++)			\
+	{								\
+	  TYPE res;							\
+									\
+	  unsigned int spin_cnt = 0;					\
+	  while (1)							\
+	    {								\
+	      res = __atomic_exchange_n (&LOCKVAR, locked,		\
+					 MEMMODEL_ACQUIRE);		\
+	      if (res == locked)					\
+		{							\
+		  if (SPIN_CNT_MAX > 0)					\
+		    {							\
+		      spin_cnt++;					\
+		      if (spin_cnt == SPIN_CNT_MAX)			\
+			{						\
+			  if (VERIFY)					\
+			    __atomic_fetch_add (&spin_cnt_max_hit, 1,	\
+						MEMMODEL_RELAXED);	\
+			  break;					\
+			}						\
+		    }							\
+		  continue;						\
+									\
+		}							\
+	      else							\
+		{							\
+		  if (res != unlocked)					\
+		    __builtin_abort ();					\
+									\
+		  if (VERIFY)						\
+		    __atomic_fetch_add (&lock_cnt, 1,			\
+					MEMMODEL_RELAXED);		\
+									\
+		  __atomic_store_n (&LOCKVAR, unlocked,			\
+				    MEMMODEL_RELEASE);			\
+		  break;						\
+		}							\
+	    }								\
+	}								\
+  }
+
+#define TEST(N, LOCKVAR, VERIFY, N_GANGS, N_WORKERS)			\
+  {									\
+    spin_cnt_max_hit = 0;						\
+									\
+    if (VERIFY)								\
+      lock_cnt = 0;							\
+									\
+    PRINTF ("%s - verify=%u - lock=%s - gangs=%u - workers=%u ... ",	\
+	    XSTR (TYPE), VERIFY, STR(LOCKVAR), N_GANGS, N_WORKERS);	\
+    TEST_1 (N, LOCKVAR, VERIFY, N_GANGS, N_WORKERS);			\
+    PRINTF ("done\n");							\
+									\
+    if (VERIFY && SPIN_CNT_MAX)						\
+      PRINTF ("spin_cnt_max_hit: %llu\n", spin_cnt_max_hit);		\
+									\
+    if (VERIFY && (lock_cnt + spin_cnt_max_hit != N))			\
+      {									\
+	PRINTF ("lock_cnt: %llu\n", lock_cnt);				\
+	PRINTF ("lock_cnt + spin_cnt_max_hit: %llu\n",			\
+		lock_cnt + spin_cnt_max_hit);				\
+	PRINTF ("N: %u\n", N);						\
+	__builtin_abort ();						\
+      }									\
+  }
+
+/* Uses .global addressing on nvptx.  */
+TYPE LOCKVAR1;
+#pragma acc declare create (LOCKVAR1)
+
+void
+TESTS (unsigned int n)
+{
+  unsigned long long int lock_cnt;
+  unsigned long long int spin_cnt_max_hit;
+
+  /* Uses generic addressing on nvptx.  */
+  TYPE LOCKVAR2;
+#pragma acc declare create (LOCKVAR2)
+
+#define N_GANGS 1
+#define N_WORKERS 8
+#define VERIFY 0
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#define VERIFY 1
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#undef N_WORKERS
+#undef N_GANGS
+
+#define N_GANGS 2
+#define N_WORKERS 4
+#define VERIFY 0
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#define VERIFY 1
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#undef N_WORKERS
+#undef N_GANGS
+
+#define N_GANGS 4
+#define N_WORKERS 2
+#define VERIFY 0
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#define VERIFY 1
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#undef N_WORKERS
+#undef N_GANGS
+
+#define N_GANGS 8
+#define N_WORKERS 1
+#define VERIFY 0
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#define VERIFY 1
+  TEST (n, LOCKVAR1, VERIFY, N_GANGS, N_WORKERS);
+  TEST (n, LOCKVAR2, VERIFY, N_GANGS, N_WORKERS);
+#undef VERIFY
+#undef N_WORKERS
+#undef N_GANGS
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/spin-lock-shared.c b/libgomp/testsuite/libgomp.oacc-c/spin-lock-shared.c
new file mode 100644
index 00000000000..81d18fcc798
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/spin-lock-shared.c
@@ -0,0 +1,35 @@ 
+#include <stdio.h>
+#include <openacc.h>
+#include <gomp-constants.h>
+#include <assert.h>
+
+enum memmodel
+  {
+    MEMMODEL_RELAXED = 0,
+    MEMMODEL_ACQUIRE = 2,
+    MEMMODEL_RELEASE = 3,
+    MEMMODEL_SEQ_CST = 5,
+  };
+
+#define TYPE unsigned int
+#define TESTS tests_32
+#include "spin-lock-shared.h"
+#undef TYPE
+#undef TESTS
+
+#define TYPE unsigned long long int
+#define TESTS tests_64
+#include "spin-lock-shared.h"
+#undef TYPE
+#undef TESTS
+
+#define N (50 * 1000)
+
+int
+main (void)
+{
+  tests_32 (N);
+  tests_64 (N);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c/spin-lock-shared.h b/libgomp/testsuite/libgomp.oacc-c/spin-lock-shared.h
new file mode 100644
index 00000000000..923f38c60fe
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c/spin-lock-shared.h
@@ -0,0 +1,135 @@ 
+#define XSTR(S) STR (S)
+#define STR(S) #S
+
+#define PRINTF(...)				\
+  {						\
+    printf (__VA_ARGS__);			\
+    fflush (NULL);				\
+  }
+
+#define DO_PRAGMA(x) _Pragma (#x)
+
+#ifndef SPIN_CNT_MAX
+/* Define to have limited-spin spinlock.
+   Ensures that the program will terminate.  */
+#define SPIN_CNT_MAX 0x20000U
+#endif
+
+#define TEST_1(N, LOCKREF)						\
+  DO_PRAGMA (acc parallel						\
+	     num_gangs(1)						\
+	     num_workers(N_WORKERS)					\
+	     copy (lock_cnt)						\
+	     copy (spin_cnt_max_hit))					\
+  {									\
+    TYPE unlocked = (TYPE)0;						\
+    TYPE locked = ~unlocked;						\
+    TYPE lock;								\
+    TYPE *volatile lock_ptr = &lock;					\
+    unsigned long long int lock_cnt_1;					\
+    unsigned long long int spin_cnt_max_hit_1;				\
+									\
+    if (VERIFY)								\
+      {									\
+	lock_cnt_1 = 0;							\
+									\
+	if (SPIN_CNT_MAX)						\
+	  spin_cnt_max_hit_1 = 0;					\
+      }									\
+									\
+    *(LOCKREF) = unlocked;						\
+									\
+    DO_PRAGMA (acc loop worker)						\
+      for (unsigned int i = 0; i < N; i++)				\
+	{								\
+	  TYPE res;							\
+									\
+	  unsigned int spin_cnt = 0;					\
+	  while (1)							\
+	    {								\
+	      res = __atomic_exchange_n (LOCKREF, locked,		\
+					 MEMMODEL_ACQUIRE);		\
+	      if (res == locked)					\
+		{							\
+		  if (SPIN_CNT_MAX > 0)					\
+		    {							\
+		      spin_cnt++;					\
+		      if (spin_cnt == SPIN_CNT_MAX)			\
+			{						\
+			  if (VERIFY)					\
+			    __atomic_fetch_add (&spin_cnt_max_hit_1, 1,	\
+						MEMMODEL_RELAXED);	\
+			  break;					\
+			}						\
+		    }							\
+		  continue;						\
+		}							\
+	      else							\
+		{							\
+		  if (res != unlocked)					\
+		    __builtin_abort ();					\
+									\
+		  if (VERIFY)						\
+		    __atomic_fetch_add (&lock_cnt_1, 1,			\
+					MEMMODEL_RELAXED);		\
+									\
+		  __atomic_store_n (LOCKREF, unlocked,			\
+				    MEMMODEL_RELEASE);			\
+									\
+		  break;						\
+		}							\
+	    }								\
+	}								\
+									\
+    if (VERIFY)								\
+      {									\
+	lock_cnt += lock_cnt_1;						\
+									\
+	if (SPIN_CNT_MAX)						\
+	  spin_cnt_max_hit += spin_cnt_max_hit_1;			\
+      }									\
+  }
+
+#define TEST(N, LOCKREF)					\
+  {								\
+    spin_cnt_max_hit = 0;					\
+								\
+    if (VERIFY)							\
+      lock_cnt = 0;						\
+								\
+    PRINTF ("%s - verify=%u - LOCKREF=%s ... ",			\
+	    XSTR (TYPE), VERIFY, #LOCKREF);			\
+    TEST_1 (N, LOCKREF);					\
+    PRINTF ("done\n");						\
+								\
+    if (VERIFY && SPIN_CNT_MAX)					\
+      PRINTF ("spin_cnt_max_hit: %llu\n", spin_cnt_max_hit);	\
+								\
+    if (VERIFY && (lock_cnt + spin_cnt_max_hit != N))		\
+      {								\
+	PRINTF ("lock_cnt: %llu\n", lock_cnt);			\
+	PRINTF ("lock_cnt + spin_cnt_max_hit: %llu\n",		\
+		lock_cnt + spin_cnt_max_hit);			\
+	PRINTF ("N: %u\n", N);					\
+	__builtin_abort ();					\
+      }								\
+  }
+
+void
+TESTS (unsigned int n)
+{
+  unsigned long long int lock_cnt;
+  unsigned long long int spin_cnt_max_hit;
+
+#define N_WORKERS 8
+
+#define VERIFY 0
+  TEST (n, &lock);
+  TEST (n, lock_ptr);
+#undef VERIFY
+
+#define VERIFY 1
+  TEST (n, &lock);
+  TEST (n, lock_ptr);
+#undef VERIFY
+}