[RFC,2/2] sparc32: Use cas for atomic_* operations and use general pthread_barrier_wait

Message ID 1478012867-6031-3-git-send-email-andreas@gaisler.com
State New, archived
Headers

Commit Message

Andreas Larsson Nov. 1, 2016, 3:07 p.m. UTC
  This uses the CASA compare and swap with user space data access ASI 0xa
that is present on many LEON3 and LEON4 systems and that is implied by
gcc's -mcpu=leon3.

The CASA instruction is used not only for atomic compare and exchange
functions, but also atomic exchange functions and atomic write
functions. This is to allow the OS kernel to emulate that instruction on
systems where it is missing and to get atomicity between all atomic
writing functions without having to resort to stop all CPU:s in an SMP
system.

	* sysdeps/sparc/sparc32/atomic-machine.h: Use CASA instruction
	instead of spinlocks for sparcv8.
	* sysdeps/sparc/sparc32/pthread_barrier_wait.c: Remove file.
---
 sysdeps/sparc/sparc32/atomic-machine.h       |  228 ++++++--------------------
 sysdeps/sparc/sparc32/pthread_barrier_wait.c |    1 -
 2 files changed, 50 insertions(+), 179 deletions(-)
 delete mode 100644 sysdeps/sparc/sparc32/pthread_barrier_wait.c
  

Comments

David Miller Nov. 4, 2016, 6:37 p.m. UTC | #1
From: Andreas Larsson <andreas@gaisler.com>
Date: Tue,  1 Nov 2016 16:07:47 +0100

> This uses the CASA compare and swap with user space data access ASI 0xa
> that is present on many LEON3 and LEON4 systems and that is implied by
> gcc's -mcpu=leon3.
> 
> The CASA instruction is used not only for atomic compare and exchange
> functions, but also atomic exchange functions and atomic write
> functions. This is to allow the OS kernel to emulate that instruction on
> systems where it is missing and to get atomicity between all atomic
> writing functions without having to resort to stop all CPU:s in an SMP
> system.

Ok, this is fine.  I'll work on the instruction emulation code for the
kernel side.
  
David Miller Nov. 4, 2016, 6:44 p.m. UTC | #2
From: David Miller <davem@davemloft.net>
Date: Fri, 04 Nov 2016 14:37:10 -0400 (EDT)

> From: Andreas Larsson <andreas@gaisler.com>
> Date: Tue,  1 Nov 2016 16:07:47 +0100
> 
>> This uses the CASA compare and swap with user space data access ASI 0xa
>> that is present on many LEON3 and LEON4 systems and that is implied by
>> gcc's -mcpu=leon3.
>> 
>> The CASA instruction is used not only for atomic compare and exchange
>> functions, but also atomic exchange functions and atomic write
>> functions. This is to allow the OS kernel to emulate that instruction on
>> systems where it is missing and to get atomicity between all atomic
>> writing functions without having to resort to stop all CPU:s in an SMP
>> system.
> 
> Ok, this is fine.  I'll work on the instruction emulation code for the
> kernel side.

Actually, this might cause some problems.

We don't always have access to a proper _dl_hwcap value.  Which means
that we will emit the LEON CAS sometimes when running on a v9 chip
which will not work properly.

I need to think about this a bit more.

Probably what we need to do is have three cases:

1) We explicitly know we are on a v9 chip via dl_hwcap, emit v9 CAS

2) We explicitly know we are on a v8 LEON chip via dl_hwcap, emit LEON CAS

3) Else, we emit a special trap instruction which the kernel fixes up

I think this is necessary because we cannot attempt to execute one of
the two CAS cases on the opposing CAS cpu type.
  

Patch

diff --git a/sysdeps/sparc/sparc32/atomic-machine.h b/sysdeps/sparc/sparc32/atomic-machine.h
index d6e68f9..818f4e2 100644
--- a/sysdeps/sparc/sparc32/atomic-machine.h
+++ b/sysdeps/sparc/sparc32/atomic-machine.h
@@ -50,9 +50,8 @@  typedef uintmax_t uatomic_max_t;
 #define __HAVE_64B_ATOMICS 0
 #define USE_ATOMIC_COMPILER_BUILTINS 0
 
-
-/* We have no compare and swap, just test and set.
-   The following implementation contends on 64 global locks
+/* We might have no hardware compare and swap, just test and set.
+   The following __sparc32_atomic implementation contends on 64 global locks
    per library and assumes no variable will be accessed using atomic.h
    macros from two different libraries.  */
 
@@ -110,6 +109,30 @@  volatile unsigned char __sparc32_atomic_locks[64]
     }								      \
   while (0)
 
+#define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \
+  (abort (), (__typeof (*mem)) 0)
+
+#define __arch_compare_and_exchange_val_16_acq(mem, newval, oldval) \
+  (abort (), (__typeof (*mem)) 0)
+
+#define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
+  (abort (), (__typeof (*mem)) 0)
+
+#define __v7_compare_and_exchange_val_32_acq(mem, newval, oldval) \
+({union { __typeof (oldval) a; uint32_t v; } oldval_arg = { .a = (oldval) };  \
+  union { __typeof (newval) a; uint32_t v; } newval_arg = { .a = (newval) };  \
+  register uint32_t __acev_tmp __asm ("%g6");			              \
+  register __typeof (mem) __acev_mem __asm ("%g1") = (mem);		      \
+  register uint32_t __acev_oldval __asm ("%g5");		              \
+  __acev_tmp = newval_arg.v;						      \
+  __acev_oldval = oldval_arg.v;						      \
+  /* .word 0xcde04145 is casa [%g1] 0xa, %g5, %g6. Can't use casa here	      \
+     though because assembler will not allow it for plain V8 arch.  */	      \
+  __asm __volatile (".word 0xcde04145"					      \
+		    : "+r" (__acev_tmp), "=m" (*__acev_mem)		      \
+		    : "r" (__acev_oldval), "m" (*__acev_mem),		      \
+		      "r" (__acev_mem) : "memory");			      \
+  (__typeof (oldval)) __acev_tmp; })
 
 #ifndef SHARED
 # define __v9_compare_and_exchange_val_32_acq(mem, newval, oldval) \
@@ -127,82 +150,31 @@  volatile unsigned char __sparc32_atomic_locks[64]
 		    : "r" (__acev_oldval), "m" (*__acev_mem),		      \
 		      "r" (__acev_mem) : "memory");			      \
   (__typeof (oldval)) __acev_tmp; })
-#endif
 
-/* The only basic operation needed is compare and exchange.  */
-#define __v7_compare_and_exchange_val_acq(mem, newval, oldval) \
-  ({ __typeof (mem) __acev_memp = (mem);			      \
-     __typeof (*mem) __acev_ret;				      \
-     __typeof (*mem) __acev_newval = (newval);			      \
-								      \
-     __sparc32_atomic_do_lock (__acev_memp);			      \
-     __acev_ret = *__acev_memp;					      \
-     if (__acev_ret == (oldval))				      \
-       *__acev_memp = __acev_newval;				      \
-     __sparc32_atomic_do_unlock (__acev_memp);			      \
-     __acev_ret; })
-
-#define __v7_compare_and_exchange_bool_acq(mem, newval, oldval) \
-  ({ __typeof (mem) __aceb_memp = (mem);			      \
-     int __aceb_ret;						      \
-     __typeof (*mem) __aceb_newval = (newval);			      \
-								      \
-     __sparc32_atomic_do_lock (__aceb_memp);			      \
-     __aceb_ret = 0;						      \
-     if (*__aceb_memp == (oldval))				      \
-       *__aceb_memp = __aceb_newval;				      \
-     else							      \
-       __aceb_ret = 1;						      \
-     __sparc32_atomic_do_unlock (__aceb_memp);			      \
-     __aceb_ret; })
-
-#define __v7_exchange_acq(mem, newval) \
-  ({ __typeof (mem) __acev_memp = (mem);			      \
-     __typeof (*mem) __acev_ret;				      \
-     __typeof (*mem) __acev_newval = (newval);			      \
-								      \
-     __sparc32_atomic_do_lock (__acev_memp);			      \
-     __acev_ret = *__acev_memp;					      \
-     *__acev_memp = __acev_newval;				      \
-     __sparc32_atomic_do_unlock (__acev_memp);			      \
-     __acev_ret; })
-
-#define __v7_exchange_and_add(mem, value) \
-  ({ __typeof (mem) __acev_memp = (mem);			      \
-     __typeof (*mem) __acev_ret;				      \
-								      \
-     __sparc32_atomic_do_lock (__acev_memp);			      \
-     __acev_ret = *__acev_memp;					      \
-     *__acev_memp = __acev_ret + (value);			      \
-     __sparc32_atomic_do_unlock (__acev_memp);			      \
-     __acev_ret; })
-
-/* Special versions, which guarantee that top 8 bits of all values
-   are cleared and use those bits as the ldstub lock.  */
-#define __v7_compare_and_exchange_val_24_acq(mem, newval, oldval) \
-  ({ __typeof (mem) __acev_memp = (mem);			      \
-     __typeof (*mem) __acev_ret;				      \
-     __typeof (*mem) __acev_newval = (newval);			      \
-								      \
-     __sparc32_atomic_do_lock24 (__acev_memp);			      \
-     __acev_ret = *__acev_memp & 0xffffff;			      \
-     if (__acev_ret == (oldval))				      \
-       *__acev_memp = __acev_newval;				      \
+# define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
+  ({ __typeof (oldval) __acev_wret;				      \
+     if (__atomic_is_v9)					      \
+	 __acev_wret						      \
+	   = __v9_compare_and_exchange_val_32_acq (mem, newval,	      \
+						   oldval);	      \
      else							      \
-       __sparc32_atomic_do_unlock24 (__acev_memp);		      \
-     __asm __volatile ("" ::: "memory");			      \
-     __acev_ret; })
-
-#define __v7_exchange_24_rel(mem, newval) \
-  ({ __typeof (mem) __acev_memp = (mem);			      \
-     __typeof (*mem) __acev_ret;				      \
-     __typeof (*mem) __acev_newval = (newval);			      \
-								      \
-     __sparc32_atomic_do_lock24 (__acev_memp);			      \
-     __acev_ret = *__acev_memp & 0xffffff;			      \
-     *__acev_memp = __acev_newval;				      \
-     __asm __volatile ("" ::: "memory");			      \
-     __acev_ret; })
+	 __acev_wret						      \
+	   = __v7_compare_and_exchange_val_32_acq (mem, newval,	      \
+						   oldval);	      \
+     __acev_wret; })
+#else
+# define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
+  __v7_compare_and_exchange_val_32_acq(mem, newval, oldval)
+#endif
+
+#define atomic_compare_and_exchange_val_24_acq(mem, newval, oldval) \
+  atomic_compare_and_exchange_val_acq (mem, newval, oldval)
+
+#define atomic_exchange_24_rel(mem, newval) \
+  atomic_exchange_rel (mem, newval)
+
+#define atomic_store_relaxed(mem, newval) \
+  do { (void) atomic_exchange_rel(mem, newval); } while (0)
 
 #ifdef SHARED
 
@@ -210,30 +182,6 @@  volatile unsigned char __sparc32_atomic_locks[64]
    used on pre-v9 CPU.  */
 # define __atomic_is_v9 0
 
-# define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
-  __v7_compare_and_exchange_val_acq (mem, newval, oldval)
-
-# define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
-  __v7_compare_and_exchange_bool_acq (mem, newval, oldval)
-
-# define atomic_exchange_acq(mem, newval) \
-  __v7_exchange_acq (mem, newval)
-
-# define atomic_exchange_and_add(mem, value) \
-  __v7_exchange_and_add (mem, value)
-
-# define atomic_compare_and_exchange_val_24_acq(mem, newval, oldval) \
-  ({								      \
-     if (sizeof (*mem) != 4)					      \
-       abort ();						      \
-     __v7_compare_and_exchange_val_24_acq (mem, newval, oldval); })
-
-# define atomic_exchange_24_rel(mem, newval) \
-  ({								      \
-     if (sizeof (*mem) != 4)					      \
-       abort ();						      \
-     __v7_exchange_24_rel (mem, newval); })
-
 # define atomic_full_barrier() __asm ("" ::: "memory")
 # define atomic_read_barrier() atomic_full_barrier ()
 # define atomic_write_barrier() atomic_full_barrier ()
@@ -250,82 +198,6 @@  extern uint64_t _dl_hwcap __attribute__((weak));
   (__builtin_expect (&_dl_hwcap != 0, 1) \
    && __builtin_expect (_dl_hwcap & HWCAP_SPARC_V9, HWCAP_SPARC_V9))
 
-# define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
-  ({								      \
-     __typeof (*mem) __acev_wret;				      \
-     if (sizeof (*mem) != 4)					      \
-       abort ();						      \
-     if (__atomic_is_v9)					      \
-       __acev_wret						      \
-	 = __v9_compare_and_exchange_val_32_acq (mem, newval, oldval);\
-     else							      \
-       __acev_wret						      \
-	 = __v7_compare_and_exchange_val_acq (mem, newval, oldval);   \
-     __acev_wret; })
-
-# define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
-  ({								      \
-     int __acev_wret;						      \
-     if (sizeof (*mem) != 4)					      \
-       abort ();						      \
-     if (__atomic_is_v9)					      \
-       {							      \
-	 __typeof (oldval) __acev_woldval = (oldval);		      \
-	 __acev_wret						      \
-	   = __v9_compare_and_exchange_val_32_acq (mem, newval,	      \
-						   __acev_woldval)    \
-	     != __acev_woldval;					      \
-       }							      \
-     else							      \
-       __acev_wret						      \
-	 = __v7_compare_and_exchange_bool_acq (mem, newval, oldval);  \
-     __acev_wret; })
-
-# define atomic_exchange_rel(mem, newval) \
-  ({								      \
-     __typeof (*mem) __acev_wret;				      \
-     if (sizeof (*mem) != 4)					      \
-       abort ();						      \
-     if (__atomic_is_v9)					      \
-       {							      \
-	 __typeof (mem) __acev_wmemp = (mem);			      \
-	 __typeof (*(mem)) __acev_wval = (newval);		      \
-	 do							      \
-	   __acev_wret = *__acev_wmemp;				      \
-	 while (__builtin_expect				      \
-		  (__v9_compare_and_exchange_val_32_acq (__acev_wmemp,\
-							 __acev_wval, \
-							 __acev_wret) \
-		   != __acev_wret, 0));				      \
-       }							      \
-     else							      \
-       __acev_wret = __v7_exchange_acq (mem, newval);		      \
-     __acev_wret; })
-
-# define atomic_compare_and_exchange_val_24_acq(mem, newval, oldval) \
-  ({								      \
-     __typeof (*mem) __acev_wret;				      \
-     if (sizeof (*mem) != 4)					      \
-       abort ();						      \
-     if (__atomic_is_v9)					      \
-       __acev_wret						      \
-	 = __v9_compare_and_exchange_val_32_acq (mem, newval, oldval);\
-     else							      \
-       __acev_wret						      \
-	 = __v7_compare_and_exchange_val_24_acq (mem, newval, oldval);\
-     __acev_wret; })
-
-# define atomic_exchange_24_rel(mem, newval) \
-  ({								      \
-     __typeof (*mem) __acev_w24ret;				      \
-     if (sizeof (*mem) != 4)					      \
-       abort ();						      \
-     if (__atomic_is_v9)					      \
-       __acev_w24ret = atomic_exchange_rel (mem, newval);	      \
-     else							      \
-       __acev_w24ret = __v7_exchange_24_rel (mem, newval);	      \
-     __acev_w24ret; })
-
 #define atomic_full_barrier()						\
   do {									\
      if (__atomic_is_v9)						\
@@ -355,6 +227,6 @@  extern uint64_t _dl_hwcap __attribute__((weak));
 
 #endif
 
-#include <sysdep.h>
+#include <sys/auxv.h>
 
 #endif	/* atomic-machine.h */
diff --git a/sysdeps/sparc/sparc32/pthread_barrier_wait.c b/sysdeps/sparc/sparc32/pthread_barrier_wait.c
deleted file mode 100644
index e5ef911..0000000
--- a/sysdeps/sparc/sparc32/pthread_barrier_wait.c
+++ /dev/null
@@ -1 +0,0 @@ 
-#error No support for pthread barriers on pre-v9 sparc.