[1/4] S390: Use C11-like atomics instead of plain memory accesses in lock elision code.

Message ID 1481032315-12420-1-git-send-email-stli@linux.vnet.ibm.com
State Committed

Commit Message

Stefan Liebler Dec. 6, 2016, 1:51 p.m. UTC
This uses atomic operations to access lock elision metadata that is accessed
concurrently (i.e., the adapt_count fields).  The data is smaller than a word
but is accessed only with atomic loads and stores.

See also x86 commit ca6e601a9d4a72b3699cca15bad12ac1716bf49a:
"Use C11-like atomics instead of plain memory accesses in x86 lock elision."

ChangeLog:

	* sysdeps/unix/sysv/linux/s390/elision-lock.c
	(__lll_lock_elision): Use atomics to load / store adapt_count.
	* sysdeps/unix/sysv/linux/s390/elision-trylock.c
	(__lll_trylock_elision): Likewise.
---
 sysdeps/unix/sysv/linux/s390/elision-lock.c    | 25 ++++++++++++++++++-------
 sysdeps/unix/sysv/linux/s390/elision-trylock.c | 14 +++++++++-----
 2 files changed, 27 insertions(+), 12 deletions(-)
  

Comments

Stefan Liebler Dec. 13, 2016, 8:38 a.m. UTC | #1
ping.

On 12/06/2016 02:51 PM, Stefan Liebler wrote:
> This uses atomic operations to access lock elision metadata that is accessed
> concurrently (i.e., the adapt_count fields).  The data is smaller than a word
> but is accessed only with atomic loads and stores.
>
> See also x86 commit ca6e601a9d4a72b3699cca15bad12ac1716bf49a:
> "Use C11-like atomics instead of plain memory accesses in x86 lock elision."
>
> ChangeLog:
>
> 	* sysdeps/unix/sysv/linux/s390/elision-lock.c
> 	(__lll_lock_elision): Use atomics to load / store adapt_count.
> 	* sysdeps/unix/sysv/linux/s390/elision-trylock.c
> 	(__lll_trylock_elision): Likewise.
  
Stefan Liebler Dec. 16, 2016, 5:12 p.m. UTC | #2
On 12/13/2016 09:38 AM, Stefan Liebler wrote:
> ping.
>
Any comments or objections?
Otherwise I plan to commit these s390-specific patches next week.
  
Torvald Riegel Dec. 16, 2016, 6:14 p.m. UTC | #3
On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
> ping.

Sorry, I've been busy with the robust mutex bugs.

This patch is OK.  Thanks.
  
Stefan Liebler Dec. 20, 2016, 2:28 p.m. UTC | #4
On 12/16/2016 07:14 PM, Torvald Riegel wrote:
> On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
>> ping.
>
> Sorry, I've been busy with the robust mutex bugs.
>
> This patch is OK.  Thanks.
>
Thanks. Committed.
  
Torvald Riegel Dec. 22, 2016, 9:34 a.m. UTC | #5
On Tue, 2016-12-20 at 15:28 +0100, Stefan Liebler wrote:
> On 12/16/2016 07:14 PM, Torvald Riegel wrote:
> > On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
> >> ping.
> >
> > Sorry, I've been busy with the robust mutex bugs.
> >
> > This patch is OK.  Thanks.
> >
> Thanks. Committed.

I've reviewed and ack'ed patch 1 of 4, but it looks like you committed
patches 2-4 as well.  Did anyone review these?
  
Torvald Riegel Jan. 10, 2017, 11:34 a.m. UTC | #6
On Thu, 2016-12-22 at 10:34 +0100, Torvald Riegel wrote:
> On Tue, 2016-12-20 at 15:28 +0100, Stefan Liebler wrote:
> > On 12/16/2016 07:14 PM, Torvald Riegel wrote:
> > > On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
> > >> ping.
> > >
> > > Sorry, I've been busy with the robust mutex bugs.
> > >
> > > This patch is OK.  Thanks.
> > >
> > Thanks. Committed.
> 
> I've reviewed and ack'ed patch 1 of 4, but it looks like you committed
> patches 2-4 as well.  Did anyone review these?
> 

Ping.
  
Stefan Liebler Jan. 11, 2017, 4:06 p.m. UTC | #7
On 12/22/2016 10:34 AM, Torvald Riegel wrote:
> On Tue, 2016-12-20 at 15:28 +0100, Stefan Liebler wrote:
>> On 12/16/2016 07:14 PM, Torvald Riegel wrote:
>>> On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
>>>> ping.
>>>
>>> Sorry, I've been busy with the robust mutex bugs.
>>>
>>> This patch is OK.  Thanks.
>>>
>> Thanks. Committed.
>
> I've reviewed and ack'ed patch 1 of 4, but it looks like you committed
> patches 2-4 as well.  Did anyone review these?
>
Sorry. That was my fault.
Shall I revert the other patches and continue after the release?
  
Torvald Riegel Jan. 12, 2017, 9:43 a.m. UTC | #8
On Wed, 2017-01-11 at 17:06 +0100, Stefan Liebler wrote:
> On 12/22/2016 10:34 AM, Torvald Riegel wrote:
> > On Tue, 2016-12-20 at 15:28 +0100, Stefan Liebler wrote:
> >> On 12/16/2016 07:14 PM, Torvald Riegel wrote:
> >>> On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
> >>>> ping.
> >>>
> >>> Sorry, I've been busy with the robust mutex bugs.
> >>>
> >>> This patch is OK.  Thanks.
> >>>
> >> Thanks. Committed.
> >
> > I've reviewed and ack'ed patch 1 of 4, but it looks like you committed
> > patches 2-4 as well.  Did anyone review these?
> >
> Sorry. That was my fault.
> Shall I revert the other patches and continue after the release?
> 

Let's do the normal review process first.  I've reviewed the other 3 by
now; maybe we can just fix them up where necessary.  Sounds good?
  

Patch

diff --git a/sysdeps/unix/sysv/linux/s390/elision-lock.c b/sysdeps/unix/sysv/linux/s390/elision-lock.c
index ecb507e..1876d21 100644
--- a/sysdeps/unix/sysv/linux/s390/elision-lock.c
+++ b/sysdeps/unix/sysv/linux/s390/elision-lock.c
@@ -45,11 +45,18 @@ 
 int
 __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
 {
-  if (*adapt_count > 0)
+  /* adapt_count can be accessed concurrently; these accesses can be both
+     inside of transactions (if critical sections are nested and the outer
+     critical section uses lock elision) and outside of transactions.  Thus,
+     we need to use atomic accesses to avoid data races.  However, the
+     value of adapt_count is just a hint, so relaxed MO accesses are
+     sufficient.  */
+  if (atomic_load_relaxed (adapt_count) > 0)
     {
       /* Lost updates are possible, but harmless.  Due to races this might lead
 	 to *adapt_count becoming less than zero.  */
-      (*adapt_count)--;
+      atomic_store_relaxed (adapt_count,
+			    atomic_load_relaxed (adapt_count) - 1);
       goto use_lock;
     }
 
@@ -74,8 +81,10 @@  __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
 	      /* In a non-nested transaction there is no need to abort,
 		 which is expensive.  */
 	      __builtin_tend ();
+	      /* Don't try to use transactions for the next couple of times.
+		 See above for why relaxed MO is sufficient.  */
 	      if (aconf.skip_lock_busy > 0)
-		*adapt_count = aconf.skip_lock_busy;
+		atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
 	      goto use_lock;
 	    }
 	  else /* nesting depth is > 1 */
@@ -101,18 +110,20 @@  __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
 	      /* A persistent abort (cc 1 or 3) indicates that a retry is
 		 probably futile.  Use the normal locking now and for the
 		 next couple of calls.
-		 Be careful to avoid writing to the lock.  */
+		 Be careful to avoid writing to the lock.  See above for why
+		 relaxed MO is sufficient.  */
 	      if (aconf.skip_lock_internal_abort > 0)
-		*adapt_count = aconf.skip_lock_internal_abort;
+		atomic_store_relaxed (adapt_count,
+				      aconf.skip_lock_internal_abort);
 	      goto use_lock;
 	    }
 	}
     }
 
   /* Same logic as above, but for a number of temporary failures in a
-     row.  */
+     row.  See above for why relaxed MO is sufficient.  */
   if (aconf.skip_lock_out_of_tbegin_retries > 0 && aconf.try_tbegin > 0)
-    *adapt_count = aconf.skip_lock_out_of_tbegin_retries;
+    atomic_store_relaxed (adapt_count, aconf.skip_lock_out_of_tbegin_retries);
 
   use_lock:
   return LLL_LOCK ((*futex), private);
diff --git a/sysdeps/unix/sysv/linux/s390/elision-trylock.c b/sysdeps/unix/sysv/linux/s390/elision-trylock.c
index 3d5a994..a3252b8 100644
--- a/sysdeps/unix/sysv/linux/s390/elision-trylock.c
+++ b/sysdeps/unix/sysv/linux/s390/elision-trylock.c
@@ -49,8 +49,10 @@  __lll_trylock_elision (int *futex, short *adapt_count)
       __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
     }
 
-  /* Only try a transaction if it's worth it.  */
-  if (*adapt_count <= 0)
+  /* Only try a transaction if it's worth it.  See __lll_lock_elision for
+     why we need atomic accesses.  Relaxed MO is sufficient because this is
+     just a hint.  */
+  if (atomic_load_relaxed (adapt_count) <= 0)
     {
       unsigned status;
 
@@ -65,9 +67,10 @@  __lll_trylock_elision (int *futex, short *adapt_count)
 	  __builtin_tend ();
 	  /* Note: Changing the adapt_count here might abort a transaction on a
 	     different cpu, but that could happen anyway when the futex is
-	     acquired, so there's no need to check the nesting depth here.  */
+	     acquired, so there's no need to check the nesting depth here.
+	     See above for why relaxed MO is sufficient.  */
 	  if (aconf.skip_lock_busy > 0)
-	    *adapt_count = aconf.skip_lock_busy;
+	    atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
 	}
       else
 	{
@@ -87,7 +90,8 @@  __lll_trylock_elision (int *futex, short *adapt_count)
     {
       /* Lost updates are possible, but harmless.  Due to races this might lead
 	 to *adapt_count becoming less than zero.  */
-      (*adapt_count)--;
+      atomic_store_relaxed (adapt_count,
+			    atomic_load_relaxed (adapt_count) - 1);
     }
 
   return lll_trylock (*futex);
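
The "lost updates are possible, but harmless" comments above deserve a short
illustration: the patch deliberately decrements adapt_count with a separate
relaxed load and store rather than with an atomic read-modify-write.  Below is
a standalone C11 sketch of both options; the names are hypothetical and this
is not glibc code:

  #include <stdatomic.h>

  static _Atomic short adapt_count;

  /* What the patch does: two relaxed accesses.  If two threads race, both may
     read N and both may store N - 1, so one decrement is lost and the value
     can even drop below zero; that is harmless for a hint, and it avoids the
     cost of an interlocked instruction.  */
  static void
  decrement_hint (void)
  {
    atomic_store_explicit (&adapt_count,
                           atomic_load_explicit (&adapt_count,
                                                 memory_order_relaxed) - 1,
                           memory_order_relaxed);
  }

  /* The stronger alternative: an atomic read-modify-write never loses
     updates, but is more expensive and unnecessary for a hint.  */
  static void
  decrement_exact (void)
  {
    atomic_fetch_sub_explicit (&adapt_count, 1, memory_order_relaxed);
  }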