[1/4] S390: Use C11-like atomics instead of plain memory accesses in lock elision code.
Commit Message
This uses atomic operations to access lock elision metadata that is accessed
concurrently (ie, adapt_count fields). The size of the data is less than a
word but accessed only with atomic loads and stores.
See also x86 commit ca6e601a9d4a72b3699cca15bad12ac1716bf49a:
"Use C11-like atomics instead of plain memory accesses in x86 lock elision."
ChangeLog:
* sysdeps/unix/sysv/linux/s390/elision-lock.c
(__lll_lock_elision): Use atomics to load / store adapt_count.
* sysdeps/unix/sysv/linux/s390/elision-trylock.c
(__lll_trylock_elision): Likewise.
---
sysdeps/unix/sysv/linux/s390/elision-lock.c | 25 ++++++++++++++++++-------
sysdeps/unix/sysv/linux/s390/elision-trylock.c | 14 +++++++++-----
2 files changed, 27 insertions(+), 12 deletions(-)
Comments
ping.
On 12/06/2016 02:51 PM, Stefan Liebler wrote:
> This uses atomic operations to access lock elision metadata that is accessed
> concurrently (ie, adapt_count fields). The size of the data is less than a
> word but accessed only with atomic loads and stores.
>
> See also x86 commit ca6e601a9d4a72b3699cca15bad12ac1716bf49a:
> "Use C11-like atomics instead of plain memory accesses in x86 lock elision."
>
> ChangeLog:
>
> * sysdeps/unix/sysv/linux/s390/elision-lock.c
> (__lll_lock_elision): Use atomics to load / store adapt_count.
> * sysdeps/unix/sysv/linux/s390/elision-trylock.c
> (__lll_trylock_elision): Likewise.
> ---
> sysdeps/unix/sysv/linux/s390/elision-lock.c | 25 ++++++++++++++++++-------
> sysdeps/unix/sysv/linux/s390/elision-trylock.c | 14 +++++++++-----
> 2 files changed, 27 insertions(+), 12 deletions(-)
>
> diff --git a/sysdeps/unix/sysv/linux/s390/elision-lock.c b/sysdeps/unix/sysv/linux/s390/elision-lock.c
> index ecb507e..1876d21 100644
> --- a/sysdeps/unix/sysv/linux/s390/elision-lock.c
> +++ b/sysdeps/unix/sysv/linux/s390/elision-lock.c
> @@ -45,11 +45,18 @@
> int
> __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
> {
> - if (*adapt_count > 0)
> + /* adapt_count can be accessed concurrently; these accesses can be both
> + inside of transactions (if critical sections are nested and the outer
> + critical section uses lock elision) and outside of transactions. Thus,
> + we need to use atomic accesses to avoid data races. However, the
> + value of adapt_count is just a hint, so relaxed MO accesses are
> + sufficient. */
> + if (atomic_load_relaxed (adapt_count) > 0)
> {
> /* Lost updates are possible, but harmless. Due to races this might lead
> to *adapt_count becoming less than zero. */
> - (*adapt_count)--;
> + atomic_store_relaxed (adapt_count,
> + atomic_load_relaxed (adapt_count) - 1);
> goto use_lock;
> }
>
> @@ -74,8 +81,10 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
> /* In a non-nested transaction there is no need to abort,
> which is expensive. */
> __builtin_tend ();
> + /* Don't try to use transactions for the next couple of times.
> + See above for why relaxed MO is sufficient. */
> if (aconf.skip_lock_busy > 0)
> - *adapt_count = aconf.skip_lock_busy;
> + atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
> goto use_lock;
> }
> else /* nesting depth is > 1 */
> @@ -101,18 +110,20 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
> /* A persistent abort (cc 1 or 3) indicates that a retry is
> probably futile. Use the normal locking now and for the
> next couple of calls.
> - Be careful to avoid writing to the lock. */
> + Be careful to avoid writing to the lock. See above for why
> + relaxed MO is sufficient. */
> if (aconf.skip_lock_internal_abort > 0)
> - *adapt_count = aconf.skip_lock_internal_abort;
> + atomic_store_relaxed (adapt_count,
> + aconf.skip_lock_internal_abort);
> goto use_lock;
> }
> }
> }
>
> /* Same logic as above, but for for a number of temporary failures in a
> - row. */
> + row. See above for why relaxed MO is sufficient. */
> if (aconf.skip_lock_out_of_tbegin_retries > 0 && aconf.try_tbegin > 0)
> - *adapt_count = aconf.skip_lock_out_of_tbegin_retries;
> + atomic_store_relaxed (adapt_count, aconf.skip_lock_out_of_tbegin_retries);
>
> use_lock:
> return LLL_LOCK ((*futex), private);
> diff --git a/sysdeps/unix/sysv/linux/s390/elision-trylock.c b/sysdeps/unix/sysv/linux/s390/elision-trylock.c
> index 3d5a994..a3252b8 100644
> --- a/sysdeps/unix/sysv/linux/s390/elision-trylock.c
> +++ b/sysdeps/unix/sysv/linux/s390/elision-trylock.c
> @@ -49,8 +49,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
> __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
> }
>
> - /* Only try a transaction if it's worth it. */
> - if (*adapt_count <= 0)
> + /* Only try a transaction if it's worth it. See __lll_lock_elision for
> + why we need atomic accesses. Relaxed MO is sufficient because this is
> + just a hint. */
> + if (atomic_load_relaxed (adapt_count) <= 0)
> {
> unsigned status;
>
> @@ -65,9 +67,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
> __builtin_tend ();
> /* Note: Changing the adapt_count here might abort a transaction on a
> different cpu, but that could happen anyway when the futex is
> - acquired, so there's no need to check the nesting depth here. */
> + acquired, so there's no need to check the nesting depth here.
> + See above for why relaxed MO is sufficient. */
> if (aconf.skip_lock_busy > 0)
> - *adapt_count = aconf.skip_lock_busy;
> + atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
> }
> else
> {
> @@ -87,7 +90,8 @@ __lll_trylock_elision (int *futex, short *adapt_count)
> {
> /* Lost updates are possible, but harmless. Due to races this might lead
> to *adapt_count becoming less than zero. */
> - (*adapt_count)--;
> + atomic_store_relaxed (adapt_count,
> + atomic_load_relaxed (adapt_count) - 1);
> }
>
> return lll_trylock (*futex);
>
On 12/13/2016 09:38 AM, Stefan Liebler wrote:
> ping.
>
Any comments, objections?
Otherwise I plan to commit this s390 specific patches next week.
On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
> ping.
Sorry, I've been busy with the robust mutex bugs.
This patch is OK. Thanks.
On 12/16/2016 07:14 PM, Torvald Riegel wrote:
> On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
>> ping.
>
> Sorry, I've been busy with the robust mutex bugs.
>
> This patch is OK. Thanks.
>
Thanks. Committed.
On Tue, 2016-12-20 at 15:28 +0100, Stefan Liebler wrote:
> On 12/16/2016 07:14 PM, Torvald Riegel wrote:
> > On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
> >> ping.
> >
> > Sorry, I've been busy with the robust mutex bugs.
> >
> > This patch is OK. Thanks.
> >
> Thanks. Committed.
I've reviewed and ack'ed patch 1 of 4, but it looks like you committed
patches 2-4 as well. Did anyone review these?
On Thu, 2016-12-22 at 10:34 +0100, Torvald Riegel wrote:
> On Tue, 2016-12-20 at 15:28 +0100, Stefan Liebler wrote:
> > On 12/16/2016 07:14 PM, Torvald Riegel wrote:
> > > On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
> > >> ping.
> > >
> > > Sorry, I've been busy with the robust mutex bugs.
> > >
> > > This patch is OK. Thanks.
> > >
> > Thanks. Committed.
>
> I've reviewed and ack'ed patch 1 of 4, but it looks like you committed
> patches 2-4 as well. Did anyone review these?
>
Ping.
On 12/22/2016 10:34 AM, Torvald Riegel wrote:
> On Tue, 2016-12-20 at 15:28 +0100, Stefan Liebler wrote:
>> On 12/16/2016 07:14 PM, Torvald Riegel wrote:
>>> On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
>>>> ping.
>>>
>>> Sorry, I've been busy with the robust mutex bugs.
>>>
>>> This patch is OK. Thanks.
>>>
>> Thanks. Committed.
>
> I've reviewed and ack'ed patch 1 of 4, but it looks like you committed
> patches 2-4 as well. Did anyone review these?
>
Sorry. That was my fault.
Shall I revert the other patches and continue after the release?
On Wed, 2017-01-11 at 17:06 +0100, Stefan Liebler wrote:
> On 12/22/2016 10:34 AM, Torvald Riegel wrote:
> > On Tue, 2016-12-20 at 15:28 +0100, Stefan Liebler wrote:
> >> On 12/16/2016 07:14 PM, Torvald Riegel wrote:
> >>> On Tue, 2016-12-13 at 09:38 +0100, Stefan Liebler wrote:
> >>>> ping.
> >>>
> >>> Sorry, I've been busy with the robust mutex bugs.
> >>>
> >>> This patch is OK. Thanks.
> >>>
> >> Thanks. Committed.
> >
> > I've reviewed and ack'ed patch 1 of 4, but it looks like you committed
> > patches 2-4 as well. Did anyone review these?
> >
> Sorry. That was my fault.
> Shall I revert the other patches and continue after the release?
>
Let's do the normal review process first. I've reviewed the other 3 by
now; maybe we can just fix them up where necessary. Sounds good?
@@ -45,11 +45,18 @@
int
__lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
{
- if (*adapt_count > 0)
+ /* adapt_count can be accessed concurrently; these accesses can be both
+ inside of transactions (if critical sections are nested and the outer
+ critical section uses lock elision) and outside of transactions. Thus,
+ we need to use atomic accesses to avoid data races. However, the
+ value of adapt_count is just a hint, so relaxed MO accesses are
+ sufficient. */
+ if (atomic_load_relaxed (adapt_count) > 0)
{
/* Lost updates are possible, but harmless. Due to races this might lead
to *adapt_count becoming less than zero. */
- (*adapt_count)--;
+ atomic_store_relaxed (adapt_count,
+ atomic_load_relaxed (adapt_count) - 1);
goto use_lock;
}
@@ -74,8 +81,10 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
/* In a non-nested transaction there is no need to abort,
which is expensive. */
__builtin_tend ();
+ /* Don't try to use transactions for the next couple of times.
+ See above for why relaxed MO is sufficient. */
if (aconf.skip_lock_busy > 0)
- *adapt_count = aconf.skip_lock_busy;
+ atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
goto use_lock;
}
else /* nesting depth is > 1 */
@@ -101,18 +110,20 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
/* A persistent abort (cc 1 or 3) indicates that a retry is
probably futile. Use the normal locking now and for the
next couple of calls.
- Be careful to avoid writing to the lock. */
+ Be careful to avoid writing to the lock. See above for why
+ relaxed MO is sufficient. */
if (aconf.skip_lock_internal_abort > 0)
- *adapt_count = aconf.skip_lock_internal_abort;
+ atomic_store_relaxed (adapt_count,
+ aconf.skip_lock_internal_abort);
goto use_lock;
}
}
}
/* Same logic as above, but for for a number of temporary failures in a
- row. */
+ row. See above for why relaxed MO is sufficient. */
if (aconf.skip_lock_out_of_tbegin_retries > 0 && aconf.try_tbegin > 0)
- *adapt_count = aconf.skip_lock_out_of_tbegin_retries;
+ atomic_store_relaxed (adapt_count, aconf.skip_lock_out_of_tbegin_retries);
use_lock:
return LLL_LOCK ((*futex), private);
@@ -49,8 +49,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
__builtin_tabort (_HTM_FIRST_USER_ABORT_CODE | 1);
}
- /* Only try a transaction if it's worth it. */
- if (*adapt_count <= 0)
+ /* Only try a transaction if it's worth it. See __lll_lock_elision for
+ why we need atomic accesses. Relaxed MO is sufficient because this is
+ just a hint. */
+ if (atomic_load_relaxed (adapt_count) <= 0)
{
unsigned status;
@@ -65,9 +67,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
__builtin_tend ();
/* Note: Changing the adapt_count here might abort a transaction on a
different cpu, but that could happen anyway when the futex is
- acquired, so there's no need to check the nesting depth here. */
+ acquired, so there's no need to check the nesting depth here.
+ See above for why relaxed MO is sufficient. */
if (aconf.skip_lock_busy > 0)
- *adapt_count = aconf.skip_lock_busy;
+ atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
}
else
{
@@ -87,7 +90,8 @@ __lll_trylock_elision (int *futex, short *adapt_count)
{
/* Lost updates are possible, but harmless. Due to races this might lead
to *adapt_count becoming less than zero. */
- (*adapt_count)--;
+ atomic_store_relaxed (adapt_count,
+ atomic_load_relaxed (adapt_count) - 1);
}
return lll_trylock (*futex);