x86: Use RTM intrinsics in pthread mutex lock elision
Commit Message
Since RTM intrinsics are supported in GCC 4.9, we can use them in
pthread mutex lock elision.
* sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c):
Add -mrtm.
(CFLAGS-elision-unlock.c): Likewise.
(CFLAGS-elision-timed.c): Likewise.
(CFLAGS-elision-trylock.c): Likewise.
* sysdeps/unix/sysv/linux/x86/hle.h: Rewritten.
---
sysdeps/unix/sysv/linux/x86/Makefile | 4 ++
sysdeps/unix/sysv/linux/x86/hle.h | 70 ++--------------------------
2 files changed, 7 insertions(+), 67 deletions(-)
Comments
On 01/10/2018 19:08, H.J. Lu wrote:
> Since RTM intrinsics are supported in GCC 4.9, we can use them in
> pthread mutex lock elision.
>
> * sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c):
> Add -mrtm.
> (CFLAGS-elision-unlock.c): Likewise.
> (CFLAGS-elision-timed.c): Likewise.
> (CFLAGS-elision-trylock.c): Likewise.
> * sysdeps/unix/sysv/linux/x86/hle.h: Rewritten.
LGTM, thanks.
> ---
> sysdeps/unix/sysv/linux/x86/Makefile | 4 ++
> sysdeps/unix/sysv/linux/x86/hle.h | 70 ++--------------------------
> 2 files changed, 7 insertions(+), 67 deletions(-)
>
> diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile
> index 7dc4e61756..02ca36c6d2 100644
> --- a/sysdeps/unix/sysv/linux/x86/Makefile
> +++ b/sysdeps/unix/sysv/linux/x86/Makefile
> @@ -14,6 +14,10 @@ endif
> ifeq ($(subdir),nptl)
> libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \
> elision-trylock
> +CFLAGS-elision-lock.c += -mrtm
> +CFLAGS-elision-unlock.c += -mrtm
> +CFLAGS-elision-timed.c += -mrtm
> +CFLAGS-elision-trylock.c += -mrtm
> endif
>
> ifeq ($(subdir),elf)
> diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h
> index 4a7b9e3bf7..0449026839 100644
> --- a/sysdeps/unix/sysv/linux/x86/hle.h
> +++ b/sysdeps/unix/sysv/linux/x86/hle.h
> @@ -1,75 +1,11 @@
> -/* Shared RTM header. Emulate TSX intrinsics for compilers and assemblers
> - that do not support the intrinsics and instructions yet. */
> +/* Shared RTM header. */
> #ifndef _HLE_H
> #define _HLE_H 1
>
> -#ifdef __ASSEMBLER__
> +#include <x86intrin.h>
Is hle.h used from assembly code in any configuration?
On Tue, Oct 2, 2018 at 6:06 AM Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 01/10/2018 19:08, H.J. Lu wrote:
> > Since RTM intrinsics are supported in GCC 4.9, we can use them in
> > pthread mutex lock elision.
> >
> > * sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c):
> > Add -mrtm.
> > (CFLAGS-elision-unlock.c): Likewise.
> > (CFLAGS-elision-timed.c): Likewise.
> > (CFLAGS-elision-trylock.c): Likewise.
> > * sysdeps/unix/sysv/linux/x86/hle.h: Rewritten.
>
> LGTM, thanks.
>
> > ---
> > sysdeps/unix/sysv/linux/x86/Makefile | 4 ++
> > sysdeps/unix/sysv/linux/x86/hle.h | 70 ++--------------------------
> > 2 files changed, 7 insertions(+), 67 deletions(-)
> >
> > diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile
> > index 7dc4e61756..02ca36c6d2 100644
> > --- a/sysdeps/unix/sysv/linux/x86/Makefile
> > +++ b/sysdeps/unix/sysv/linux/x86/Makefile
> > @@ -14,6 +14,10 @@ endif
> > ifeq ($(subdir),nptl)
> > libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \
> > elision-trylock
> > +CFLAGS-elision-lock.c += -mrtm
> > +CFLAGS-elision-unlock.c += -mrtm
> > +CFLAGS-elision-timed.c += -mrtm
> > +CFLAGS-elision-trylock.c += -mrtm
> > endif
> >
> > ifeq ($(subdir),elf)
> > diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h
> > index 4a7b9e3bf7..0449026839 100644
> > --- a/sysdeps/unix/sysv/linux/x86/hle.h
> > +++ b/sysdeps/unix/sysv/linux/x86/hle.h
> > @@ -1,75 +1,11 @@
> > -/* Shared RTM header. Emulate TSX intrinsics for compilers and assemblers
> > - that do not support the intrinsics and instructions yet. */
> > +/* Shared RTM header. */
> > #ifndef _HLE_H
> > #define _HLE_H 1
> >
> > -#ifdef __ASSEMBLER__
> > +#include <x86intrin.h>
>
> Is it used in any configuration in assembly code?
No:
sysdeps/unix/sysv/linux/x86/elision-lock.c:#include "hle.h"
sysdeps/unix/sysv/linux/x86/elision-trylock.c:#include "hle.h"
sysdeps/unix/sysv/linux/x86/elision-unlock.c:#include "hle.h"
sysdeps/x86/elide.h:#include <hle.h>
BTW, elide.h isn't used anywhere.
On 02/10/2018 10:49, H.J. Lu wrote:
> On Tue, Oct 2, 2018 at 6:06 AM Adhemerval Zanella
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>>
>> On 01/10/2018 19:08, H.J. Lu wrote:
>>> Since RTM intrinsics are supported in GCC 4.9, we can use them in
>>> pthread mutex lock elision.
>>>
>>> * sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c):
>>> Add -mrtm.
>>> (CFLAGS-elision-unlock.c): Likewise.
>>> (CFLAGS-elision-timed.c): Likewise.
>>> (CFLAGS-elision-trylock.c): Likewise.
>>> * sysdeps/unix/sysv/linux/x86/hle.h: Rewritten.
>>
>> LGTM, thanks.
>>
>>> ---
>>> sysdeps/unix/sysv/linux/x86/Makefile | 4 ++
>>> sysdeps/unix/sysv/linux/x86/hle.h | 70 ++--------------------------
>>> 2 files changed, 7 insertions(+), 67 deletions(-)
>>>
>>> diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile
>>> index 7dc4e61756..02ca36c6d2 100644
>>> --- a/sysdeps/unix/sysv/linux/x86/Makefile
>>> +++ b/sysdeps/unix/sysv/linux/x86/Makefile
>>> @@ -14,6 +14,10 @@ endif
>>> ifeq ($(subdir),nptl)
>>> libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \
>>> elision-trylock
>>> +CFLAGS-elision-lock.c += -mrtm
>>> +CFLAGS-elision-unlock.c += -mrtm
>>> +CFLAGS-elision-timed.c += -mrtm
>>> +CFLAGS-elision-trylock.c += -mrtm
>>> endif
>>>
>>> ifeq ($(subdir),elf)
>>> diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h
>>> index 4a7b9e3bf7..0449026839 100644
>>> --- a/sysdeps/unix/sysv/linux/x86/hle.h
>>> +++ b/sysdeps/unix/sysv/linux/x86/hle.h
>>> @@ -1,75 +1,11 @@
>>> -/* Shared RTM header. Emulate TSX intrinsics for compilers and assemblers
>>> - that do not support the intrinsics and instructions yet. */
>>> +/* Shared RTM header. */
>>> #ifndef _HLE_H
>>> #define _HLE_H 1
>>>
>>> -#ifdef __ASSEMBLER__
>>> +#include <x86intrin.h>
>>
>> Is it used in any configuration in assembly code?
>
> No:
>
> sysdeps/unix/sysv/linux/x86/elision-lock.c:#include "hle.h"
> sysdeps/unix/sysv/linux/x86/elision-trylock.c:#include "hle.h"
> sysdeps/unix/sysv/linux/x86/elision-unlock.c:#include "hle.h"
> sysdeps/x86/elide.h:#include <hle.h>
>
> BTW, elide.h isn't used anywhere.
>
It was used by the HTM lock elision for pthread_rwlock_* before the new implementation
(cc25c8b4c1196a8c29e9a45b1e096b99a87b7f8c). Andrew Senkevich has sent a
patch [1] to re-enable HTM rwlock elision, but his own performance results
seem mixed.
I think we can safely remove all elide.h files now.
[1] https://sourceware.org/ml/libc-alpha/2017-04/msg00067.html
@@ -14,6 +14,10 @@ endif
ifeq ($(subdir),nptl)
libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \
elision-trylock
+CFLAGS-elision-lock.c += -mrtm
+CFLAGS-elision-unlock.c += -mrtm
+CFLAGS-elision-timed.c += -mrtm
+CFLAGS-elision-trylock.c += -mrtm
endif
ifeq ($(subdir),elf)
@@ -1,75 +1,11 @@
-/* Shared RTM header. Emulate TSX intrinsics for compilers and assemblers
- that do not support the intrinsics and instructions yet. */
+/* Shared RTM header. */
#ifndef _HLE_H
#define _HLE_H 1
-#ifdef __ASSEMBLER__
+#include <x86intrin.h>
-.macro XBEGIN target
- .byte 0xc7,0xf8
- .long \target-1f
-1:
-.endm
-
-.macro XEND
- .byte 0x0f,0x01,0xd5
-.endm
-
-.macro XABORT code
- .byte 0xc6,0xf8,\code
-.endm
-
-.macro XTEST
- .byte 0x0f,0x01,0xd6
-.endm
-
-#endif
-
-/* Official RTM intrinsics interface matching gcc/icc, but works
- on older gcc compatible compilers and binutils.
- We should somehow detect if the compiler supports it, because
- it may be able to generate slightly better code. */
-
-#define _XBEGIN_STARTED (~0u)
-#define _XABORT_EXPLICIT (1 << 0)
-#define _XABORT_RETRY (1 << 1)
-#define _XABORT_CONFLICT (1 << 2)
-#define _XABORT_CAPACITY (1 << 3)
-#define _XABORT_DEBUG (1 << 4)
-#define _XABORT_NESTED (1 << 5)
-#define _XABORT_CODE(x) (((x) >> 24) & 0xff)
-
-#define _ABORT_LOCK_BUSY 0xff
+#define _ABORT_LOCK_BUSY 0xff
#define _ABORT_LOCK_IS_LOCKED 0xfe
#define _ABORT_NESTED_TRYLOCK 0xfd
-#ifndef __ASSEMBLER__
-
-#define __force_inline __attribute__((__always_inline__)) inline
-
-static __force_inline int _xbegin(void)
-{
- int ret = _XBEGIN_STARTED;
- asm volatile (".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory");
- return ret;
-}
-
-static __force_inline void _xend(void)
-{
- asm volatile (".byte 0x0f,0x01,0xd5" ::: "memory");
-}
-
-static __force_inline void _xabort(const unsigned int status)
-{
- asm volatile (".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory");
-}
-
-static __force_inline int _xtest(void)
-{
- unsigned char out;
- asm volatile (".byte 0x0f,0x01,0xd6 ; setnz %0" : "=r" (out) :: "memory");
- return out;
-}
-
-#endif
#endif