Patchwork x86: Use RTM intrinsics in pthread mutex lock elision

login
register
mail settings
Submitter H.J. Lu
Date Oct. 1, 2018, 10:08 p.m.
Message ID <20181001220831.7420-1-hjl.tools@gmail.com>
Download mbox | patch
Permalink /patch/29604/
State New
Headers show

Comments

H.J. Lu - Oct. 1, 2018, 10:08 p.m.
Since RTM intrinsics are supported in GCC 4.9, we can use them in
pthread mutex lock elision.

	* sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c):
	Add -mrtm.
	(CFLAGS-elision-unlock.c): Likewise.
	(CFLAGS-elision-timed.c): Likewise.
	(CFLAGS-elision-trylock.c): Likewise.
	* sysdeps/unix/sysv/linux/x86/hle.h: Rewritten.
---
 sysdeps/unix/sysv/linux/x86/Makefile |  4 ++
 sysdeps/unix/sysv/linux/x86/hle.h    | 70 ++--------------------------
 2 files changed, 7 insertions(+), 67 deletions(-)
Adhemerval Zanella Netto - Oct. 2, 2018, 1:06 p.m.
On 01/10/2018 19:08, H.J. Lu wrote:
> Since RTM intrinsics are supported in GCC 4.9, we can use them in
> pthread mutex lock elision.
> 
> 	* sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c):
> 	Add -mrtm.
> 	(CFLAGS-elision-unlock.c): Likewise.
> 	(CFLAGS-elision-timed.c): Likewise.
> 	(CFLAGS-elision-trylock.c): Likewise.
> 	* sysdeps/unix/sysv/linux/x86/hle.h: Rewritten.

LGTM, thanks.

> ---
>  sysdeps/unix/sysv/linux/x86/Makefile |  4 ++
>  sysdeps/unix/sysv/linux/x86/hle.h    | 70 ++--------------------------
>  2 files changed, 7 insertions(+), 67 deletions(-)
> 
> diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile
> index 7dc4e61756..02ca36c6d2 100644
> --- a/sysdeps/unix/sysv/linux/x86/Makefile
> +++ b/sysdeps/unix/sysv/linux/x86/Makefile
> @@ -14,6 +14,10 @@ endif
>  ifeq ($(subdir),nptl)
>  libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \
>  			      elision-trylock
> +CFLAGS-elision-lock.c += -mrtm
> +CFLAGS-elision-unlock.c += -mrtm
> +CFLAGS-elision-timed.c += -mrtm
> +CFLAGS-elision-trylock.c += -mrtm
>  endif
>  
>  ifeq ($(subdir),elf)
> diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h
> index 4a7b9e3bf7..0449026839 100644
> --- a/sysdeps/unix/sysv/linux/x86/hle.h
> +++ b/sysdeps/unix/sysv/linux/x86/hle.h
> @@ -1,75 +1,11 @@
> -/* Shared RTM header.  Emulate TSX intrinsics for compilers and assemblers
> -   that do not support the intrinsics and instructions yet.  */
> +/* Shared RTM header.  */
>  #ifndef _HLE_H
>  #define _HLE_H 1
>  
> -#ifdef __ASSEMBLER__
> +#include <x86intrin.h>

Is it used in any configuration in assembly code?
H.J. Lu - Oct. 2, 2018, 1:49 p.m.
On Tue, Oct 2, 2018 at 6:06 AM Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 01/10/2018 19:08, H.J. Lu wrote:
> > Since RTM intrinsics are supported in GCC 4.9, we can use them in
> > pthread mutex lock elision.
> >
> >       * sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c):
> >       Add -mrtm.
> >       (CFLAGS-elision-unlock.c): Likewise.
> >       (CFLAGS-elision-timed.c): Likewise.
> >       (CFLAGS-elision-trylock.c): Likewise.
> >       * sysdeps/unix/sysv/linux/x86/hle.h: Rewritten.
>
> LGTM, thanks.
>
> > ---
> >  sysdeps/unix/sysv/linux/x86/Makefile |  4 ++
> >  sysdeps/unix/sysv/linux/x86/hle.h    | 70 ++--------------------------
> >  2 files changed, 7 insertions(+), 67 deletions(-)
> >
> > diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile
> > index 7dc4e61756..02ca36c6d2 100644
> > --- a/sysdeps/unix/sysv/linux/x86/Makefile
> > +++ b/sysdeps/unix/sysv/linux/x86/Makefile
> > @@ -14,6 +14,10 @@ endif
> >  ifeq ($(subdir),nptl)
> >  libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \
> >                             elision-trylock
> > +CFLAGS-elision-lock.c += -mrtm
> > +CFLAGS-elision-unlock.c += -mrtm
> > +CFLAGS-elision-timed.c += -mrtm
> > +CFLAGS-elision-trylock.c += -mrtm
> >  endif
> >
> >  ifeq ($(subdir),elf)
> > diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h
> > index 4a7b9e3bf7..0449026839 100644
> > --- a/sysdeps/unix/sysv/linux/x86/hle.h
> > +++ b/sysdeps/unix/sysv/linux/x86/hle.h
> > @@ -1,75 +1,11 @@
> > -/* Shared RTM header.  Emulate TSX intrinsics for compilers and assemblers
> > -   that do not support the intrinsics and instructions yet.  */
> > +/* Shared RTM header.  */
> >  #ifndef _HLE_H
> >  #define _HLE_H 1
> >
> > -#ifdef __ASSEMBLER__
> > +#include <x86intrin.h>
>
> Is it used in any configuration in assembly code?

No:

sysdeps/unix/sysv/linux/x86/elision-lock.c:#include "hle.h"
sysdeps/unix/sysv/linux/x86/elision-trylock.c:#include "hle.h"
sysdeps/unix/sysv/linux/x86/elision-unlock.c:#include "hle.h"
sysdeps/x86/elide.h:#include <hle.h>

BTW, elide.h isn't used anywhere.
Adhemerval Zanella Netto - Oct. 2, 2018, 5:27 p.m.
On 02/10/2018 10:49, H.J. Lu wrote:
> On Tue, Oct 2, 2018 at 6:06 AM Adhemerval Zanella
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>>
>> On 01/10/2018 19:08, H.J. Lu wrote:
>>> Since RTM intrinsics are supported in GCC 4.9, we can use them in
>>> pthread mutex lock elision.
>>>
>>>       * sysdeps/unix/sysv/linux/x86/Makefile (CFLAGS-elision-lock.c):
>>>       Add -mrtm.
>>>       (CFLAGS-elision-unlock.c): Likewise.
>>>       (CFLAGS-elision-timed.c): Likewise.
>>>       (CFLAGS-elision-trylock.c): Likewise.
>>>       * sysdeps/unix/sysv/linux/x86/hle.h: Rewritten.
>>
>> LGTM, thanks.
>>
>>> ---
>>>  sysdeps/unix/sysv/linux/x86/Makefile |  4 ++
>>>  sysdeps/unix/sysv/linux/x86/hle.h    | 70 ++--------------------------
>>>  2 files changed, 7 insertions(+), 67 deletions(-)
>>>
>>> diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile
>>> index 7dc4e61756..02ca36c6d2 100644
>>> --- a/sysdeps/unix/sysv/linux/x86/Makefile
>>> +++ b/sysdeps/unix/sysv/linux/x86/Makefile
>>> @@ -14,6 +14,10 @@ endif
>>>  ifeq ($(subdir),nptl)
>>>  libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \
>>>                             elision-trylock
>>> +CFLAGS-elision-lock.c += -mrtm
>>> +CFLAGS-elision-unlock.c += -mrtm
>>> +CFLAGS-elision-timed.c += -mrtm
>>> +CFLAGS-elision-trylock.c += -mrtm
>>>  endif
>>>
>>>  ifeq ($(subdir),elf)
>>> diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h
>>> index 4a7b9e3bf7..0449026839 100644
>>> --- a/sysdeps/unix/sysv/linux/x86/hle.h
>>> +++ b/sysdeps/unix/sysv/linux/x86/hle.h
>>> @@ -1,75 +1,11 @@
>>> -/* Shared RTM header.  Emulate TSX intrinsics for compilers and assemblers
>>> -   that do not support the intrinsics and instructions yet.  */
>>> +/* Shared RTM header.  */
>>>  #ifndef _HLE_H
>>>  #define _HLE_H 1
>>>
>>> -#ifdef __ASSEMBLER__
>>> +#include <x86intrin.h>
>>
>> Is it used in any configuration in assembly code?
> 
> No:
> 
> sysdeps/unix/sysv/linux/x86/elision-lock.c:#include "hle.h"
> sysdeps/unix/sysv/linux/x86/elision-trylock.c:#include "hle.h"
> sysdeps/unix/sysv/linux/x86/elision-unlock.c:#include "hle.h"
> sysdeps/x86/elide.h:#include <hle.h>
> 
> BTW, elide.h isn't used anywhere.
> 

It was used by HTM lock elision on pthread_rwlock_* before the new implementation
(cc25c8b4c1196a8c29e9a45b1e096b99a87b7f8c).  Andrew Senkevich has sent a
patch [1] to re-enable HTM rwlock elision, but his own performance results
seem mixed.

I think we can safely remove all elide.h files now.

[1] https://sourceware.org/ml/libc-alpha/2017-04/msg00067.html

Patch

diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile
index 7dc4e61756..02ca36c6d2 100644
--- a/sysdeps/unix/sysv/linux/x86/Makefile
+++ b/sysdeps/unix/sysv/linux/x86/Makefile
@@ -14,6 +14,10 @@  endif
 ifeq ($(subdir),nptl)
 libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \
 			      elision-trylock
+CFLAGS-elision-lock.c += -mrtm
+CFLAGS-elision-unlock.c += -mrtm
+CFLAGS-elision-timed.c += -mrtm
+CFLAGS-elision-trylock.c += -mrtm
 endif
 
 ifeq ($(subdir),elf)
diff --git a/sysdeps/unix/sysv/linux/x86/hle.h b/sysdeps/unix/sysv/linux/x86/hle.h
index 4a7b9e3bf7..0449026839 100644
--- a/sysdeps/unix/sysv/linux/x86/hle.h
+++ b/sysdeps/unix/sysv/linux/x86/hle.h
@@ -1,75 +1,11 @@ 
-/* Shared RTM header.  Emulate TSX intrinsics for compilers and assemblers
-   that do not support the intrinsics and instructions yet.  */
+/* Shared RTM header.  */
 #ifndef _HLE_H
 #define _HLE_H 1
 
-#ifdef __ASSEMBLER__
+#include <x86intrin.h>
 
-.macro XBEGIN target
-	.byte 0xc7,0xf8
-	.long \target-1f
-1:
-.endm
-
-.macro XEND
-	.byte 0x0f,0x01,0xd5
-.endm
-
-.macro XABORT code
-	.byte 0xc6,0xf8,\code
-.endm
-
-.macro XTEST
-	 .byte 0x0f,0x01,0xd6
-.endm
-
-#endif
-
-/* Official RTM intrinsics interface matching gcc/icc, but works
-   on older gcc compatible compilers and binutils.
-   We should somehow detect if the compiler supports it, because
-   it may be able to generate slightly better code.  */
-
-#define _XBEGIN_STARTED		(~0u)
-#define _XABORT_EXPLICIT	(1 << 0)
-#define _XABORT_RETRY		(1 << 1)
-#define _XABORT_CONFLICT	(1 << 2)
-#define _XABORT_CAPACITY	(1 << 3)
-#define _XABORT_DEBUG		(1 << 4)
-#define _XABORT_NESTED		(1 << 5)
-#define _XABORT_CODE(x)		(((x) >> 24) & 0xff)
-
-#define _ABORT_LOCK_BUSY 	0xff
+#define _ABORT_LOCK_BUSY	0xff
 #define _ABORT_LOCK_IS_LOCKED	0xfe
 #define _ABORT_NESTED_TRYLOCK	0xfd
 
-#ifndef __ASSEMBLER__
-
-#define __force_inline __attribute__((__always_inline__)) inline
-
-static __force_inline int _xbegin(void)
-{
-  int ret = _XBEGIN_STARTED;
-  asm volatile (".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory");
-  return ret;
-}
-
-static __force_inline void _xend(void)
-{
-  asm volatile (".byte 0x0f,0x01,0xd5" ::: "memory");
-}
-
-static __force_inline void _xabort(const unsigned int status)
-{
-  asm volatile (".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory");
-}
-
-static __force_inline int _xtest(void)
-{
-  unsigned char out;
-  asm volatile (".byte 0x0f,0x01,0xd6 ; setnz %0" : "=r" (out) :: "memory");
-  return out;
-}
-
-#endif
 #endif