powerpc: Enforce compiler barriers on hardware transactions

Message ID 1451312683-4503-1-git-send-email-tuliom@linux.vnet.ibm.com
State Committed
Delegated to: Adhemerval Zanella Netto
Headers

Commit Message

Tulio Magno Quites Machado Filho Dec. 28, 2015, 2:24 p.m. UTC
  Following my previous removal of compiler barriers from this patch:
https://sourceware.org/ml/libc-alpha/2015-08/msg00858.html

8<------

Work around a GCC behavior with hardware transactional memory built-ins.
GCC doesn't treat the PowerPC transactional built-ins as compiler
barriers, moving instructions past the transaction boundaries and
altering their atomicity.

2015-12-28  Tulio Magno Quites Machado Filho  <tuliom@linux.vnet.ibm.com>

	* sysdeps/unix/sysv/linux/powerpc/htm.h (__libc_tbegin,
	__libc_tabort, __libc_tend): New wrappers that enforce compiler
	barriers to their respective compiler built-ins.
	* sysdeps/powerpc/nptl/elide.h (__get_new_count, ELIDE_LOCK,
	ELIDE_TRYLOCK, __elide_unlock): Use the new wrappers.
	* sysdeps/powerpc/sysdep.h: Likewise.
	* sysdeps/unix/sysv/linux/powerpc/elision-lock.c: Likewise.
	* sysdeps/unix/sysv/linux/powerpc/elision-trylock.c: Likewise.
	* sysdeps/unix/sysv/linux/powerpc/elision-unlock.c: Likewise.
---
P.S.: we're aware that GCC is not generating optimal code with the barriers
inserted by this patch.  However, I believe correctness is more important
than performance.  In any case, these barriers will only be used when
compiling with earlier releases of GCC 4.9 and 5.0.  Current releases of
GCC 4.9 and 5.0 already have the patch to treat HTM builtins as barriers.

 sysdeps/powerpc/nptl/elide.h                      |  8 ++---
 sysdeps/powerpc/sysdep.h                          |  2 +-
 sysdeps/unix/sysv/linux/powerpc/elision-lock.c    |  4 +--
 sysdeps/unix/sysv/linux/powerpc/elision-trylock.c |  6 ++--
 sysdeps/unix/sysv/linux/powerpc/elision-unlock.c  |  2 +-
 sysdeps/unix/sysv/linux/powerpc/htm.h             | 39 ++++++++++++++++++++---
 6 files changed, 46 insertions(+), 15 deletions(-)
  

Comments

Tulio Magno Quites Machado Filho Jan. 5, 2016, 7:49 p.m. UTC | #1
Ping?

"Tulio Magno Quites Machado Filho" <tuliom@linux.vnet.ibm.com> writes:

> Following my previous removal of compiler barriers from this patch:
> https://sourceware.org/ml/libc-alpha/2015-08/msg00858.html
>
> 8<------
>
> Work around a GCC behavior with hardware transactional memory built-ins.
> GCC doesn't treat the PowerPC transactional built-ins as compiler
> barriers, moving instructions past the transaction boundaries and
> altering their atomicity.
>
> 2015-12-28  Tulio Magno Quites Machado Filho  <tuliom@linux.vnet.ibm.com>
>
> 	* sysdeps/unix/sysv/linux/powerpc/htm.h (__libc_tbegin,
> 	__libc_tabort, __libc_tend): New wrappers that enforce compiler
> 	barriers to their respective compiler built-ins.
> 	* sysdeps/powerpc/nptl/elide.h (__get_new_count, ELIDE_LOCK,
> 	ELIDE_TRYLOCK, __elide_unlock): Use the new wrappers.
> 	* sysdeps/powerpc/sysdep.h: Likewise.
> 	* sysdeps/unix/sysv/linux/powerpc/elision-lock.c: Likewise.
> 	* sysdeps/unix/sysv/linux/powerpc/elision-trylock.c: Likewise.
> 	* sysdeps/unix/sysv/linux/powerpc/elision-unlock.c: Likewise.
> ---
> P.S.: we're aware that GCC is not generating optimal code with the barriers
> inserted by this patch.  However, I believe correctness is more important
> than performance.  In any case, these barriers will only be used when
> compiling with earlier releases of GCC 4.9 and 5.0.  Current releases of
> GCC 4.9 and 5.0 already have the patch to treat HTM builtins as barriers.
>
>  sysdeps/powerpc/nptl/elide.h                      |  8 ++---
>  sysdeps/powerpc/sysdep.h                          |  2 +-
>  sysdeps/unix/sysv/linux/powerpc/elision-lock.c    |  4 +--
>  sysdeps/unix/sysv/linux/powerpc/elision-trylock.c |  6 ++--
>  sysdeps/unix/sysv/linux/powerpc/elision-unlock.c  |  2 +-
>  sysdeps/unix/sysv/linux/powerpc/htm.h             | 39 ++++++++++++++++++++---
>  6 files changed, 46 insertions(+), 15 deletions(-)
>
> diff --git a/sysdeps/powerpc/nptl/elide.h b/sysdeps/powerpc/nptl/elide.h
> index 2e1e443..02f8f3b 100644
> --- a/sysdeps/powerpc/nptl/elide.h
> +++ b/sysdeps/powerpc/nptl/elide.h
> @@ -68,14 +68,14 @@ __get_new_count (uint8_t *adapt_count, int attempt)
>      else								\
>        for (int i = __elision_aconf.try_tbegin; i > 0; i--)		\
>  	{								\
> -	  if (__builtin_tbegin (0))					\
> +	  if (__libc_tbegin (0))					\
>  	    {								\
>  	      if (is_lock_free)						\
>  		{							\
>  		  ret = 1;						\
>  		  break;						\
>  		}							\
> -	      __builtin_tabort (_ABORT_LOCK_BUSY);			\
> +	      __libc_tabort (_ABORT_LOCK_BUSY);				\
>  	    }								\
>  	  else								\
>  	    if (!__get_new_count (&adapt_count,i))			\
> @@ -90,7 +90,7 @@ __get_new_count (uint8_t *adapt_count, int attempt)
>      if (__elision_aconf.try_tbegin > 0)				\
>        {								\
>  	if (write)						\
> -	  __builtin_tabort (_ABORT_NESTED_TRYLOCK);		\
> +	  __libc_tabort (_ABORT_NESTED_TRYLOCK);		\
>  	ret = ELIDE_LOCK (adapt_count, is_lock_free);		\
>        }								\
>      ret;							\
> @@ -102,7 +102,7 @@ __elide_unlock (int is_lock_free)
>  {
>    if (is_lock_free)
>      {
> -      __builtin_tend (0);
> +      __libc_tend (0);
>        return true;
>      }
>    return false;
> diff --git a/sysdeps/powerpc/sysdep.h b/sysdeps/powerpc/sysdep.h
> index e32168e..f424fe4 100644
> --- a/sysdeps/powerpc/sysdep.h
> +++ b/sysdeps/powerpc/sysdep.h
> @@ -180,7 +180,7 @@
>  # define ABORT_TRANSACTION \
>    ({ 						\
>      if (THREAD_GET_TM_CAPABLE ())		\
> -      __builtin_tabort (_ABORT_SYSCALL);	\
> +      __libc_tabort (_ABORT_SYSCALL);	\
>    })
>  #else
>  # define ABORT_TRANSACTION
> diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c
> index 4775fca..2a0e540 100644
> --- a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c
> +++ b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c
> @@ -52,12 +52,12 @@ __lll_lock_elision (int *lock, short *adapt_count, EXTRAARG int pshared)
>
>    for (int i = aconf.try_tbegin; i > 0; i--)
>      {
> -      if (__builtin_tbegin (0))
> +      if (__libc_tbegin (0))
>  	{
>  	  if (*lock == 0)
>  	    return 0;
>  	  /* Lock was busy.  Fall back to normal locking.  */
> -	  __builtin_tabort (_ABORT_LOCK_BUSY);
> +	  __libc_tabort (_ABORT_LOCK_BUSY);
>  	}
>        else
>  	{
> diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
> index 6f61eba..b391116 100644
> --- a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
> +++ b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
> @@ -31,7 +31,7 @@ int
>  __lll_trylock_elision (int *futex, short *adapt_count)
>  {
>    /* Implement POSIX semantics by forbiding nesting elided trylocks.  */
> -  __builtin_tabort (_ABORT_NESTED_TRYLOCK);
> +  __libc_tabort (_ABORT_NESTED_TRYLOCK);
>
>    /* Only try a transaction if it's worth it.  */
>    if (*adapt_count > 0)
> @@ -39,14 +39,14 @@ __lll_trylock_elision (int *futex, short *adapt_count)
>        goto use_lock;
>      }
>
> -  if (__builtin_tbegin (0))
> +  if (__libc_tbegin (0))
>      {
>        if (*futex == 0)
>  	return 0;
>
>        /* Lock was busy.  This is never a nested transaction.
>           End it, and set the adapt count.  */
> -      __builtin_tend (0);
> +      __libc_tend (0);
>
>        if (aconf.skip_lock_busy > 0)
>  	*adapt_count = aconf.skip_lock_busy;
> diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
> index 72b893d..4b4ae62 100644
> --- a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
> +++ b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
> @@ -25,7 +25,7 @@ __lll_unlock_elision (int *lock, short *adapt_count, int pshared)
>  {
>    /* When the lock was free we're in a transaction.  */
>    if (*lock == 0)
> -    __builtin_tend (0);
> +    __libc_tend (0);
>    else
>      {
>        lll_unlock ((*lock), pshared);
> diff --git a/sysdeps/unix/sysv/linux/powerpc/htm.h b/sysdeps/unix/sysv/linux/powerpc/htm.h
> index 5127b4b..6c05b0f 100644
> --- a/sysdeps/unix/sysv/linux/powerpc/htm.h
> +++ b/sysdeps/unix/sysv/linux/powerpc/htm.h
> @@ -118,13 +118,44 @@
>       __ret;				\
>    })
>
> -#define __builtin_tbegin(tdb)       _tbegin ()
> -#define __builtin_tend(nested)      _tend ()
> -#define __builtin_tabort(abortcode) _tabort (abortcode)
> -#define __builtin_get_texasru()     _texasru ()
> +#define __libc_tbegin(tdb)       _tbegin ()
> +#define __libc_tend(nested)      _tend ()
> +#define __libc_tabort(abortcode) _tabort (abortcode)
> +#define __builtin_get_texasru()  _texasru ()
>
>  #else
>  # include <htmintrin.h>
> +
> +# ifdef __TM_FENCE__
> +   /* New GCC behavior.  */
> +#  define __libc_tbegin(R)  __builtin_tbegin (R);
> +#  define __libc_tend(R)    __builtin_tend (R);
> +#  define __libc_tabort(R)  __builtin_tabort (R);
> +# else
> +   /* Workaround an old GCC behavior. Earlier releases of GCC 4.9 and 5.0,
> +      didn't use to treat __builtin_tbegin, __builtin_tend and
> +      __builtin_tabort as compiler barriers, moving instructions into and
> +      out the transaction.
> +      Remove this when glibc drops support for GCC 5.0.  */
> +#  define __libc_tbegin(R)			\
> +   ({ __asm__ volatile("" ::: "memory");	\
> +     unsigned int __ret = __builtin_tbegin (R);	\
> +     __asm__ volatile("" ::: "memory");		\
> +     __ret;					\
> +   })
> +#  define __libc_tabort(R)			\
> +  ({ __asm__ volatile("" ::: "memory");		\
> +    unsigned int __ret = __builtin_tabort (R);	\
> +    __asm__ volatile("" ::: "memory");		\
> +    __ret;					\
> +  })
> +#  define __libc_tend(R)			\
> +   ({ __asm__ volatile("" ::: "memory");	\
> +     unsigned int __ret = __builtin_tend (R);	\
> +     __asm__ volatile("" ::: "memory");		\
> +     __ret;					\
> +   })
> +# endif /* __TM_FENCE__  */
>  #endif /* __HTM__  */
>
>  #endif /* __ASSEMBLER__ */
  
Adhemerval Zanella Netto Jan. 5, 2016, 8:07 p.m. UTC | #2
LGTM.

On 05-01-2016 17:49, Tulio Magno Quites Machado Filho wrote:
> Ping?
> 
> "Tulio Magno Quites Machado Filho" <tuliom@linux.vnet.ibm.com> writes:
> 
>> Following my previous removal of compiler barriers from this patch:
>> https://sourceware.org/ml/libc-alpha/2015-08/msg00858.html
>>
>> 8<------
>>
>> Work around a GCC behavior with hardware transactional memory built-ins.
>> GCC doesn't treat the PowerPC transactional built-ins as compiler
>> barriers, moving instructions past the transaction boundaries and
>> altering their atomicity.
>>
>> 2015-12-28  Tulio Magno Quites Machado Filho  <tuliom@linux.vnet.ibm.com>
>>
>> 	* sysdeps/unix/sysv/linux/powerpc/htm.h (__libc_tbegin,
>> 	__libc_tabort, __libc_tend): New wrappers that enforce compiler
>> 	barriers to their respective compiler built-ins.
>> 	* sysdeps/powerpc/nptl/elide.h (__get_new_count, ELIDE_LOCK,
>> 	ELIDE_TRYLOCK, __elide_unlock): Use the new wrappers.
>> 	* sysdeps/powerpc/sysdep.h: Likewise.
>> 	* sysdeps/unix/sysv/linux/powerpc/elision-lock.c: Likewise.
>> 	* sysdeps/unix/sysv/linux/powerpc/elision-trylock.c: Likewise.
>> 	* sysdeps/unix/sysv/linux/powerpc/elision-unlock.c: Likewise.
>> ---
>> P.S.: we're aware that GCC is not generating optimal code with the barriers
>> inserted by this patch.  However, I believe correctness is more important
>> than performance.  In any case, these barriers will only be used when
>> compiling with earlier releases of GCC 4.9 and 5.0.  Current releases of
>> GCC 4.9 and 5.0 already have the patch to treat HTM builtins as barriers.
>>
>>  sysdeps/powerpc/nptl/elide.h                      |  8 ++---
>>  sysdeps/powerpc/sysdep.h                          |  2 +-
>>  sysdeps/unix/sysv/linux/powerpc/elision-lock.c    |  4 +--
>>  sysdeps/unix/sysv/linux/powerpc/elision-trylock.c |  6 ++--
>>  sysdeps/unix/sysv/linux/powerpc/elision-unlock.c  |  2 +-
>>  sysdeps/unix/sysv/linux/powerpc/htm.h             | 39 ++++++++++++++++++++---
>>  6 files changed, 46 insertions(+), 15 deletions(-)
>>
>> diff --git a/sysdeps/powerpc/nptl/elide.h b/sysdeps/powerpc/nptl/elide.h
>> index 2e1e443..02f8f3b 100644
>> --- a/sysdeps/powerpc/nptl/elide.h
>> +++ b/sysdeps/powerpc/nptl/elide.h
>> @@ -68,14 +68,14 @@ __get_new_count (uint8_t *adapt_count, int attempt)
>>      else								\
>>        for (int i = __elision_aconf.try_tbegin; i > 0; i--)		\
>>  	{								\
>> -	  if (__builtin_tbegin (0))					\
>> +	  if (__libc_tbegin (0))					\
>>  	    {								\
>>  	      if (is_lock_free)						\
>>  		{							\
>>  		  ret = 1;						\
>>  		  break;						\
>>  		}							\
>> -	      __builtin_tabort (_ABORT_LOCK_BUSY);			\
>> +	      __libc_tabort (_ABORT_LOCK_BUSY);				\
>>  	    }								\
>>  	  else								\
>>  	    if (!__get_new_count (&adapt_count,i))			\
>> @@ -90,7 +90,7 @@ __get_new_count (uint8_t *adapt_count, int attempt)
>>      if (__elision_aconf.try_tbegin > 0)				\
>>        {								\
>>  	if (write)						\
>> -	  __builtin_tabort (_ABORT_NESTED_TRYLOCK);		\
>> +	  __libc_tabort (_ABORT_NESTED_TRYLOCK);		\
>>  	ret = ELIDE_LOCK (adapt_count, is_lock_free);		\
>>        }								\
>>      ret;							\
>> @@ -102,7 +102,7 @@ __elide_unlock (int is_lock_free)
>>  {
>>    if (is_lock_free)
>>      {
>> -      __builtin_tend (0);
>> +      __libc_tend (0);
>>        return true;
>>      }
>>    return false;
>> diff --git a/sysdeps/powerpc/sysdep.h b/sysdeps/powerpc/sysdep.h
>> index e32168e..f424fe4 100644
>> --- a/sysdeps/powerpc/sysdep.h
>> +++ b/sysdeps/powerpc/sysdep.h
>> @@ -180,7 +180,7 @@
>>  # define ABORT_TRANSACTION \
>>    ({ 						\
>>      if (THREAD_GET_TM_CAPABLE ())		\
>> -      __builtin_tabort (_ABORT_SYSCALL);	\
>> +      __libc_tabort (_ABORT_SYSCALL);	\
>>    })
>>  #else
>>  # define ABORT_TRANSACTION
>> diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c
>> index 4775fca..2a0e540 100644
>> --- a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c
>> +++ b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c
>> @@ -52,12 +52,12 @@ __lll_lock_elision (int *lock, short *adapt_count, EXTRAARG int pshared)
>>
>>    for (int i = aconf.try_tbegin; i > 0; i--)
>>      {
>> -      if (__builtin_tbegin (0))
>> +      if (__libc_tbegin (0))
>>  	{
>>  	  if (*lock == 0)
>>  	    return 0;
>>  	  /* Lock was busy.  Fall back to normal locking.  */
>> -	  __builtin_tabort (_ABORT_LOCK_BUSY);
>> +	  __libc_tabort (_ABORT_LOCK_BUSY);
>>  	}
>>        else
>>  	{
>> diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
>> index 6f61eba..b391116 100644
>> --- a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
>> +++ b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
>> @@ -31,7 +31,7 @@ int
>>  __lll_trylock_elision (int *futex, short *adapt_count)
>>  {
>>    /* Implement POSIX semantics by forbiding nesting elided trylocks.  */
>> -  __builtin_tabort (_ABORT_NESTED_TRYLOCK);
>> +  __libc_tabort (_ABORT_NESTED_TRYLOCK);
>>
>>    /* Only try a transaction if it's worth it.  */
>>    if (*adapt_count > 0)
>> @@ -39,14 +39,14 @@ __lll_trylock_elision (int *futex, short *adapt_count)
>>        goto use_lock;
>>      }
>>
>> -  if (__builtin_tbegin (0))
>> +  if (__libc_tbegin (0))
>>      {
>>        if (*futex == 0)
>>  	return 0;
>>
>>        /* Lock was busy.  This is never a nested transaction.
>>           End it, and set the adapt count.  */
>> -      __builtin_tend (0);
>> +      __libc_tend (0);
>>
>>        if (aconf.skip_lock_busy > 0)
>>  	*adapt_count = aconf.skip_lock_busy;
>> diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
>> index 72b893d..4b4ae62 100644
>> --- a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
>> +++ b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
>> @@ -25,7 +25,7 @@ __lll_unlock_elision (int *lock, short *adapt_count, int pshared)
>>  {
>>    /* When the lock was free we're in a transaction.  */
>>    if (*lock == 0)
>> -    __builtin_tend (0);
>> +    __libc_tend (0);
>>    else
>>      {
>>        lll_unlock ((*lock), pshared);
>> diff --git a/sysdeps/unix/sysv/linux/powerpc/htm.h b/sysdeps/unix/sysv/linux/powerpc/htm.h
>> index 5127b4b..6c05b0f 100644
>> --- a/sysdeps/unix/sysv/linux/powerpc/htm.h
>> +++ b/sysdeps/unix/sysv/linux/powerpc/htm.h
>> @@ -118,13 +118,44 @@
>>       __ret;				\
>>    })
>>
>> -#define __builtin_tbegin(tdb)       _tbegin ()
>> -#define __builtin_tend(nested)      _tend ()
>> -#define __builtin_tabort(abortcode) _tabort (abortcode)
>> -#define __builtin_get_texasru()     _texasru ()
>> +#define __libc_tbegin(tdb)       _tbegin ()
>> +#define __libc_tend(nested)      _tend ()
>> +#define __libc_tabort(abortcode) _tabort (abortcode)
>> +#define __builtin_get_texasru()  _texasru ()
>>
>>  #else
>>  # include <htmintrin.h>
>> +
>> +# ifdef __TM_FENCE__
>> +   /* New GCC behavior.  */
>> +#  define __libc_tbegin(R)  __builtin_tbegin (R);
>> +#  define __libc_tend(R)    __builtin_tend (R);
>> +#  define __libc_tabort(R)  __builtin_tabort (R);
>> +# else
>> +   /* Workaround an old GCC behavior. Earlier releases of GCC 4.9 and 5.0,
>> +      didn't use to treat __builtin_tbegin, __builtin_tend and
>> +      __builtin_tabort as compiler barriers, moving instructions into and
>> +      out the transaction.
>> +      Remove this when glibc drops support for GCC 5.0.  */
>> +#  define __libc_tbegin(R)			\
>> +   ({ __asm__ volatile("" ::: "memory");	\
>> +     unsigned int __ret = __builtin_tbegin (R);	\
>> +     __asm__ volatile("" ::: "memory");		\
>> +     __ret;					\
>> +   })
>> +#  define __libc_tabort(R)			\
>> +  ({ __asm__ volatile("" ::: "memory");		\
>> +    unsigned int __ret = __builtin_tabort (R);	\
>> +    __asm__ volatile("" ::: "memory");		\
>> +    __ret;					\
>> +  })
>> +#  define __libc_tend(R)			\
>> +   ({ __asm__ volatile("" ::: "memory");	\
>> +     unsigned int __ret = __builtin_tend (R);	\
>> +     __asm__ volatile("" ::: "memory");		\
>> +     __ret;					\
>> +   })
>> +# endif /* __TM_FENCE__  */
>>  #endif /* __HTM__  */
>>
>>  #endif /* __ASSEMBLER__ */
>
  
Carlos Eduardo Seo Jan. 8, 2016, 7:44 p.m. UTC | #3
LGTM.
  
Tulio Magno Quites Machado Filho Jan. 8, 2016, 7:56 p.m. UTC | #4
Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:

> LGTM.

Thanks!

Pushed as 42bf1c8.
  

Patch

diff --git a/sysdeps/powerpc/nptl/elide.h b/sysdeps/powerpc/nptl/elide.h
index 2e1e443..02f8f3b 100644
--- a/sysdeps/powerpc/nptl/elide.h
+++ b/sysdeps/powerpc/nptl/elide.h
@@ -68,14 +68,14 @@  __get_new_count (uint8_t *adapt_count, int attempt)
     else								\
       for (int i = __elision_aconf.try_tbegin; i > 0; i--)		\
 	{								\
-	  if (__builtin_tbegin (0))					\
+	  if (__libc_tbegin (0))					\
 	    {								\
 	      if (is_lock_free)						\
 		{							\
 		  ret = 1;						\
 		  break;						\
 		}							\
-	      __builtin_tabort (_ABORT_LOCK_BUSY);			\
+	      __libc_tabort (_ABORT_LOCK_BUSY);				\
 	    }								\
 	  else								\
 	    if (!__get_new_count (&adapt_count,i))			\
@@ -90,7 +90,7 @@  __get_new_count (uint8_t *adapt_count, int attempt)
     if (__elision_aconf.try_tbegin > 0)				\
       {								\
 	if (write)						\
-	  __builtin_tabort (_ABORT_NESTED_TRYLOCK);		\
+	  __libc_tabort (_ABORT_NESTED_TRYLOCK);		\
 	ret = ELIDE_LOCK (adapt_count, is_lock_free);		\
       }								\
     ret;							\
@@ -102,7 +102,7 @@  __elide_unlock (int is_lock_free)
 {
   if (is_lock_free)
     {
-      __builtin_tend (0);
+      __libc_tend (0);
       return true;
     }
   return false;
diff --git a/sysdeps/powerpc/sysdep.h b/sysdeps/powerpc/sysdep.h
index e32168e..f424fe4 100644
--- a/sysdeps/powerpc/sysdep.h
+++ b/sysdeps/powerpc/sysdep.h
@@ -180,7 +180,7 @@ 
 # define ABORT_TRANSACTION \
   ({ 						\
     if (THREAD_GET_TM_CAPABLE ())		\
-      __builtin_tabort (_ABORT_SYSCALL);	\
+      __libc_tabort (_ABORT_SYSCALL);	\
   })
 #else
 # define ABORT_TRANSACTION
diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c
index 4775fca..2a0e540 100644
--- a/sysdeps/unix/sysv/linux/powerpc/elision-lock.c
+++ b/sysdeps/unix/sysv/linux/powerpc/elision-lock.c
@@ -52,12 +52,12 @@  __lll_lock_elision (int *lock, short *adapt_count, EXTRAARG int pshared)
 
   for (int i = aconf.try_tbegin; i > 0; i--)
     {
-      if (__builtin_tbegin (0))
+      if (__libc_tbegin (0))
 	{
 	  if (*lock == 0)
 	    return 0;
 	  /* Lock was busy.  Fall back to normal locking.  */
-	  __builtin_tabort (_ABORT_LOCK_BUSY);
+	  __libc_tabort (_ABORT_LOCK_BUSY);
 	}
       else
 	{
diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
index 6f61eba..b391116 100644
--- a/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
+++ b/sysdeps/unix/sysv/linux/powerpc/elision-trylock.c
@@ -31,7 +31,7 @@  int
 __lll_trylock_elision (int *futex, short *adapt_count)
 {
   /* Implement POSIX semantics by forbiding nesting elided trylocks.  */
-  __builtin_tabort (_ABORT_NESTED_TRYLOCK);
+  __libc_tabort (_ABORT_NESTED_TRYLOCK);
 
   /* Only try a transaction if it's worth it.  */
   if (*adapt_count > 0)
@@ -39,14 +39,14 @@  __lll_trylock_elision (int *futex, short *adapt_count)
       goto use_lock;
     }
 
-  if (__builtin_tbegin (0))
+  if (__libc_tbegin (0))
     {
       if (*futex == 0)
 	return 0;
 
       /* Lock was busy.  This is never a nested transaction.
          End it, and set the adapt count.  */
-      __builtin_tend (0);
+      __libc_tend (0);
 
       if (aconf.skip_lock_busy > 0)
 	*adapt_count = aconf.skip_lock_busy;
diff --git a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
index 72b893d..4b4ae62 100644
--- a/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
+++ b/sysdeps/unix/sysv/linux/powerpc/elision-unlock.c
@@ -25,7 +25,7 @@  __lll_unlock_elision (int *lock, short *adapt_count, int pshared)
 {
   /* When the lock was free we're in a transaction.  */
   if (*lock == 0)
-    __builtin_tend (0);
+    __libc_tend (0);
   else
     {
       lll_unlock ((*lock), pshared);
diff --git a/sysdeps/unix/sysv/linux/powerpc/htm.h b/sysdeps/unix/sysv/linux/powerpc/htm.h
index 5127b4b..6c05b0f 100644
--- a/sysdeps/unix/sysv/linux/powerpc/htm.h
+++ b/sysdeps/unix/sysv/linux/powerpc/htm.h
@@ -118,13 +118,44 @@ 
      __ret;				\
   })
 
-#define __builtin_tbegin(tdb)       _tbegin ()
-#define __builtin_tend(nested)      _tend ()
-#define __builtin_tabort(abortcode) _tabort (abortcode)
-#define __builtin_get_texasru()     _texasru ()
+#define __libc_tbegin(tdb)       _tbegin ()
+#define __libc_tend(nested)      _tend ()
+#define __libc_tabort(abortcode) _tabort (abortcode)
+#define __builtin_get_texasru()  _texasru ()
 
 #else
 # include <htmintrin.h>
+
+# ifdef __TM_FENCE__
+   /* New GCC behavior.  */
+#  define __libc_tbegin(R)  __builtin_tbegin (R);
+#  define __libc_tend(R)    __builtin_tend (R);
+#  define __libc_tabort(R)  __builtin_tabort (R);
+# else
+   /* Workaround an old GCC behavior. Earlier releases of GCC 4.9 and 5.0,
+      didn't use to treat __builtin_tbegin, __builtin_tend and
+      __builtin_tabort as compiler barriers, moving instructions into and
+      out the transaction.
+      Remove this when glibc drops support for GCC 5.0.  */
+#  define __libc_tbegin(R)			\
+   ({ __asm__ volatile("" ::: "memory");	\
+     unsigned int __ret = __builtin_tbegin (R);	\
+     __asm__ volatile("" ::: "memory");		\
+     __ret;					\
+   })
+#  define __libc_tabort(R)			\
+  ({ __asm__ volatile("" ::: "memory");		\
+    unsigned int __ret = __builtin_tabort (R);	\
+    __asm__ volatile("" ::: "memory");		\
+    __ret;					\
+  })
+#  define __libc_tend(R)			\
+   ({ __asm__ volatile("" ::: "memory");	\
+     unsigned int __ret = __builtin_tend (R);	\
+     __asm__ volatile("" ::: "memory");		\
+     __ret;					\
+   })
+# endif /* __TM_FENCE__  */
 #endif /* __HTM__  */
 
 #endif /* __ASSEMBLER__ */