[RFC] pthread_once: Use unified variant instead of custom x86_64/i386

Message ID 1413822987.8483.41.camel@triegel.csb
State Committed
Headers

Commit Message

Torvald Riegel Oct. 20, 2014, 4:36 p.m. UTC
  On Sun, 2014-10-19 at 18:37 -0700, Roland McGrath wrote:
> > +static int
> > +__attribute__((noinline))
> 
> Space before paren.
> 
> > +__pthread_once_slow (pthread_once_t *once_control, void (*init_routine) (void))
> 
> This needs a comment explaining why it's separate.  I would have expected
> the compiler to generate essentially the same code for the early bailout
> case marked with __glibc_likely.
> 
> > +  atomic_read_barrier();
> [...]
> > +    return __pthread_once_slow(once_control, init_routine);
> 
> Space before paren.

Updated patch attached.  OK to commit?
  

Comments

Roland McGrath Oct. 20, 2014, 6:18 p.m. UTC | #1
> +    return __pthread_once_slow 23(once_control, init_routine);

All looks good except this stray "23".  Its presence suggests you didn't
test the final version of the patch you posted, which you need to do before
committing.
  
Torvald Riegel Oct. 20, 2014, 7:35 p.m. UTC | #2
On Mon, 2014-10-20 at 11:18 -0700, Roland McGrath wrote:
> > +    return __pthread_once_slow 23(once_control, init_routine);
> 
> All looks good except this stray "23".  Its presence suggests you didn't
> test the final version of the patch you posted, which you need to do before
> committing.

Oops, sorry.  Yes, I didn't test after the whitespace fixes (but
before).  I'll make a note to self...

Committed after testing on x86_64.
  

Patch

commit de85f9aef63601448fd4ec2dfdf2099fc2c381d8
Author: Torvald Riegel <triegel@redhat.com>
Date:   Sun Oct 19 21:59:26 2014 +0200

    pthread_once: Add fast path and remove x86 variants.

diff --git a/nptl/pthread_once.c b/nptl/pthread_once.c
index 595bd7e..8c00421 100644
--- a/nptl/pthread_once.c
+++ b/nptl/pthread_once.c
@@ -58,11 +58,13 @@  clear_once_control (void *arg)
    initialization is interrupted, we then fork 2^30 times (30 bits of
    once_control are used for the fork generation), and try to initialize
    again, we can deadlock because we can't distinguish the in-progress and
-   interrupted cases anymore.  */
-int
-__pthread_once (once_control, init_routine)
-     pthread_once_t *once_control;
-     void (*init_routine) (void);
+   interrupted cases anymore.
+   XXX: We split out this slow path because current compilers do not generate
+   as efficient code when the fast path in __pthread_once below is not in a
+   separate function.  */
+static int
+__attribute__ ((noinline))
+__pthread_once_slow (pthread_once_t *once_control, void (*init_routine) (void))
 {
   while (1)
     {
@@ -72,7 +74,7 @@  __pthread_once (once_control, init_routine)
          signals that initialization has finished, we need to see any
          data modifications done during initialization.  */
       val = *once_control;
-      atomic_read_barrier();
+      atomic_read_barrier ();
       do
 	{
 	  /* Check if the initialization has already been done.  */
@@ -120,7 +122,7 @@  __pthread_once (once_control, init_routine)
       /* Mark *once_control as having finished the initialization.  We need
          release memory order here because we need to synchronize with other
          threads that want to use the initialized data.  */
-      atomic_write_barrier();
+      atomic_write_barrier ();
       *once_control = __PTHREAD_ONCE_DONE;
 
       /* Wake up all other threads.  */
@@ -130,5 +132,18 @@  __pthread_once (once_control, init_routine)
 
   return 0;
 }
+
+int
+__pthread_once (pthread_once_t *once_control, void (*init_routine) (void))
+{
+  /* Fast path.  See __pthread_once_slow.  */
+  int val;
+  val = *once_control;
+  atomic_read_barrier ();
+  if (__glibc_likely ((val & __PTHREAD_ONCE_DONE) != 0))
+    return 0;
+  else
+    return __pthread_once_slow 23(once_control, init_routine);
+}
 weak_alias (__pthread_once, pthread_once)
 hidden_def (__pthread_once)
diff --git a/sysdeps/unix/sysv/linux/i386/pthread_once.S b/sysdeps/unix/sysv/linux/i386/pthread_once.S
deleted file mode 100644
index dacd724..0000000
--- a/sysdeps/unix/sysv/linux/i386/pthread_once.S
+++ /dev/null
@@ -1,178 +0,0 @@ 
-/* Copyright (C) 2002-2014 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <unwindbuf.h>
-#include <sysdep.h>
-#include <kernel-features.h>
-#include <lowlevellock.h>
-
-
-	.comm	__fork_generation, 4, 4
-
-	.text
-
-
-	.globl	__pthread_once
-	.type	__pthread_once,@function
-	.align	16
-	cfi_startproc
-__pthread_once:
-	movl	4(%esp), %ecx
-	testl	$2, (%ecx)
-	jz	1f
-	xorl	%eax, %eax
-	ret
-
-1:	pushl	%ebx
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (3, 0)
-	pushl	%esi
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (6, 0)
-	movl	%ecx, %ebx
-	xorl	%esi, %esi
-
-	/* Not yet initialized or initialization in progress.
-	   Get the fork generation counter now.  */
-6:	movl	(%ebx), %eax
-#ifdef PIC
-	LOAD_PIC_REG(cx)
-#endif
-
-5:	movl	%eax, %edx
-
-	testl	$2, %eax
-	jnz	4f
-
-	andl	$3, %edx
-#ifdef PIC
-	orl	__fork_generation@GOTOFF(%ecx), %edx
-#else
-	orl	__fork_generation, %edx
-#endif
-	orl	$1, %edx
-
-	LOCK
-	cmpxchgl %edx, (%ebx)
-	jnz	5b
-
-	/* Check whether another thread already runs the initializer.  */
-	testl	$1, %eax
-	jz	3f	/* No -> do it.  */
-
-	/* Check whether the initializer execution was interrupted
-	   by a fork.  */
-	xorl	%edx, %eax
-	testl	$0xfffffffc, %eax
-	jnz	3f	/* Different for generation -> run initializer.  */
-
-	/* Somebody else got here first.  Wait.  */
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAIT|FUTEX_PRIVATE_FLAG, %ecx
-#else
-# if FUTEX_WAIT == 0
-	movl	%gs:PRIVATE_FUTEX, %ecx
-# else
-	movl	$FUTEX_WAIT, %ecx
-	orl	%gs:PRIVATE_FUTEX, %ecx
-# endif
-#endif
-	movl	$SYS_futex, %eax
-	ENTER_KERNEL
-	jmp	6b
-
-3:	/* Call the initializer function after setting up the
-	   cancellation handler.  Note that it is not possible here
-	   to use the unwind-based cleanup handling.  This would require
-	   that the user-provided function and all the code it calls
-	   is compiled with exceptions.  Unfortunately this cannot be
-	   guaranteed.  */
-	subl	$UNWINDBUFSIZE+8, %esp
-	cfi_adjust_cfa_offset (UNWINDBUFSIZE+8)
-	movl	%ecx, %ebx		/* PIC register value.  */
-
-	leal	8+UWJMPBUF(%esp), %eax
-	movl	$0, 4(%esp)
-	movl	%eax, (%esp)
-	call	__sigsetjmp@PLT
-	testl	%eax, %eax
-	jne	7f
-
-	leal	8(%esp), %eax
-	call	HIDDEN_JUMPTARGET(__pthread_register_cancel)
-
-	/* Call the user-provided initialization function.  */
-	call	*24+UNWINDBUFSIZE(%esp)
-
-	/* Pop the cleanup handler.  */
-	leal	8(%esp), %eax
-	call	HIDDEN_JUMPTARGET(__pthread_unregister_cancel)
-	addl	$UNWINDBUFSIZE+8, %esp
-	cfi_adjust_cfa_offset (-UNWINDBUFSIZE-8)
-
-	/* Sucessful run of the initializer.  Signal that we are done.  */
-	movl	12(%esp), %ebx
-	LOCK
-	addl	$1, (%ebx)
-
-	/* Wake up all other threads.  */
-	movl	$0x7fffffff, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE|FUTEX_PRIVATE_FLAG, %ecx
-#else
-	movl	$FUTEX_WAKE, %ecx
-	orl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-	movl	$SYS_futex, %eax
-	ENTER_KERNEL
-
-4:	popl	%esi
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (6)
-	popl	%ebx
-	cfi_adjust_cfa_offset (-4)
-	cfi_restore (3)
-	xorl	%eax, %eax
-	ret
-
-7:	/* __sigsetjmp returned for the second time.  */
-	movl	20+UNWINDBUFSIZE(%esp), %ebx
-	cfi_adjust_cfa_offset (UNWINDBUFSIZE+16)
-	cfi_offset (3, -8)
-	cfi_offset (6, -12)
-	movl	$0, (%ebx)
-
-	movl	$0x7fffffff, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE|FUTEX_PRIVATE_FLAG, %ecx
-#else
-	movl	$FUTEX_WAKE, %ecx
-	orl	%gs:PRIVATE_FUTEX, %ecx
-#endif
-	movl	$SYS_futex, %eax
-	ENTER_KERNEL
-
-	leal	8(%esp), %eax
-	call	HIDDEN_JUMPTARGET (__pthread_unwind_next)
-	/* NOTREACHED */
-	hlt
-	cfi_endproc
-	.size	__pthread_once,.-__pthread_once
-
-hidden_def (__pthread_once)
-strong_alias (__pthread_once, pthread_once)
diff --git a/sysdeps/unix/sysv/linux/x86_64/pthread_once.S b/sysdeps/unix/sysv/linux/x86_64/pthread_once.S
deleted file mode 100644
index 2cbe2fa..0000000
--- a/sysdeps/unix/sysv/linux/x86_64/pthread_once.S
+++ /dev/null
@@ -1,193 +0,0 @@ 
-/* Copyright (C) 2002-2014 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <kernel-features.h>
-#include <tcb-offsets.h>
-#include <lowlevellock.h>
-
-
-	.comm	__fork_generation, 4, 4
-
-	.text
-
-
-	.globl	__pthread_once
-	.type	__pthread_once,@function
-	.align	16
-__pthread_once:
-.LSTARTCODE:
-	cfi_startproc
-#ifdef SHARED
-	cfi_personality(DW_EH_PE_pcrel | DW_EH_PE_sdata4 | DW_EH_PE_indirect,
-			DW.ref.__gcc_personality_v0)
-	cfi_lsda(DW_EH_PE_pcrel | DW_EH_PE_sdata4, .LexceptSTART)
-#else
-	cfi_personality(DW_EH_PE_udata4, __gcc_personality_v0)
-	cfi_lsda(DW_EH_PE_udata4, .LexceptSTART)
-#endif
-	testl	$2, (%rdi)
-	jz	1f
-	xorl	%eax, %eax
-	retq
-
-	/* Preserve the function pointer.  */
-1:	pushq	%rsi
-	cfi_adjust_cfa_offset(8)
-	xorq	%r10, %r10
-
-	/* Not yet initialized or initialization in progress.
-	   Get the fork generation counter now.  */
-6:	movl	(%rdi), %eax
-
-5:	movl	%eax, %edx
-
-	testl	$2, %eax
-	jnz	4f
-
-	andl	$3, %edx
-	orl	__fork_generation(%rip), %edx
-	orl	$1, %edx
-
-	LOCK
-	cmpxchgl %edx, (%rdi)
-	jnz	5b
-
-	/* Check whether another thread already runs the initializer.  */
-	testl	$1, %eax
-	jz	3f	/* No -> do it.  */
-
-	/* Check whether the initializer execution was interrupted
-	   by a fork.  */
-	xorl	%edx, %eax
-	testl	$0xfffffffc, %eax
-	jnz	3f	/* Different for generation -> run initializer.  */
-
-	/* Somebody else got here first.  Wait.  */
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAIT|FUTEX_PRIVATE_FLAG, %esi
-#else
-# if FUTEX_WAIT == 0
-	movl	%fs:PRIVATE_FUTEX, %esi
-# else
-	movl	$FUTEX_WAIT, %esi
-	orl	%fs:PRIVATE_FUTEX, %esi
-# endif
-#endif
-	movl	$SYS_futex, %eax
-	syscall
-	jmp	6b
-
-	/* Preserve the pointer to the control variable.  */
-3:	pushq	%rdi
-	cfi_adjust_cfa_offset(8)
-	pushq	%rdi
-	cfi_adjust_cfa_offset(8)
-
-.LcleanupSTART:
-	callq	*16(%rsp)
-.LcleanupEND:
-
-	/* Get the control variable address back.  */
-	popq	%rdi
-	cfi_adjust_cfa_offset(-8)
-
-	/* Sucessful run of the initializer.  Signal that we are done.  */
-	LOCK
-	incl	(%rdi)
-
-	addq	$8, %rsp
-	cfi_adjust_cfa_offset(-8)
-
-	/* Wake up all other threads.  */
-	movl	$0x7fffffff, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE|FUTEX_PRIVATE_FLAG, %esi
-#else
-	movl	$FUTEX_WAKE, %esi
-	orl	%fs:PRIVATE_FUTEX, %esi
-#endif
-	movl	$SYS_futex, %eax
-	syscall
-
-4:	addq	$8, %rsp
-	cfi_adjust_cfa_offset(-8)
-	xorl	%eax, %eax
-	retq
-	.size	__pthread_once,.-__pthread_once
-
-
-hidden_def (__pthread_once)
-strong_alias (__pthread_once, pthread_once)
-
-
-	.type	clear_once_control,@function
-	.align	16
-clear_once_control:
-	cfi_adjust_cfa_offset(3 * 8)
-	movq	(%rsp), %rdi
-	movq	%rax, %r8
-	movl	$0, (%rdi)
-
-	movl	$0x7fffffff, %edx
-#ifdef __ASSUME_PRIVATE_FUTEX
-	movl	$FUTEX_WAKE|FUTEX_PRIVATE_FLAG, %esi
-#else
-	movl	$FUTEX_WAKE, %esi
-	orl	%fs:PRIVATE_FUTEX, %esi
-#endif
-	movl	$SYS_futex, %eax
-	syscall
-
-	movq	%r8, %rdi
-.LcallUR:
-	call	_Unwind_Resume@PLT
-	hlt
-.LENDCODE:
-	cfi_endproc
-	.size	clear_once_control,.-clear_once_control
-
-
-	.section .gcc_except_table,"a",@progbits
-.LexceptSTART:
-	.byte	DW_EH_PE_omit			# @LPStart format
-	.byte	DW_EH_PE_omit			# @TType format
-	.byte	DW_EH_PE_uleb128		# call-site format
-	.uleb128 .Lcstend-.Lcstbegin
-.Lcstbegin:
-	.uleb128 .LcleanupSTART-.LSTARTCODE
-	.uleb128 .LcleanupEND-.LcleanupSTART
-	.uleb128 clear_once_control-.LSTARTCODE
-	.uleb128  0
-	.uleb128 .LcallUR-.LSTARTCODE
-	.uleb128 .LENDCODE-.LcallUR
-	.uleb128 0
-	.uleb128  0
-.Lcstend:
-
-
-#ifdef SHARED
-	.hidden	DW.ref.__gcc_personality_v0
-	.weak	DW.ref.__gcc_personality_v0
-	.section .gnu.linkonce.d.DW.ref.__gcc_personality_v0,"aw",@progbits
-	.align	LP_SIZE
-	.type	DW.ref.__gcc_personality_v0, @object
-	.size	DW.ref.__gcc_personality_v0, LP_SIZE
-DW.ref.__gcc_personality_v0:
-	ASM_ADDR __gcc_personality_v0
-#endif