[1/2] Add a fast path for C rd/wrlock

Message ID 1395789863-5026-2-git-send-email-andi@firstfloor.org
State Superseded
Headers

Commit Message

Andi Kleen March 25, 2014, 11:24 p.m. UTC
  From: Andi Kleen <ak@linux.intel.com>

One difference between the C versions of wr/rdlock and the assembler
versions is that the C compiler saves some registers in the function
prologue that the fast path does not need. Split the
uncontended fast path out into a separate function. Only when contention is
detected is the full featured function called. This makes
the fast path code (nearly) identical to the assembler version,
and brings uncontended performance to within a few cycles of it.

nptl/:
2014-03-25  Andi Kleen  <ak@linux.intel.com>

	* pthread_rwlock_rdlock.c (__pthread_rwlock_rdlock):
	Split into __do_pthread_rwlock_rdlock and __pthread_rwlock_rdlock.
	* pthread_rwlock_wrlock.c (__pthread_rwlock_wrlock):
	Split into __do_pthread_rwlock_wrlock and __pthread_rwlock_wrlock.
---
 nptl/pthread_rwlock_rdlock.c | 88 ++++++++++++++++++++++++++++++--------------
 nptl/pthread_rwlock_wrlock.c | 59 ++++++++++++++++++++---------
 2 files changed, 103 insertions(+), 44 deletions(-)
  

Comments

Torvald Riegel May 2, 2014, 12:29 p.m. UTC | #1
On Tue, 2014-03-25 at 16:24 -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> One difference of the C versions to the assembler wr/rdlock
> is that the C compiler saves some registers which are unnecessary
> for the fast path in the prologue of the functions. Split the
> uncontended fast path out into a separate function. Only when contention is
> detected is the full featured function called. This makes
> the fast path code (nearly) identical to the assembler version,
> and gives uncontended performance within a few cycles.
> 
> nptl/:
> 2014-03-25  Andi Kleen  <ak@linux.intel.com>
> 
> 	* pthread_rwlock_rdlock (__pthread_rwlock_rdlock):
> 	Split into __do_pthread_rwlock_rdlock and __pthread_rwlock_rdlock.
> 	* pthread_rwlock_wrlock (__pthread_rwlock_wrlock):
> 	Split into __do_pthread_rwlock_wrlock and __pthread_wrlock_rdlock.
> ---
>  nptl/pthread_rwlock_rdlock.c | 88 ++++++++++++++++++++++++++++++--------------
>  nptl/pthread_rwlock_wrlock.c | 59 ++++++++++++++++++++---------
>  2 files changed, 103 insertions(+), 44 deletions(-)
> 
> diff --git a/nptl/pthread_rwlock_rdlock.c b/nptl/pthread_rwlock_rdlock.c
> index 3773f7d..a4deed4 100644
> --- a/nptl/pthread_rwlock_rdlock.c
> +++ b/nptl/pthread_rwlock_rdlock.c
> @@ -24,39 +24,16 @@
>  #include <stap-probe.h>
>  
> 
> -/* Acquire read lock for RWLOCK.  */
> -int
> -__pthread_rwlock_rdlock (rwlock)
> -     pthread_rwlock_t *rwlock;
> +/* Acquire read lock for RWLOCK.  Slow path. */

Use a double space before the end of the comment, i.e. "Slow path.  */".

> +static int __attribute__((noinline))
> +__do_pthread_rwlock_rdlock (pthread_rwlock_t *rwlock)

I'd prefer renaming that to __pthread_rwlock_rdlock_slow.
Alternatively, we could use the "_full" suffix, as the mutex code is
doing.

>  {
>    int result = 0;
>  
> -  LIBC_PROBE (rdlock_entry, 1, rwlock);
> -
> -  /* Make sure we are alone.  */
> -  lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
> +  /* Lock is taken in caller. */

Use a double space before the end of the comment, i.e. "Lock is taken in caller.  */".

>  
>    while (1)
>      {
> -      /* Get the rwlock if there is no writer...  */
> -      if (rwlock->__data.__writer == 0
> -	  /* ...and if either no writer is waiting or we prefer readers.  */
> -	  && (!rwlock->__data.__nr_writers_queued
> -	      || PTHREAD_RWLOCK_PREFER_READER_P (rwlock)))
> -	{
> -	  /* Increment the reader counter.  Avoid overflow.  */
> -	  if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0))
> -	    {
> -	      /* Overflow on number of readers.	 */
> -	      --rwlock->__data.__nr_readers;
> -	      result = EAGAIN;
> -	    }
> -	  else
> -	    LIBC_PROBE (rdlock_acquire_read, 1, rwlock);
> -
> -	  break;
> -	}
> -
>        /* Make sure we are not holding the rwlock as a writer.  This is
>  	 a deadlock situation we recognize and report.  */
>        if (__builtin_expect (rwlock->__data.__writer
> @@ -88,6 +65,25 @@ __pthread_rwlock_rdlock (rwlock)
>        lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
>  
>        --rwlock->__data.__nr_readers_queued;
> +
> +      /* Get the rwlock if there is no writer...  */
> +      if (rwlock->__data.__writer == 0
> +	  /* ...and if either no writer is waiting or we prefer readers.  */
> +	  && (!rwlock->__data.__nr_writers_queued
> +	      || PTHREAD_RWLOCK_PREFER_READER_P (rwlock)))
> +	{
> +	  /* Increment the reader counter.  Avoid overflow.  */
> +	  if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0))
> +	    {
> +	      /* Overflow on number of readers.	 */
> +	      --rwlock->__data.__nr_readers;
> +	      result = EAGAIN;
> +	    }
> +	  else
> +	    LIBC_PROBE (rdlock_acquire_read, 1, rwlock);
> +
> +	  break;
> +	}
>      }
>  
>    /* We are done, free the lock.  */
> @@ -96,5 +92,43 @@ __pthread_rwlock_rdlock (rwlock)
>    return result;
>  }
>  
> +
> +/* Fast path of acquiring read lock on RWLOCK.  */
> +
> +int
> +__pthread_rwlock_rdlock (pthread_rwlock_t *rwlock)
> +{
> +  int result = 0;
> +
> +  LIBC_PROBE (rdlock_entry, 1, rwlock);
> +
> +  /* Make sure we are alone.  */
> +  lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
> +
> +  /* Get the rwlock if there is no writer...  */
> +  if (rwlock->__data.__writer == 0
> +      /* ...and if either no writer is waiting or we prefer readers.  */
> +      && (!rwlock->__data.__nr_writers_queued
> +	  || PTHREAD_RWLOCK_PREFER_READER_P (rwlock)))
> +    {
> +      /* Increment the reader counter.  Avoid overflow.  */
> +      if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0))
> +	{
> +	  /* Overflow on number of readers.	 */
> +	  --rwlock->__data.__nr_readers;
> +	  result = EAGAIN;
> +	}
> +      else
> +	LIBC_PROBE (rdlock_acquire_read, 1, rwlock);
> +
> +      /* We are done, free the lock.  */
> +      lll_unlock (rwlock->__data.__lock, rwlock->__data.__shared);
> +
> +      return result;
> +    }
> +
> +  return __do_pthread_rwlock_rdlock (rwlock);
> +}
> +
>  weak_alias (__pthread_rwlock_rdlock, pthread_rwlock_rdlock)
>  hidden_def (__pthread_rwlock_rdlock)
> diff --git a/nptl/pthread_rwlock_wrlock.c b/nptl/pthread_rwlock_wrlock.c
> index 1613d45..2907681 100644
> --- a/nptl/pthread_rwlock_wrlock.c
> +++ b/nptl/pthread_rwlock_wrlock.c
> @@ -25,29 +25,15 @@
>  
> 
>  /* Acquire write lock for RWLOCK.  */
> -int
> -__pthread_rwlock_wrlock (rwlock)
> -     pthread_rwlock_t *rwlock;
> +static int __attribute__((noinline))
> +__do_pthread_rwlock_wrlock (pthread_rwlock_t *rwlock)

See above: the same naming comment applies here (e.g. __pthread_rwlock_wrlock_slow, or a "_full" suffix).
  

Patch

diff --git a/nptl/pthread_rwlock_rdlock.c b/nptl/pthread_rwlock_rdlock.c
index 3773f7d..a4deed4 100644
--- a/nptl/pthread_rwlock_rdlock.c
+++ b/nptl/pthread_rwlock_rdlock.c
@@ -24,39 +24,16 @@ 
 #include <stap-probe.h>
 
 
-/* Acquire read lock for RWLOCK.  */
-int
-__pthread_rwlock_rdlock (rwlock)
-     pthread_rwlock_t *rwlock;
+/* Acquire read lock for RWLOCK.  Slow path. */
+static int __attribute__((noinline))
+__do_pthread_rwlock_rdlock (pthread_rwlock_t *rwlock)
 {
   int result = 0;
 
-  LIBC_PROBE (rdlock_entry, 1, rwlock);
-
-  /* Make sure we are alone.  */
-  lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
+  /* Lock is taken in caller. */
 
   while (1)
     {
-      /* Get the rwlock if there is no writer...  */
-      if (rwlock->__data.__writer == 0
-	  /* ...and if either no writer is waiting or we prefer readers.  */
-	  && (!rwlock->__data.__nr_writers_queued
-	      || PTHREAD_RWLOCK_PREFER_READER_P (rwlock)))
-	{
-	  /* Increment the reader counter.  Avoid overflow.  */
-	  if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0))
-	    {
-	      /* Overflow on number of readers.	 */
-	      --rwlock->__data.__nr_readers;
-	      result = EAGAIN;
-	    }
-	  else
-	    LIBC_PROBE (rdlock_acquire_read, 1, rwlock);
-
-	  break;
-	}
-
       /* Make sure we are not holding the rwlock as a writer.  This is
 	 a deadlock situation we recognize and report.  */
       if (__builtin_expect (rwlock->__data.__writer
@@ -88,6 +65,25 @@  __pthread_rwlock_rdlock (rwlock)
       lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
 
       --rwlock->__data.__nr_readers_queued;
+
+      /* Get the rwlock if there is no writer...  */
+      if (rwlock->__data.__writer == 0
+	  /* ...and if either no writer is waiting or we prefer readers.  */
+	  && (!rwlock->__data.__nr_writers_queued
+	      || PTHREAD_RWLOCK_PREFER_READER_P (rwlock)))
+	{
+	  /* Increment the reader counter.  Avoid overflow.  */
+	  if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0))
+	    {
+	      /* Overflow on number of readers.	 */
+	      --rwlock->__data.__nr_readers;
+	      result = EAGAIN;
+	    }
+	  else
+	    LIBC_PROBE (rdlock_acquire_read, 1, rwlock);
+
+	  break;
+	}
     }
 
   /* We are done, free the lock.  */
@@ -96,5 +92,43 @@  __pthread_rwlock_rdlock (rwlock)
   return result;
 }
 
+
+/* Fast path of acquiring read lock on RWLOCK.  */
+
+int
+__pthread_rwlock_rdlock (pthread_rwlock_t *rwlock)
+{
+  int result = 0;
+
+  LIBC_PROBE (rdlock_entry, 1, rwlock);
+
+  /* Make sure we are alone.  */
+  lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
+
+  /* Get the rwlock if there is no writer...  */
+  if (rwlock->__data.__writer == 0
+      /* ...and if either no writer is waiting or we prefer readers.  */
+      && (!rwlock->__data.__nr_writers_queued
+	  || PTHREAD_RWLOCK_PREFER_READER_P (rwlock)))
+    {
+      /* Increment the reader counter.  Avoid overflow.  */
+      if (__glibc_unlikely (++rwlock->__data.__nr_readers == 0))
+	{
+	  /* Overflow on number of readers.	 */
+	  --rwlock->__data.__nr_readers;
+	  result = EAGAIN;
+	}
+      else
+	LIBC_PROBE (rdlock_acquire_read, 1, rwlock);
+
+      /* We are done, free the lock.  */
+      lll_unlock (rwlock->__data.__lock, rwlock->__data.__shared);
+
+      return result;
+    }
+
+  return __do_pthread_rwlock_rdlock (rwlock);
+}
+
 weak_alias (__pthread_rwlock_rdlock, pthread_rwlock_rdlock)
 hidden_def (__pthread_rwlock_rdlock)
diff --git a/nptl/pthread_rwlock_wrlock.c b/nptl/pthread_rwlock_wrlock.c
index 1613d45..2907681 100644
--- a/nptl/pthread_rwlock_wrlock.c
+++ b/nptl/pthread_rwlock_wrlock.c
@@ -25,29 +25,15 @@ 
 
 
 /* Acquire write lock for RWLOCK.  */
-int
-__pthread_rwlock_wrlock (rwlock)
-     pthread_rwlock_t *rwlock;
+static int __attribute__((noinline))
+__do_pthread_rwlock_wrlock (pthread_rwlock_t *rwlock)
 {
   int result = 0;
 
-  LIBC_PROBE (wrlock_entry, 1, rwlock);
-
-  /* Make sure we are alone.  */
-  lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
+  /* Caller has taken the lock.  */
 
   while (1)
     {
-      /* Get the rwlock if there is no writer and no reader.  */
-      if (rwlock->__data.__writer == 0 && rwlock->__data.__nr_readers == 0)
-	{
-	  /* Mark self as writer.  */
-	  rwlock->__data.__writer = THREAD_GETMEM (THREAD_SELF, tid);
-
-	  LIBC_PROBE (wrlock_acquire_write, 1, rwlock);
-	  break;
-	}
-
       /* Make sure we are not holding the rwlock as a writer.  This is
 	 a deadlock situation we recognize and report.  */
       if (__builtin_expect (rwlock->__data.__writer
@@ -80,6 +66,16 @@  __pthread_rwlock_wrlock (rwlock)
 
       /* To start over again, remove the thread from the writer list.  */
       --rwlock->__data.__nr_writers_queued;
+
+      /* Get the rwlock if there is no writer and no reader.  */
+      if (rwlock->__data.__writer == 0 && rwlock->__data.__nr_readers == 0)
+	{
+	  /* Mark self as writer.  */
+	  rwlock->__data.__writer = THREAD_GETMEM (THREAD_SELF, tid);
+
+	  LIBC_PROBE (wrlock_acquire_write, 1, rwlock);
+	  break;
+	}
     }
 
   /* We are done, free the lock.  */
@@ -88,5 +84,34 @@  __pthread_rwlock_wrlock (rwlock)
   return result;
 }
 
+/* Fast path of acquiring write lock for RWLOCK.  */
+
+int
+__pthread_rwlock_wrlock (pthread_rwlock_t *rwlock)
+{
+  LIBC_PROBE (wrlock_entry, 1, rwlock);
+
+  /* Make sure we are alone.  */
+  lll_lock (rwlock->__data.__lock, rwlock->__data.__shared);
+
+  /* Get the rwlock if there is no writer and no reader.  */
+  if (__glibc_likely((rwlock->__data.__writer |
+	rwlock->__data.__nr_readers) == 0))
+    {
+      /* Mark self as writer.  */
+      rwlock->__data.__writer = THREAD_GETMEM (THREAD_SELF, tid);
+
+      LIBC_PROBE (wrlock_acquire_write, 1, rwlock);
+
+      /* We are done, free the lock.  */
+      lll_unlock (rwlock->__data.__lock, rwlock->__data.__shared);
+
+      return 0;
+    }
+
+  return __do_pthread_rwlock_wrlock (rwlock);
+}
+
+
 weak_alias (__pthread_rwlock_wrlock, pthread_rwlock_wrlock)
 hidden_def (__pthread_rwlock_wrlock)