[v5,2/3] Reduce CAS in __pthread_mutex_lock_full [BZ #28537]

Message ID 20211110184153.2269857-3-hjl.tools@gmail.com
State Superseded
Headers
Series Optimize CAS [BZ #28537]

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

H.J. Lu Nov. 10, 2021, 6:41 p.m. UTC
  Change __pthread_mutex_lock_full to first do a relaxed atomic load and
skip the CAS when the compare would fail, reducing cache line bouncing on
contended locks.
---
 nptl/pthread_mutex_lock.c | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)
  

Patch

diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
index 2bd41767e0..d7e8efedd2 100644
--- a/nptl/pthread_mutex_lock.c
+++ b/nptl/pthread_mutex_lock.c
@@ -223,13 +223,13 @@  __pthread_mutex_lock_full (pthread_mutex_t *mutex)
 	      newval |= (oldval & FUTEX_WAITERS) | assume_other_futex_waiters;
 #endif
 
-	      newval
-		= atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
-						       newval, oldval);
-
-	      if (newval != oldval)
+	      int val = atomic_load_relaxed (&mutex->__data.__lock);
+	      if (val != oldval
+		  || ((val = atomic_compare_and_exchange_val_acq
+			 (&mutex->__data.__lock, newval, oldval))
+		      != oldval))
 		{
-		  oldval = newval;
+		  oldval = val;
 		  continue;
 		}
 
@@ -411,11 +411,15 @@  __pthread_mutex_lock_full (pthread_mutex_t *mutex)
 # ifdef NO_INCR
 	newval |= FUTEX_WAITERS;
 # endif
+	oldval = atomic_load_relaxed (&mutex->__data.__lock);
+	if (oldval != 0)
+	  goto locked_mutex;
 	oldval = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
 						      newval, 0);
 
 	if (oldval != 0)
 	  {
+ locked_mutex:;
 	    /* The mutex is locked.  The kernel will now take care of
 	       everything.  */
 	    int private = (robust
@@ -554,6 +558,10 @@  __pthread_mutex_lock_full (pthread_mutex_t *mutex)
 	    ceilval = ceiling << PTHREAD_MUTEX_PRIO_CEILING_SHIFT;
 	    oldprio = ceiling;
 
+	    oldval = atomic_load_relaxed (&mutex->__data.__lock);
+	    if (oldval != ceilval)
+	      goto ceilval_failed;
+
 	    oldval
 	      = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
 #ifdef NO_INCR
@@ -568,10 +576,13 @@  __pthread_mutex_lock_full (pthread_mutex_t *mutex)
 
 	    do
 	      {
-		oldval
-		  = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
-							 ceilval | 2,
-							 ceilval | 1);
+	        oldval = atomic_load_relaxed (&mutex->__data.__lock);
+ ceilval_failed:
+		if (oldval == (ceilval | 1))
+		  oldval
+		    = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+							   ceilval | 2,
+							   ceilval | 1);
 
 		if ((oldval & PTHREAD_MUTEX_PRIO_CEILING_MASK) != ceilval)
 		  break;
@@ -581,9 +592,10 @@  __pthread_mutex_lock_full (pthread_mutex_t *mutex)
 			      ceilval | 2,
 			      PTHREAD_MUTEX_PSHARED (mutex));
 	      }
-	    while (atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
-							ceilval | 2, ceilval)
-		   != ceilval);
+	    while (atomic_load_relaxed (&mutex->__data.__lock) != ceilval
+		   || (atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
+							    ceilval | 2, ceilval)
+		       != ceilval));
 	  }
 	while ((oldval & PTHREAD_MUTEX_PRIO_CEILING_MASK) != ceilval);