nptl: Fix PTHREAD_PRIO_PROTECT timed lock

Message ID 20201125202723.1513496-1-adhemerval.zanella@linaro.org
State Committed
Headers
Series nptl: Fix PTHREAD_PRIO_PROTECT timed lock |

Commit Message

Adhemerval Zanella Netto Nov. 25, 2020, 8:27 p.m. UTC
  The 878fe624d4 changed lll_futex_timed_wait, which expects a relative
timeout, with a __futex_abstimed_wait64, which expects an absolute
timeout.  However the code still passes a relative timeout.

Also, the PTHREAD_PRIO_PROTECT support for clocks different than
CLOCK_REALTIME was broken since the inclusion of
pthread_mutex_clocklock (9d20e22e46) since lll_futex_timed_wait
always use CLOCK_REALTIME.

This patch fixes by removing the relative time calculation.  It
also adds some xtests that tests both thread and inter-process
usage.

Checked on x86_64-linux-gnu.
---
 nptl/Makefile                  |  3 ++-
 nptl/pthread_mutex_timedlock.c | 29 +++++------------------------
 nptl/tst-mutexpp5.c            |  2 ++
 nptl/tst-mutexpp9.c            |  2 ++
 sysdeps/pthread/tst-mutex5.c   | 12 +++++++++++-
 sysdeps/pthread/tst-mutex9.c   | 13 ++++++++++++-
 6 files changed, 34 insertions(+), 27 deletions(-)
 create mode 100644 nptl/tst-mutexpp5.c
 create mode 100644 nptl/tst-mutexpp9.c
  

Comments

Mike Crowe Nov. 26, 2020, 11:27 a.m. UTC | #1
On Wednesday 25 November 2020 at 17:27:23 -0300, Adhemerval Zanella wrote:
> The 878fe624d4 changed lll_futex_timed_wait, which expects a relative
> timeout, with a __futex_abstimed_wait64, which expects an absolute
> timeout.  However the code still passes a relative timeout.
> 
> Also, the PTHREAD_PRIO_PROTECT support for clocks different than
> CLOCK_REALTIME was broken since the inclusion of
> pthread_mutex_clocklock (9d20e22e46) since lll_futex_timed_wait
> always use CLOCK_REALTIME.
> 
> This patch fixes by removing the relative time calculation.  It
> also adds some xtests that tests both thread and inter-process
> usage.
> 
> Checked on x86_64-linux-gnu.
> ---
>  nptl/Makefile                  |  3 ++-
>  nptl/pthread_mutex_timedlock.c | 29 +++++------------------------
>  nptl/tst-mutexpp5.c            |  2 ++
>  nptl/tst-mutexpp9.c            |  2 ++
>  sysdeps/pthread/tst-mutex5.c   | 12 +++++++++++-
>  sysdeps/pthread/tst-mutex9.c   | 13 ++++++++++++-
>  6 files changed, 34 insertions(+), 27 deletions(-)
>  create mode 100644 nptl/tst-mutexpp5.c
>  create mode 100644 nptl/tst-mutexpp9.c
> 
> diff --git a/nptl/Makefile b/nptl/Makefile
> index a48426a396..94d805f0d4 100644
> --- a/nptl/Makefile
> +++ b/nptl/Makefile
> @@ -309,7 +309,8 @@ tests-internal := tst-robustpi8 tst-rwlock19 tst-rwlock20 \
>  		  tst-setgetname \
>  
>  xtests = tst-setuid1 tst-setuid1-static tst-setuid2 \
> -	tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups
> +	tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups \
> +	tst-mutexpp5 tst-mutexpp9
>  
>  # This test can run into task limits because of a linux kernel bug
>  # and then cause the make process to fail too, see bug 24537.
> diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
> index aaaafa21ce..74adffe790 100644
> --- a/nptl/pthread_mutex_timedlock.c
> +++ b/nptl/pthread_mutex_timedlock.c
> @@ -547,30 +547,11 @@ __pthread_mutex_clocklock_common (pthread_mutex_t *mutex,
>  			goto failpp;
>  		      }
>  
> -		    struct __timespec64 rt;
> -
> -		    /* Get the current time.  */
> -		    __clock_gettime64 (CLOCK_REALTIME, &rt);
> -
> -		    /* Compute relative timeout.  */
> -		    rt.tv_sec = abstime->tv_sec - rt.tv_sec;
> -		    rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
> -		    if (rt.tv_nsec < 0)
> -		      {
> -			rt.tv_nsec += 1000000000;
> -			--rt.tv_sec;
> -		      }
> -
> -		    /* Already timed out?  */
> -		    if (rt.tv_sec < 0)
> -		      {
> -			result = ETIMEDOUT;
> -			goto failpp;
> -		      }
> -
> -		    __futex_abstimed_wait64 (
> -		      (unsigned int *) &mutex->__data.__lock, clockid,
> -		      ceilval | 2, &rt, PTHREAD_MUTEX_PSHARED (mutex));
> +		    int e = __futex_abstimed_wait64 (
> +		      (unsigned int *) &mutex->__data.__lock, ceilval | 2,
> +		      clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
> +		    if (e == ETIMEDOUT)
> +		      return ETIMEDOUT;

I'm worried that futex could return other errors here which would cause a
busy infinite loop. However, my attempts to provoke EINVAL have failed
since the validity of abstime.tv_nsec is checked earlier. Presumably we
should never get this far if EOVERFLOW could be returned? (If there is a
problem here, then it also affects other mutex types which have similar
code.)

>  		  }
>  	      }
>  	    while (atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
> diff --git a/nptl/tst-mutexpp5.c b/nptl/tst-mutexpp5.c
> new file mode 100644
> index 0000000000..a864a390ca
> --- /dev/null
> +++ b/nptl/tst-mutexpp5.c
> @@ -0,0 +1,2 @@
> +#define ENABLE_PP 1
> +#include "tst-mutex5.c"
> diff --git a/nptl/tst-mutexpp9.c b/nptl/tst-mutexpp9.c
> new file mode 100644
> index 0000000000..c848c74c7e
> --- /dev/null
> +++ b/nptl/tst-mutexpp9.c
> @@ -0,0 +1,2 @@
> +#define ENABLE_PP 1
> +#include "tst-mutex9.c"
> diff --git a/sysdeps/pthread/tst-mutex5.c b/sysdeps/pthread/tst-mutex5.c
> index bfe1a79fa4..dbd2c3c15f 100644
> --- a/sysdeps/pthread/tst-mutex5.c
> +++ b/sysdeps/pthread/tst-mutex5.c
> @@ -27,6 +27,9 @@
>  #include <support/check.h>
>  #include <support/timespec.h>
>  
> +#ifdef ENABLE_PP
> +#include "tst-tpp.h"
> +#endif
>  
>  #ifndef TYPE
>  # define TYPE PTHREAD_MUTEX_NORMAL
> @@ -47,8 +50,11 @@ do_test_clock (clockid_t clockid, const char *fnname)
>    TEST_COMPARE (pthread_mutexattr_init (&a), 0);
>    TEST_COMPARE (pthread_mutexattr_settype (&a, TYPE), 0);
>  
> -#ifdef ENABLE_PI
> +#if defined ENABLE_PI
>    TEST_COMPARE (pthread_mutexattr_setprotocol (&a, PTHREAD_PRIO_INHERIT), 0);
> +#elif defined ENABLE_PP
> +  TEST_COMPARE (pthread_mutexattr_setprotocol (&a, PTHREAD_PRIO_PROTECT), 0);
> +  TEST_COMPARE (pthread_mutexattr_setprioceiling (&a, 6), 0);
>  #endif
>  
>    int err = pthread_mutex_init (&m, &a);
> @@ -110,6 +116,10 @@ do_test_clock (clockid_t clockid, const char *fnname)
>  
>  static int do_test (void)
>  {
> +#ifdef ENABLE_PP
> +  init_tpp_test ();
> +#endif
> +
>    do_test_clock (CLOCK_USE_TIMEDLOCK, "timedlock");
>    do_test_clock (CLOCK_REALTIME, "clocklock(realtime)");
>  #ifndef ENABLE_PI
> diff --git a/sysdeps/pthread/tst-mutex9.c b/sysdeps/pthread/tst-mutex9.c
> index bfc01f8c75..081aeff0f6 100644
> --- a/sysdeps/pthread/tst-mutex9.c
> +++ b/sysdeps/pthread/tst-mutex9.c
> @@ -30,6 +30,10 @@
>  #include <support/timespec.h>
>  #include <support/xunistd.h>
>  
> +#ifdef ENABLE_PP
> +#include "tst-tpp.h"
> +#endif
> +
>  
>  /* A bogus clock value that tells run_test to use pthread_mutex_timedlock
>     rather than pthread_mutex_clocklock.  */
> @@ -73,8 +77,11 @@ do_test_clock (clockid_t clockid)
>  
>    TEST_COMPARE (pthread_mutexattr_settype (&a, PTHREAD_MUTEX_RECURSIVE), 0);
>  
> -#ifdef ENABLE_PI
> +#if defined ENABLE_PI
>    TEST_COMPARE (pthread_mutexattr_setprotocol (&a, PTHREAD_PRIO_INHERIT), 0);
> +#elif defined ENABLE_PP
> +  TEST_COMPARE (pthread_mutexattr_setprotocol (&a, PTHREAD_PRIO_PROTECT), 0);
> +  TEST_COMPARE (pthread_mutexattr_setprioceiling (&a, 6), 0);
>  #endif
>  
>    int e;
> @@ -131,6 +138,10 @@ do_test_clock (clockid_t clockid)
>  static int
>  do_test (void)
>  {
> +#ifdef ENABLE_PP
> +  init_tpp_test ();
> +#endif
> +
>    do_test_clock (CLOCK_USE_TIMEDLOCK);
>    do_test_clock (CLOCK_REALTIME);
>  #ifndef ENABLE_PI

LGTM. I have some extra timespec tests that I'll propose in a separate
patch once this lands.

Thanks.

Mike.
  
Adhemerval Zanella Netto Nov. 26, 2020, 12:15 p.m. UTC | #2
On 26/11/2020 08:27, Mike Crowe wrote:
> On Wednesday 25 November 2020 at 17:27:23 -0300, Adhemerval Zanella wrote:
>> The 878fe624d4 changed lll_futex_timed_wait, which expects a relative
>> timeout, with a __futex_abstimed_wait64, which expects an absolute
>> timeout.  However the code still passes a relative timeout.
>>
>> Also, the PTHREAD_PRIO_PROTECT support for clocks different than
>> CLOCK_REALTIME was broken since the inclusion of
>> pthread_mutex_clocklock (9d20e22e46) since lll_futex_timed_wait
>> always use CLOCK_REALTIME.
>>
>> This patch fixes by removing the relative time calculation.  It
>> also adds some xtests that tests both thread and inter-process
>> usage.
>>
>> Checked on x86_64-linux-gnu.
>> ---
>>  nptl/Makefile                  |  3 ++-
>>  nptl/pthread_mutex_timedlock.c | 29 +++++------------------------
>>  nptl/tst-mutexpp5.c            |  2 ++
>>  nptl/tst-mutexpp9.c            |  2 ++
>>  sysdeps/pthread/tst-mutex5.c   | 12 +++++++++++-
>>  sysdeps/pthread/tst-mutex9.c   | 13 ++++++++++++-
>>  6 files changed, 34 insertions(+), 27 deletions(-)
>>  create mode 100644 nptl/tst-mutexpp5.c
>>  create mode 100644 nptl/tst-mutexpp9.c
>>
>> diff --git a/nptl/Makefile b/nptl/Makefile
>> index a48426a396..94d805f0d4 100644
>> --- a/nptl/Makefile
>> +++ b/nptl/Makefile
>> @@ -309,7 +309,8 @@ tests-internal := tst-robustpi8 tst-rwlock19 tst-rwlock20 \
>>  		  tst-setgetname \
>>  
>>  xtests = tst-setuid1 tst-setuid1-static tst-setuid2 \
>> -	tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups
>> +	tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups \
>> +	tst-mutexpp5 tst-mutexpp9
>>  
>>  # This test can run into task limits because of a linux kernel bug
>>  # and then cause the make process to fail too, see bug 24537.
>> diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
>> index aaaafa21ce..74adffe790 100644
>> --- a/nptl/pthread_mutex_timedlock.c
>> +++ b/nptl/pthread_mutex_timedlock.c
>> @@ -547,30 +547,11 @@ __pthread_mutex_clocklock_common (pthread_mutex_t *mutex,
>>  			goto failpp;
>>  		      }
>>  
>> -		    struct __timespec64 rt;
>> -
>> -		    /* Get the current time.  */
>> -		    __clock_gettime64 (CLOCK_REALTIME, &rt);
>> -
>> -		    /* Compute relative timeout.  */
>> -		    rt.tv_sec = abstime->tv_sec - rt.tv_sec;
>> -		    rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
>> -		    if (rt.tv_nsec < 0)
>> -		      {
>> -			rt.tv_nsec += 1000000000;
>> -			--rt.tv_sec;
>> -		      }
>> -
>> -		    /* Already timed out?  */
>> -		    if (rt.tv_sec < 0)
>> -		      {
>> -			result = ETIMEDOUT;
>> -			goto failpp;
>> -		      }
>> -
>> -		    __futex_abstimed_wait64 (
>> -		      (unsigned int *) &mutex->__data.__lock, clockid,
>> -		      ceilval | 2, &rt, PTHREAD_MUTEX_PSHARED (mutex));
>> +		    int e = __futex_abstimed_wait64 (
>> +		      (unsigned int *) &mutex->__data.__lock, ceilval | 2,
>> +		      clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
>> +		    if (e == ETIMEDOUT)
>> +		      return ETIMEDOUT;
> 
> I'm worried that futex could return other errors here which would cause a
> busy infinite loop. However, my attempts to provoke EINVAL have failed
> since the validity of abstime.tv_nsec is checked earlier. Presumably we
> should never get this far if EOVERFLOW could be returned? (If there is a
> problem here, then it also affects other mutex types which have similar
> code.)

Revising the calls of futex-internal.h which might pass an timeout where
EOVERFLOW might happen, I see nptl code requires some fixes.

At nptl/pthread_mutex_timedlock.c for robust mutexes:

138     case PTHREAD_MUTEX_ROBUST_RECURSIVE_NP:
139     case PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP:
140     case PTHREAD_MUTEX_ROBUST_NORMAL_NP:
141     case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
[...]
235           /* We are about to block; check whether the timeout is invalid.  */
236           if (! valid_nanoseconds (abstime->tv_nsec))
237             return EINVAL;
238           /* Work around the fact that the kernel rejects negative timeout
239              values despite them being valid.  */
240           if (__glibc_unlikely (abstime->tv_sec < 0))
241             return ETIMEDOUT;

The FUTEX_WAITERS will trigger a futex wake on pthread_mutex_unlock, so 
this check is an optimization to avoid it.  It could be removed since
__futex_abstimed_wait64 does the same check (it might incur in a spurious
futex call).

[...]
268           int err = __futex_abstimed_wait64 (
269               (unsigned int *) &mutex->__data.__lock,
270               oldval, clockid, abstime,
271               PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
272           /* The futex call timed out.  */
273           if (err == ETIMEDOUT)
274             return err;

We need to handle EOVERFLOW since without a failure it is assumed the robust
mutex was locked.


The priority seems to be handle as expected:

307     case PTHREAD_MUTEX_PI_RECURSIVE_NP:
308     case PTHREAD_MUTEX_PI_ERRORCHECK_NP:
309     case PTHREAD_MUTEX_PI_NORMAL_NP:
310     case PTHREAD_MUTEX_PI_ADAPTIVE_NP:
311     case PTHREAD_MUTEX_PI_ROBUST_RECURSIVE_NP:
312     case PTHREAD_MUTEX_PI_ROBUST_ERRORCHECK_NP:
313     case PTHREAD_MUTEX_PI_ROBUST_NORMAL_NP:
314     case PTHREAD_MUTEX_PI_ROBUST_ADAPTIVE_NP:
[...]
380             /* The mutex is locked.  The kernel will now take care of
381                everything.  The timeout value must be a relative value.
382                Convert it.  */
383             int private = (robust
384                            ? PTHREAD_ROBUST_MUTEX_PSHARED (mutex)
385                            : PTHREAD_MUTEX_PSHARED (mutex));
386             int e = futex_lock_pi64 (&mutex->__data.__lock, abstime, private);
387             if (e == ETIMEDOUT)
388               return ETIMEDOUT;
389             else if (e == ESRCH || e == EDEADLK)
390               {
391                 assert (e != EDEADLK
392                         || (kind != PTHREAD_MUTEX_ERRORCHECK_NP
393                            && kind != PTHREAD_MUTEX_RECURSIVE_NP));
394                 /* ESRCH can happen only for non-robust PI mutexes where
395                    the owner of the lock died.  */
396                 assert (e != ESRCH || !robust);
397 
398                 /* Delay the thread until the timeout is reached. Then return
399                    ETIMEDOUT.  */
400                 do
401                   e = __futex_abstimed_wait64 (&(unsigned int){0}, 0, clockid,
402                                                abstime, private);
403                 while (e != ETIMEDOUT);
404                 return ETIMEDOUT;
405               }
406             else if (e != 0)
407               return e;

The EOVERFLOW for futex_lock_pi64 will be handled on the last 'e != 0',
and the internal '__futex_abstimed_wait64' should use a valid abstime
(since futex_lock_pi64 would fail early).

As priority protected ones:

469     case PTHREAD_MUTEX_PP_RECURSIVE_NP:
470     case PTHREAD_MUTEX_PP_ERRORCHECK_NP:
471     case PTHREAD_MUTEX_PP_NORMAL_NP:
472     case PTHREAD_MUTEX_PP_ADAPTIVE_NP:
[...]
550                     int e = __futex_abstimed_wait64 (
551                       (unsigned int *) &mutex->__data.__lock, ceilval | 2,
552                       clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
553                     if (e == ETIMEDOUT)
554                       return ETIMEDOUT;

As you have noted we do need to handle EOVERFLOW here, otherwise it will
trigger busy infinite loop.  It does not happen now because the timers
passed to __pthread_mutex_clocklock_common by 32-bit archs with 32-bit
support will always fix a 32-bti timespec; but they might happen once
we support the 64-bit time support on kernel older than v5.1.


On pthread rw/rdlock (nptl/pthread_rwlock_common.c) there are some some issues:

328               while (((r = atomic_load_relaxed (&rwlock->__data.__readers))
329                       & PTHREAD_RWLOCK_RWAITING) != 0)
330                 {
331                   int private = __pthread_rwlock_get_private (rwlock);
332                   int err = __futex_abstimed_wait64 (&rwlock->__data.__readers,
333                                                      r, clockid, abstime,
334                                                      private);
335                   /* We ignore EAGAIN and EINTR.  On time-outs, we can just
336                      return because we don't need to clean up anything.  */
337                   if (err == ETIMEDOUT)
338                     return err;
339                 }

This also requires handle EOVERFLOW.

460           int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
461                                              1 | PTHREAD_RWLOCK_FUTEX_USED,
462                                              clockid, abstime, private);
463           if (err == ETIMEDOUT)
464             {
465               /* If we timed out, we need to unregister.  If no read phase
466                  has been installed while we waited, we can just decrement
467                  the number of readers.  Otherwise, we just acquire the
468                  lock, which is allowed because we give no precise timing
469                  guarantees, and because the timeout is only required to
470                  be in effect if we would have had to wait for other
471                  threads (e.g., if futex_wait would time-out immediately
472                  because the given absolute time is in the past).  */

Same here, I think EOVERFLOW should be handled as ETIMEDOUT in this case.

730           int err = __futex_abstimed_wait64 (&rwlock->__data.__writers_futex,
731                                              1 | PTHREAD_RWLOCK_FUTEX_USED,
732                                              clockid, abstime, private);
733           if (err == ETIMEDOUT)
734             {
735               if (prefer_writer)
736                 {
737                   /* We need to unregister as a waiting writer.  If we are the
738                      last writer and writer--writer hand-over is available,
739                      we must make use of it because nobody else will reset
740                      WRLOCKED otherwise.  (If we use it, we simply pretend
741                      that this happened before the timeout; see
742                      pthread_rwlock_rdlock_full for the full reasoning.)
743                      Also see the similar code above.  */

And

829           int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
830                                              PTHREAD_RWLOCK_FUTEX_USED,
831                                              clockid, abstime, private);
832           if (err == ETIMEDOUT)
833             {
834               if (rwlock->__data.__flags != PTHREAD_RWLOCK_PREFER_READER_NP)
835                 {
836                   /* We try writer--writer hand-over.  */

Also need to handle EOVERFLOW as ETIMEDOUT as also return ETIMEOUT instead of
EOVERFLOW.

I plan to address these on a subsequent patch.
  
Adhemerval Zanella Netto Nov. 26, 2020, 12:51 p.m. UTC | #3
On 26/11/2020 09:15, Adhemerval Zanella wrote:
> 
> 
> On 26/11/2020 08:27, Mike Crowe wrote:
>> On Wednesday 25 November 2020 at 17:27:23 -0300, Adhemerval Zanella wrote:
>>> The 878fe624d4 changed lll_futex_timed_wait, which expects a relative
>>> timeout, with a __futex_abstimed_wait64, which expects an absolute
>>> timeout.  However the code still passes a relative timeout.
>>>
>>> Also, the PTHREAD_PRIO_PROTECT support for clocks different than
>>> CLOCK_REALTIME was broken since the inclusion of
>>> pthread_mutex_clocklock (9d20e22e46) since lll_futex_timed_wait
>>> always use CLOCK_REALTIME.
>>>
>>> This patch fixes by removing the relative time calculation.  It
>>> also adds some xtests that tests both thread and inter-process
>>> usage.
>>>
>>> Checked on x86_64-linux-gnu.
>>> ---
>>>  nptl/Makefile                  |  3 ++-
>>>  nptl/pthread_mutex_timedlock.c | 29 +++++------------------------
>>>  nptl/tst-mutexpp5.c            |  2 ++
>>>  nptl/tst-mutexpp9.c            |  2 ++
>>>  sysdeps/pthread/tst-mutex5.c   | 12 +++++++++++-
>>>  sysdeps/pthread/tst-mutex9.c   | 13 ++++++++++++-
>>>  6 files changed, 34 insertions(+), 27 deletions(-)
>>>  create mode 100644 nptl/tst-mutexpp5.c
>>>  create mode 100644 nptl/tst-mutexpp9.c
>>>
>>> diff --git a/nptl/Makefile b/nptl/Makefile
>>> index a48426a396..94d805f0d4 100644
>>> --- a/nptl/Makefile
>>> +++ b/nptl/Makefile
>>> @@ -309,7 +309,8 @@ tests-internal := tst-robustpi8 tst-rwlock19 tst-rwlock20 \
>>>  		  tst-setgetname \
>>>  
>>>  xtests = tst-setuid1 tst-setuid1-static tst-setuid2 \
>>> -	tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups
>>> +	tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups \
>>> +	tst-mutexpp5 tst-mutexpp9
>>>  
>>>  # This test can run into task limits because of a linux kernel bug
>>>  # and then cause the make process to fail too, see bug 24537.
>>> diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
>>> index aaaafa21ce..74adffe790 100644
>>> --- a/nptl/pthread_mutex_timedlock.c
>>> +++ b/nptl/pthread_mutex_timedlock.c
>>> @@ -547,30 +547,11 @@ __pthread_mutex_clocklock_common (pthread_mutex_t *mutex,
>>>  			goto failpp;
>>>  		      }
>>>  
>>> -		    struct __timespec64 rt;
>>> -
>>> -		    /* Get the current time.  */
>>> -		    __clock_gettime64 (CLOCK_REALTIME, &rt);
>>> -
>>> -		    /* Compute relative timeout.  */
>>> -		    rt.tv_sec = abstime->tv_sec - rt.tv_sec;
>>> -		    rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
>>> -		    if (rt.tv_nsec < 0)
>>> -		      {
>>> -			rt.tv_nsec += 1000000000;
>>> -			--rt.tv_sec;
>>> -		      }
>>> -
>>> -		    /* Already timed out?  */
>>> -		    if (rt.tv_sec < 0)
>>> -		      {
>>> -			result = ETIMEDOUT;
>>> -			goto failpp;
>>> -		      }
>>> -
>>> -		    __futex_abstimed_wait64 (
>>> -		      (unsigned int *) &mutex->__data.__lock, clockid,
>>> -		      ceilval | 2, &rt, PTHREAD_MUTEX_PSHARED (mutex));
>>> +		    int e = __futex_abstimed_wait64 (
>>> +		      (unsigned int *) &mutex->__data.__lock, ceilval | 2,
>>> +		      clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
>>> +		    if (e == ETIMEDOUT)
>>> +		      return ETIMEDOUT;
>>
>> I'm worried that futex could return other errors here which would cause a
>> busy infinite loop. However, my attempts to provoke EINVAL have failed
>> since the validity of abstime.tv_nsec is checked earlier. Presumably we
>> should never get this far if EOVERFLOW could be returned? (If there is a
>> problem here, then it also affects other mutex types which have similar
>> code.)
> 
> Revising the calls of futex-internal.h which might pass an timeout where
> EOVERFLOW might happen, I see nptl code requires some fixes.
> 
> At nptl/pthread_mutex_timedlock.c for robust mutexes:
> 
> 138     case PTHREAD_MUTEX_ROBUST_RECURSIVE_NP:
> 139     case PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP:
> 140     case PTHREAD_MUTEX_ROBUST_NORMAL_NP:
> 141     case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
> [...]
> 235           /* We are about to block; check whether the timeout is invalid.  */
> 236           if (! valid_nanoseconds (abstime->tv_nsec))
> 237             return EINVAL;
> 238           /* Work around the fact that the kernel rejects negative timeout
> 239              values despite them being valid.  */
> 240           if (__glibc_unlikely (abstime->tv_sec < 0))
> 241             return ETIMEDOUT;
> 
> The FUTEX_WAITERS will trigger a futex wake on pthread_mutex_unlock, so 
> this check is an optimization to avoid it.  It could be removed since
> __futex_abstimed_wait64 does the same check (it might incur in a spurious
> futex call).
> 
> [...]
> 268           int err = __futex_abstimed_wait64 (
> 269               (unsigned int *) &mutex->__data.__lock,
> 270               oldval, clockid, abstime,
> 271               PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
> 272           /* The futex call timed out.  */
> 273           if (err == ETIMEDOUT)
> 274             return err;
> 
> We need to handle EOVERFLOW since without a failure it is assumed the robust
> mutex was locked.
> 
> 
> The priority seems to be handle as expected:
> 
> 307     case PTHREAD_MUTEX_PI_RECURSIVE_NP:
> 308     case PTHREAD_MUTEX_PI_ERRORCHECK_NP:
> 309     case PTHREAD_MUTEX_PI_NORMAL_NP:
> 310     case PTHREAD_MUTEX_PI_ADAPTIVE_NP:
> 311     case PTHREAD_MUTEX_PI_ROBUST_RECURSIVE_NP:
> 312     case PTHREAD_MUTEX_PI_ROBUST_ERRORCHECK_NP:
> 313     case PTHREAD_MUTEX_PI_ROBUST_NORMAL_NP:
> 314     case PTHREAD_MUTEX_PI_ROBUST_ADAPTIVE_NP:
> [...]
> 380             /* The mutex is locked.  The kernel will now take care of
> 381                everything.  The timeout value must be a relative value.
> 382                Convert it.  */
> 383             int private = (robust
> 384                            ? PTHREAD_ROBUST_MUTEX_PSHARED (mutex)
> 385                            : PTHREAD_MUTEX_PSHARED (mutex));
> 386             int e = futex_lock_pi64 (&mutex->__data.__lock, abstime, private);
> 387             if (e == ETIMEDOUT)
> 388               return ETIMEDOUT;
> 389             else if (e == ESRCH || e == EDEADLK)
> 390               {
> 391                 assert (e != EDEADLK
> 392                         || (kind != PTHREAD_MUTEX_ERRORCHECK_NP
> 393                            && kind != PTHREAD_MUTEX_RECURSIVE_NP));
> 394                 /* ESRCH can happen only for non-robust PI mutexes where
> 395                    the owner of the lock died.  */
> 396                 assert (e != ESRCH || !robust);
> 397 
> 398                 /* Delay the thread until the timeout is reached. Then return
> 399                    ETIMEDOUT.  */
> 400                 do
> 401                   e = __futex_abstimed_wait64 (&(unsigned int){0}, 0, clockid,
> 402                                                abstime, private);
> 403                 while (e != ETIMEDOUT);
> 404                 return ETIMEDOUT;
> 405               }
> 406             else if (e != 0)
> 407               return e;
> 
> The EOVERFLOW for futex_lock_pi64 will be handled on the last 'e != 0',
> and the internal '__futex_abstimed_wait64' should use a valid abstime
> (since futex_lock_pi64 would fail early).
> 
> As priority protected ones:
> 
> 469     case PTHREAD_MUTEX_PP_RECURSIVE_NP:
> 470     case PTHREAD_MUTEX_PP_ERRORCHECK_NP:
> 471     case PTHREAD_MUTEX_PP_NORMAL_NP:
> 472     case PTHREAD_MUTEX_PP_ADAPTIVE_NP:
> [...]
> 550                     int e = __futex_abstimed_wait64 (
> 551                       (unsigned int *) &mutex->__data.__lock, ceilval | 2,
> 552                       clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
> 553                     if (e == ETIMEDOUT)
> 554                       return ETIMEDOUT;
> 
> As you have noted we do need to handle EOVERFLOW here, otherwise it will
> trigger busy infinite loop.  It does not happen now because the timers
> passed to __pthread_mutex_clocklock_common by 32-bit archs with 32-bit
> support will always fix a 32-bti timespec; but they might happen once
> we support the 64-bit time support on kernel older than v5.1.
> 
> 
> On pthread rw/rdlock (nptl/pthread_rwlock_common.c) there are some some issues:
> 
> 328               while (((r = atomic_load_relaxed (&rwlock->__data.__readers))
> 329                       & PTHREAD_RWLOCK_RWAITING) != 0)
> 330                 {
> 331                   int private = __pthread_rwlock_get_private (rwlock);
> 332                   int err = __futex_abstimed_wait64 (&rwlock->__data.__readers,
> 333                                                      r, clockid, abstime,
> 334                                                      private);
> 335                   /* We ignore EAGAIN and EINTR.  On time-outs, we can just
> 336                      return because we don't need to clean up anything.  */
> 337                   if (err == ETIMEDOUT)
> 338                     return err;
> 339                 }
> 
> This also requires handle EOVERFLOW.
> 
> 460           int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
> 461                                              1 | PTHREAD_RWLOCK_FUTEX_USED,
> 462                                              clockid, abstime, private);
> 463           if (err == ETIMEDOUT)
> 464             {
> 465               /* If we timed out, we need to unregister.  If no read phase
> 466                  has been installed while we waited, we can just decrement
> 467                  the number of readers.  Otherwise, we just acquire the
> 468                  lock, which is allowed because we give no precise timing
> 469                  guarantees, and because the timeout is only required to
> 470                  be in effect if we would have had to wait for other
> 471                  threads (e.g., if futex_wait would time-out immediately
> 472                  because the given absolute time is in the past).  */
> 
> Same here, I think EOVERFLOW should be handled as ETIMEDOUT in this case.
> 
> 730           int err = __futex_abstimed_wait64 (&rwlock->__data.__writers_futex,
> 731                                              1 | PTHREAD_RWLOCK_FUTEX_USED,
> 732                                              clockid, abstime, private);
> 733           if (err == ETIMEDOUT)
> 734             {
> 735               if (prefer_writer)
> 736                 {
> 737                   /* We need to unregister as a waiting writer.  If we are the
> 738                      last writer and writer--writer hand-over is available,
> 739                      we must make use of it because nobody else will reset
> 740                      WRLOCKED otherwise.  (If we use it, we simply pretend
> 741                      that this happened before the timeout; see
> 742                      pthread_rwlock_rdlock_full for the full reasoning.)
> 743                      Also see the similar code above.  */
> 
> And
> 
> 829           int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
> 830                                              PTHREAD_RWLOCK_FUTEX_USED,
> 831                                              clockid, abstime, private);
> 832           if (err == ETIMEDOUT)
> 833             {
> 834               if (rwlock->__data.__flags != PTHREAD_RWLOCK_PREFER_READER_NP)
> 835                 {
> 836                   /* We try writer--writer hand-over.  */
> 
> Also need to handle EOVERFLOW as ETIMEDOUT as also return ETIMEOUT instead of
> EOVERFLOW.
> 
> I plan to address these on a subsequent patch.
> 

Revising the __futex_clocklock64 usage, they also seems to already handle
EOVERFLOW:

 32 #ifndef lll_clocklock_elision
 33 #define lll_clocklock_elision(futex, adapt_count, clockid, abstime, private) \
 34   __futex_clocklock64 (&(futex), clockid, abstime, private)
 35 #endif

 78       /* We have to get the mutex.  */
 79       result = __futex_clocklock64 (&mutex->__data.__lock, clockid, abstime,
 80                                     PTHREAD_MUTEX_PSHARED (mutex));
 81 
 82       if (result != 0)
 83         goto out;

This already handles EOVERFLOW


100     simple:
101       /* Normal mutex.  */
102       result = __futex_clocklock64 (&mutex->__data.__lock, clockid, abstime,
103                                     PTHREAD_MUTEX_PSHARED (mutex));
104       break;

This also handled EOVERFLOW (it will bail out to line 583).


106     case PTHREAD_MUTEX_TIMED_ELISION_NP:
107     elision: __attribute__((unused))
108       /* Don't record ownership */
109       return lll_clocklock_elision (mutex->__data.__lock,
110                                     mutex->__data.__spins,
111                                     clockid, abstime,
112                                     PTHREAD_MUTEX_PSHARED (mutex));

All the target overriden lll_clocklock_elision call target provided
__lll_lock_elision and this return __futex_clocklock64, so it already handles
EOVERFLOW.


115     case PTHREAD_MUTEX_ADAPTIVE_NP:
116       if (lll_trylock (mutex->__data.__lock) != 0)
117         {
118           int cnt = 0;
119           int max_cnt = MIN (max_adaptive_count (),
120                              mutex->__data.__spins * 2 + 10);
121           do
122             {
123               if (cnt++ >= max_cnt)
124                 {
125                   result = __futex_clocklock64 (&mutex->__data.__lock,
126                                                 clockid, abstime,
127                                                 PTHREAD_MUTEX_PSHARED (mutex));
128                   break;
129                 }
130               atomic_spin_nop ();
131             }
132           while (lll_trylock (mutex->__data.__lock) != 0);
133 
134           mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
135         }

This should be safe as well (it will break once cnt reaches max_cnt). It could
bail early if result is EOVERFLOW but it would require to adjust the
__spins as well.
  
Adhemerval Zanella Netto Nov. 27, 2020, 11:22 a.m. UTC | #4
On 26/11/2020 09:51, Adhemerval Zanella wrote:
> 
> 
> On 26/11/2020 09:15, Adhemerval Zanella wrote:
>>
>>
>> On 26/11/2020 08:27, Mike Crowe wrote:
>>> On Wednesday 25 November 2020 at 17:27:23 -0300, Adhemerval Zanella wrote:
>>>> The 878fe624d4 changed lll_futex_timed_wait, which expects a relative
>>>> timeout, with a __futex_abstimed_wait64, which expects an absolute
>>>> timeout.  However the code still passes a relative timeout.
>>>>
>>>> Also, the PTHREAD_PRIO_PROTECT support for clocks different than
>>>> CLOCK_REALTIME was broken since the inclusion of
>>>> pthread_mutex_clocklock (9d20e22e46) since lll_futex_timed_wait
>>>> always use CLOCK_REALTIME.
>>>>
>>>> This patch fixes by removing the relative time calculation.  It
>>>> also adds some xtests that tests both thread and inter-process
>>>> usage.
>>>>
>>>> Checked on x86_64-linux-gnu.
>>>> ---
>>>>  nptl/Makefile                  |  3 ++-
>>>>  nptl/pthread_mutex_timedlock.c | 29 +++++------------------------
>>>>  nptl/tst-mutexpp5.c            |  2 ++
>>>>  nptl/tst-mutexpp9.c            |  2 ++
>>>>  sysdeps/pthread/tst-mutex5.c   | 12 +++++++++++-
>>>>  sysdeps/pthread/tst-mutex9.c   | 13 ++++++++++++-
>>>>  6 files changed, 34 insertions(+), 27 deletions(-)
>>>>  create mode 100644 nptl/tst-mutexpp5.c
>>>>  create mode 100644 nptl/tst-mutexpp9.c
>>>>
>>>> diff --git a/nptl/Makefile b/nptl/Makefile
>>>> index a48426a396..94d805f0d4 100644
>>>> --- a/nptl/Makefile
>>>> +++ b/nptl/Makefile
>>>> @@ -309,7 +309,8 @@ tests-internal := tst-robustpi8 tst-rwlock19 tst-rwlock20 \
>>>>  		  tst-setgetname \
>>>>  
>>>>  xtests = tst-setuid1 tst-setuid1-static tst-setuid2 \
>>>> -	tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups
>>>> +	tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups \
>>>> +	tst-mutexpp5 tst-mutexpp9
>>>>  
>>>>  # This test can run into task limits because of a linux kernel bug
>>>>  # and then cause the make process to fail too, see bug 24537.
>>>> diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
>>>> index aaaafa21ce..74adffe790 100644
>>>> --- a/nptl/pthread_mutex_timedlock.c
>>>> +++ b/nptl/pthread_mutex_timedlock.c
>>>> @@ -547,30 +547,11 @@ __pthread_mutex_clocklock_common (pthread_mutex_t *mutex,
>>>>  			goto failpp;
>>>>  		      }
>>>>  
>>>> -		    struct __timespec64 rt;
>>>> -
>>>> -		    /* Get the current time.  */
>>>> -		    __clock_gettime64 (CLOCK_REALTIME, &rt);
>>>> -
>>>> -		    /* Compute relative timeout.  */
>>>> -		    rt.tv_sec = abstime->tv_sec - rt.tv_sec;
>>>> -		    rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
>>>> -		    if (rt.tv_nsec < 0)
>>>> -		      {
>>>> -			rt.tv_nsec += 1000000000;
>>>> -			--rt.tv_sec;
>>>> -		      }
>>>> -
>>>> -		    /* Already timed out?  */
>>>> -		    if (rt.tv_sec < 0)
>>>> -		      {
>>>> -			result = ETIMEDOUT;
>>>> -			goto failpp;
>>>> -		      }
>>>> -
>>>> -		    __futex_abstimed_wait64 (
>>>> -		      (unsigned int *) &mutex->__data.__lock, clockid,
>>>> -		      ceilval | 2, &rt, PTHREAD_MUTEX_PSHARED (mutex));
>>>> +		    int e = __futex_abstimed_wait64 (
>>>> +		      (unsigned int *) &mutex->__data.__lock, ceilval | 2,
>>>> +		      clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
>>>> +		    if (e == ETIMEDOUT)
>>>> +		      return ETIMEDOUT;
>>>
>>> I'm worried that futex could return other errors here which would cause a
>>> busy infinite loop. However, my attempts to provoke EINVAL have failed
>>> since the validity of abstime.tv_nsec is checked earlier. Presumably we
>>> should never get this far if EOVERFLOW could be returned? (If there is a
>>> problem here, then it also affects other mutex types which have similar
>>> code.)
>>
>> Revising the calls of futex-internal.h which might pass an timeout where
>> EOVERFLOW might happen, I see nptl code requires some fixes.
>>
>> At nptl/pthread_mutex_timedlock.c for robust mutexes:
>>
>> 138     case PTHREAD_MUTEX_ROBUST_RECURSIVE_NP:
>> 139     case PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP:
>> 140     case PTHREAD_MUTEX_ROBUST_NORMAL_NP:
>> 141     case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
>> [...]
>> 235           /* We are about to block; check whether the timeout is invalid.  */
>> 236           if (! valid_nanoseconds (abstime->tv_nsec))
>> 237             return EINVAL;
>> 238           /* Work around the fact that the kernel rejects negative timeout
>> 239              values despite them being valid.  */
>> 240           if (__glibc_unlikely (abstime->tv_sec < 0))
>> 241             return ETIMEDOUT;
>>
>> The FUTEX_WAITERS will trigger a futex wake on pthread_mutex_unlock, so 
>> this check is an optimization to avoid it.  It could be removed since
>> __futex_abstimed_wait64 does the same check (it might incur in a spurious
>> futex call).
>>
>> [...]
>> 268           int err = __futex_abstimed_wait64 (
>> 269               (unsigned int *) &mutex->__data.__lock,
>> 270               oldval, clockid, abstime,
>> 271               PTHREAD_ROBUST_MUTEX_PSHARED (mutex));
>> 272           /* The futex call timed out.  */
>> 273           if (err == ETIMEDOUT)
>> 274             return err;
>>
>> We need to handle EOVERFLOW since without a failure it is assumed the robust
>> mutex was locked.
>>
>>
>> The priority seems to be handle as expected:
>>
>> 307     case PTHREAD_MUTEX_PI_RECURSIVE_NP:
>> 308     case PTHREAD_MUTEX_PI_ERRORCHECK_NP:
>> 309     case PTHREAD_MUTEX_PI_NORMAL_NP:
>> 310     case PTHREAD_MUTEX_PI_ADAPTIVE_NP:
>> 311     case PTHREAD_MUTEX_PI_ROBUST_RECURSIVE_NP:
>> 312     case PTHREAD_MUTEX_PI_ROBUST_ERRORCHECK_NP:
>> 313     case PTHREAD_MUTEX_PI_ROBUST_NORMAL_NP:
>> 314     case PTHREAD_MUTEX_PI_ROBUST_ADAPTIVE_NP:
>> [...]
>> 380             /* The mutex is locked.  The kernel will now take care of
>> 381                everything.  The timeout value must be a relative value.
>> 382                Convert it.  */
>> 383             int private = (robust
>> 384                            ? PTHREAD_ROBUST_MUTEX_PSHARED (mutex)
>> 385                            : PTHREAD_MUTEX_PSHARED (mutex));
>> 386             int e = futex_lock_pi64 (&mutex->__data.__lock, abstime, private);
>> 387             if (e == ETIMEDOUT)
>> 388               return ETIMEDOUT;
>> 389             else if (e == ESRCH || e == EDEADLK)
>> 390               {
>> 391                 assert (e != EDEADLK
>> 392                         || (kind != PTHREAD_MUTEX_ERRORCHECK_NP
>> 393                            && kind != PTHREAD_MUTEX_RECURSIVE_NP));
>> 394                 /* ESRCH can happen only for non-robust PI mutexes where
>> 395                    the owner of the lock died.  */
>> 396                 assert (e != ESRCH || !robust);
>> 397 
>> 398                 /* Delay the thread until the timeout is reached. Then return
>> 399                    ETIMEDOUT.  */
>> 400                 do
>> 401                   e = __futex_abstimed_wait64 (&(unsigned int){0}, 0, clockid,
>> 402                                                abstime, private);
>> 403                 while (e != ETIMEDOUT);
>> 404                 return ETIMEDOUT;
>> 405               }
>> 406             else if (e != 0)
>> 407               return e;
>>
>> The EOVERFLOW for futex_lock_pi64 will be handled on the last 'e != 0',
>> and the internal '__futex_abstimed_wait64' should use a valid abstime
>> (since futex_lock_pi64 would fail early).
>>
>> As priority protected ones:
>>
>> 469     case PTHREAD_MUTEX_PP_RECURSIVE_NP:
>> 470     case PTHREAD_MUTEX_PP_ERRORCHECK_NP:
>> 471     case PTHREAD_MUTEX_PP_NORMAL_NP:
>> 472     case PTHREAD_MUTEX_PP_ADAPTIVE_NP:
>> [...]
>> 550                     int e = __futex_abstimed_wait64 (
>> 551                       (unsigned int *) &mutex->__data.__lock, ceilval | 2,
>> 552                       clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
>> 553                     if (e == ETIMEDOUT)
>> 554                       return ETIMEDOUT;
>>
>> As you have noted we do need to handle EOVERFLOW here, otherwise it will
>> trigger busy infinite loop.  It does not happen now because the timers
>> passed to __pthread_mutex_clocklock_common by 32-bit archs with 32-bit
>> support will always fix a 32-bti timespec; but they might happen once
>> we support the 64-bit time support on kernel older than v5.1.
>>
>>
>> On pthread rw/rdlock (nptl/pthread_rwlock_common.c) there are some some issues:
>>
>> 328               while (((r = atomic_load_relaxed (&rwlock->__data.__readers))
>> 329                       & PTHREAD_RWLOCK_RWAITING) != 0)
>> 330                 {
>> 331                   int private = __pthread_rwlock_get_private (rwlock);
>> 332                   int err = __futex_abstimed_wait64 (&rwlock->__data.__readers,
>> 333                                                      r, clockid, abstime,
>> 334                                                      private);
>> 335                   /* We ignore EAGAIN and EINTR.  On time-outs, we can just
>> 336                      return because we don't need to clean up anything.  */
>> 337                   if (err == ETIMEDOUT)
>> 338                     return err;
>> 339                 }
>>
>> This also requires handle EOVERFLOW.
>>
>> 460           int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
>> 461                                              1 | PTHREAD_RWLOCK_FUTEX_USED,
>> 462                                              clockid, abstime, private);
>> 463           if (err == ETIMEDOUT)
>> 464             {
>> 465               /* If we timed out, we need to unregister.  If no read phase
>> 466                  has been installed while we waited, we can just decrement
>> 467                  the number of readers.  Otherwise, we just acquire the
>> 468                  lock, which is allowed because we give no precise timing
>> 469                  guarantees, and because the timeout is only required to
>> 470                  be in effect if we would have had to wait for other
>> 471                  threads (e.g., if futex_wait would time-out immediately
>> 472                  because the given absolute time is in the past).  */
>>
>> Same here, I think EOVERFLOW should be handled as ETIMEDOUT in this case.
>>
>> 730           int err = __futex_abstimed_wait64 (&rwlock->__data.__writers_futex,
>> 731                                              1 | PTHREAD_RWLOCK_FUTEX_USED,
>> 732                                              clockid, abstime, private);
>> 733           if (err == ETIMEDOUT)
>> 734             {
>> 735               if (prefer_writer)
>> 736                 {
>> 737                   /* We need to unregister as a waiting writer.  If we are the
>> 738                      last writer and writer--writer hand-over is available,
>> 739                      we must make use of it because nobody else will reset
>> 740                      WRLOCKED otherwise.  (If we use it, we simply pretend
>> 741                      that this happened before the timeout; see
>> 742                      pthread_rwlock_rdlock_full for the full reasoning.)
>> 743                      Also see the similar code above.  */
>>
>> And
>>
>> 829           int err = __futex_abstimed_wait64 (&rwlock->__data.__wrphase_futex,
>> 830                                              PTHREAD_RWLOCK_FUTEX_USED,
>> 831                                              clockid, abstime, private);
>> 832           if (err == ETIMEDOUT)
>> 833             {
>> 834               if (rwlock->__data.__flags != PTHREAD_RWLOCK_PREFER_READER_NP)
>> 835                 {
>> 836                   /* We try writer--writer hand-over.  */
>>
>> Also need to handle EOVERFLOW as ETIMEDOUT as also return ETIMEOUT instead of
>> EOVERFLOW.
>>
>> I plan to address these on a subsequent patch.
>>
> 
> Revising the __futex_clocklock64 usage, they also seems to already handle
> EOVERFLOW:
> 
>  32 #ifndef lll_clocklock_elision
>  33 #define lll_clocklock_elision(futex, adapt_count, clockid, abstime, private) \
>  34   __futex_clocklock64 (&(futex), clockid, abstime, private)
>  35 #endif
> 
>  78       /* We have to get the mutex.  */
>  79       result = __futex_clocklock64 (&mutex->__data.__lock, clockid, abstime,
>  80                                     PTHREAD_MUTEX_PSHARED (mutex));
>  81 
>  82       if (result != 0)
>  83         goto out;
> 
> This already handles EOVERFLOW
> 
> 
> 100     simple:
> 101       /* Normal mutex.  */
> 102       result = __futex_clocklock64 (&mutex->__data.__lock, clockid, abstime,
> 103                                     PTHREAD_MUTEX_PSHARED (mutex));
> 104       break;
> 
> This also handled EOVERFLOW (it will bail out to line 583).
> 
> 
> 106     case PTHREAD_MUTEX_TIMED_ELISION_NP:
> 107     elision: __attribute__((unused))
> 108       /* Don't record ownership */
> 109       return lll_clocklock_elision (mutex->__data.__lock,
> 110                                     mutex->__data.__spins,
> 111                                     clockid, abstime,
> 112                                     PTHREAD_MUTEX_PSHARED (mutex));
> 
> All the target overriden lll_clocklock_elision call target provided
> __lll_lock_elision and this return __futex_clocklock64, so it already handles
> EOVERFLOW.
> 
> 
> 115     case PTHREAD_MUTEX_ADAPTIVE_NP:
> 116       if (lll_trylock (mutex->__data.__lock) != 0)
> 117         {
> 118           int cnt = 0;
> 119           int max_cnt = MIN (max_adaptive_count (),
> 120                              mutex->__data.__spins * 2 + 10);
> 121           do
> 122             {
> 123               if (cnt++ >= max_cnt)
> 124                 {
> 125                   result = __futex_clocklock64 (&mutex->__data.__lock,
> 126                                                 clockid, abstime,
> 127                                                 PTHREAD_MUTEX_PSHARED (mutex));
> 128                   break;
> 129                 }
> 130               atomic_spin_nop ();
> 131             }
> 132           while (lll_trylock (mutex->__data.__lock) != 0);
> 133 
> 134           mutex->__data.__spins += (cnt - mutex->__data.__spins) / 8;
> 135         }
> 
> This should be safe as well (it will break once cnt reaches max_cnt). It could
> bail early if result is EOVERFLOW but it would require to adjust the
> __spins as well.
> 

I forgot to analyze the usages of __futex_abstimed_wait_cancelable64 as well.

On nptl/pthread_cond_wait.c:

504           err = __futex_abstimed_wait_cancelable64 (
505             cond->__data.__g_signals + g, 0, clockid, abstime, private);
506 
507           __pthread_cleanup_pop (&buffer, 0);
508 
509           if (__glibc_unlikely (err == ETIMEDOUT))
510             {

This also requires handle EOVERFLOW.

The nptl/pthread_join_common.c is safe:

102           int ret = __futex_abstimed_wait_cancelable64 (
103             (unsigned int *) &pd->tid, tid, clockid, abstime, LLL_SHARED);
104           if (ret == ETIMEDOUT || ret == EOVERFLOW)
105             {
106               result = ret;
107               break;
108             }

The nptl/sem_waitcommon.c also requires fixing:

104 static int
105 __attribute__ ((noinline))
106 do_futex_wait (struct new_sem *sem, clockid_t clockid,
107                const struct __timespec64 *abstime)
108 {
109   int err;
110 
111 #if __HAVE_64B_ATOMICS
112   err = __futex_abstimed_wait_cancelable64 (
113       (unsigned int *) &sem->data + SEM_VALUE_OFFSET, 0,
114       clockid, abstime,
115       sem->private);
116 #else
117   err = __futex_abstimed_wait_cancelable64 (&sem->value, SEM_NWAITERS_MASK,
118                                             clockid, abstime, sem->private);
119 #endif
120 
121   return err;
122 }

79   for (;;)
180     {
181       /* If there is no token available, sleep until there is.  */
182       if ((d & SEM_VALUE_MASK) == 0)
183         {
184           err = do_futex_wait (sem, clockid, abstime);
[...]
194           if (err == ETIMEDOUT || err == EINTR)
195             {
196               __set_errno (err);
197               err = -1;
  

Patch

diff --git a/nptl/Makefile b/nptl/Makefile
index a48426a396..94d805f0d4 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -309,7 +309,8 @@  tests-internal := tst-robustpi8 tst-rwlock19 tst-rwlock20 \
 		  tst-setgetname \
 
 xtests = tst-setuid1 tst-setuid1-static tst-setuid2 \
-	tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups
+	tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups \
+	tst-mutexpp5 tst-mutexpp9
 
 # This test can run into task limits because of a linux kernel bug
 # and then cause the make process to fail too, see bug 24537.
diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
index aaaafa21ce..74adffe790 100644
--- a/nptl/pthread_mutex_timedlock.c
+++ b/nptl/pthread_mutex_timedlock.c
@@ -547,30 +547,11 @@  __pthread_mutex_clocklock_common (pthread_mutex_t *mutex,
 			goto failpp;
 		      }
 
-		    struct __timespec64 rt;
-
-		    /* Get the current time.  */
-		    __clock_gettime64 (CLOCK_REALTIME, &rt);
-
-		    /* Compute relative timeout.  */
-		    rt.tv_sec = abstime->tv_sec - rt.tv_sec;
-		    rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec;
-		    if (rt.tv_nsec < 0)
-		      {
-			rt.tv_nsec += 1000000000;
-			--rt.tv_sec;
-		      }
-
-		    /* Already timed out?  */
-		    if (rt.tv_sec < 0)
-		      {
-			result = ETIMEDOUT;
-			goto failpp;
-		      }
-
-		    __futex_abstimed_wait64 (
-		      (unsigned int *) &mutex->__data.__lock, clockid,
-		      ceilval | 2, &rt, PTHREAD_MUTEX_PSHARED (mutex));
+		    int e = __futex_abstimed_wait64 (
+		      (unsigned int *) &mutex->__data.__lock, ceilval | 2,
+		      clockid, abstime, PTHREAD_MUTEX_PSHARED (mutex));
+		    if (e == ETIMEDOUT)
+		      return ETIMEDOUT;
 		  }
 	      }
 	    while (atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
diff --git a/nptl/tst-mutexpp5.c b/nptl/tst-mutexpp5.c
new file mode 100644
index 0000000000..a864a390ca
--- /dev/null
+++ b/nptl/tst-mutexpp5.c
@@ -0,0 +1,2 @@ 
+#define ENABLE_PP 1
+#include "tst-mutex5.c"
diff --git a/nptl/tst-mutexpp9.c b/nptl/tst-mutexpp9.c
new file mode 100644
index 0000000000..c848c74c7e
--- /dev/null
+++ b/nptl/tst-mutexpp9.c
@@ -0,0 +1,2 @@ 
+#define ENABLE_PP 1
+#include "tst-mutex9.c"
diff --git a/sysdeps/pthread/tst-mutex5.c b/sysdeps/pthread/tst-mutex5.c
index bfe1a79fa4..dbd2c3c15f 100644
--- a/sysdeps/pthread/tst-mutex5.c
+++ b/sysdeps/pthread/tst-mutex5.c
@@ -27,6 +27,9 @@ 
 #include <support/check.h>
 #include <support/timespec.h>
 
+#ifdef ENABLE_PP
+#include "tst-tpp.h"
+#endif
 
 #ifndef TYPE
 # define TYPE PTHREAD_MUTEX_NORMAL
@@ -47,8 +50,11 @@  do_test_clock (clockid_t clockid, const char *fnname)
   TEST_COMPARE (pthread_mutexattr_init (&a), 0);
   TEST_COMPARE (pthread_mutexattr_settype (&a, TYPE), 0);
 
-#ifdef ENABLE_PI
+#if defined ENABLE_PI
   TEST_COMPARE (pthread_mutexattr_setprotocol (&a, PTHREAD_PRIO_INHERIT), 0);
+#elif defined ENABLE_PP
+  TEST_COMPARE (pthread_mutexattr_setprotocol (&a, PTHREAD_PRIO_PROTECT), 0);
+  TEST_COMPARE (pthread_mutexattr_setprioceiling (&a, 6), 0);
 #endif
 
   int err = pthread_mutex_init (&m, &a);
@@ -110,6 +116,10 @@  do_test_clock (clockid_t clockid, const char *fnname)
 
 static int do_test (void)
 {
+#ifdef ENABLE_PP
+  init_tpp_test ();
+#endif
+
   do_test_clock (CLOCK_USE_TIMEDLOCK, "timedlock");
   do_test_clock (CLOCK_REALTIME, "clocklock(realtime)");
 #ifndef ENABLE_PI
diff --git a/sysdeps/pthread/tst-mutex9.c b/sysdeps/pthread/tst-mutex9.c
index bfc01f8c75..081aeff0f6 100644
--- a/sysdeps/pthread/tst-mutex9.c
+++ b/sysdeps/pthread/tst-mutex9.c
@@ -30,6 +30,10 @@ 
 #include <support/timespec.h>
 #include <support/xunistd.h>
 
+#ifdef ENABLE_PP
+#include "tst-tpp.h"
+#endif
+
 
 /* A bogus clock value that tells run_test to use pthread_mutex_timedlock
    rather than pthread_mutex_clocklock.  */
@@ -73,8 +77,11 @@  do_test_clock (clockid_t clockid)
 
   TEST_COMPARE (pthread_mutexattr_settype (&a, PTHREAD_MUTEX_RECURSIVE), 0);
 
-#ifdef ENABLE_PI
+#if defined ENABLE_PI
   TEST_COMPARE (pthread_mutexattr_setprotocol (&a, PTHREAD_PRIO_INHERIT), 0);
+#elif defined ENABLE_PP
+  TEST_COMPARE (pthread_mutexattr_setprotocol (&a, PTHREAD_PRIO_PROTECT), 0);
+  TEST_COMPARE (pthread_mutexattr_setprioceiling (&a, 6), 0);
 #endif
 
   int e;
@@ -131,6 +138,10 @@  do_test_clock (clockid_t clockid)
 static int
 do_test (void)
 {
+#ifdef ENABLE_PP
+  init_tpp_test ();
+#endif
+
   do_test_clock (CLOCK_USE_TIMEDLOCK);
   do_test_clock (CLOCK_REALTIME);
 #ifndef ENABLE_PI