nptl: Fix __futex_clocklock64 return error check [BZ #26964]

Message ID 20201127143355.1844509-1-lamm@linux.ibm.com
State Superseded
Headers
Series nptl: Fix __futex_clocklock64 return error check [BZ #26964] |

Commit Message

Lucas A. M. Magalhaes Nov. 27, 2020, 2:33 p.m. UTC
  The earlier implementation of this, __lll_clocklock, calls lll_clockwait
that doesn't return the futex syscall error codes.  It always tries again
if that fails.

However in the current implementation, when the futex returns EAGAIN,
__futex_clocklock64 will also return EGAIN, even if the futex is taken.

This patch fixes the EAGAIN issue and also adds a check for EINTR.  As
futex syscall can return EINTR if the thread is interrupted by a signal.
In this case I'm assuming the function should continue trying to lock as
there is no mention to about it on POSIX.  Also add a test for both
scenarios.
---
 nptl/Makefile                         |   2 +-
 nptl/tst-pthread-timedlock-lockloop.c | 136 ++++++++++++++++++++++++++
 sysdeps/nptl/futex-internal.h         |   9 ++
 3 files changed, 146 insertions(+), 1 deletion(-)
 create mode 100644 nptl/tst-pthread-timedlock-lockloop.c
  

Comments

Adhemerval Zanella Netto Nov. 27, 2020, 5:27 p.m. UTC | #1
On 27/11/2020 11:33, Lucas A. M. Magalhaes via Libc-alpha wrote:
> The earlier implementation of this, __lll_clocklock, calls lll_clockwait
> that doesn't return the futex syscall error codes.  It always tries again
> if that fails.
> 
> However in the current implementation, when the futex returns EAGAIN,
> __futex_clocklock64 will also return EGAIN, even if the futex is taken.
> 
> This patch fixes the EAGAIN issue and also adds a check for EINTR.  As
> futex syscall can return EINTR if the thread is interrupted by a signal.
> In this case I'm assuming the function should continue trying to lock as
> there is no mention to about it on POSIX.  Also add a test for both
> scenarios.

Thanks for working on this, I should have caught this on the 
"y2038: nptl: Convert pthread_mutex_{clock|timed}lock to support 64 bit"
review.

Some comments below.

> ---
>  nptl/Makefile                         |   2 +-
>  nptl/tst-pthread-timedlock-lockloop.c | 136 ++++++++++++++++++++++++++
>  sysdeps/nptl/futex-internal.h         |   9 ++
>  3 files changed, 146 insertions(+), 1 deletion(-)
>  create mode 100644 nptl/tst-pthread-timedlock-lockloop.c
> 
> diff --git a/nptl/Makefile b/nptl/Makefile
> index a48426a396..91324e09f2 100644
> --- a/nptl/Makefile
> +++ b/nptl/Makefile
> @@ -298,7 +298,7 @@ tests = tst-attr2 tst-attr3 tst-default-attr \
>  	tst-thread-affinity-sched \
>  	tst-pthread-defaultattr-free \
>  	tst-pthread-attr-sigmask \
> -
> +	tst-pthread-timedlock-lockloop \
>  
>  tests-container =  tst-pthread-getattr
>  

Ok, although the final '\' is not required.

> diff --git a/nptl/tst-pthread-timedlock-lockloop.c b/nptl/tst-pthread-timedlock-lockloop.c
> new file mode 100644
> index 0000000000..abcc5724cf
> --- /dev/null
> +++ b/nptl/tst-pthread-timedlock-lockloop.c
> @@ -0,0 +1,136 @@
> +/* Make sure pthrea_mutex_timedlock doesn't return spurious error codes.
> +
> +   Copyright (C) 2020 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +#include <errno.h>
> +#include <pthread.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <time.h>
> +
> +#define NANO_PER_SEC 1000000000LL
> +#define TIMEOUT (NANO_PER_SEC / 1000LL)
> +#define NUM_THREADS 50
> +#define RETEST_TIMES 100
> +
> +struct worker_args
> +{
> +  pthread_mutex_t mtx;
> +  int runs;
> +};
> +
> +void
> +signal_handler (int sig_num)
> +{
> +  if (sig_num != SIGUSR1)
> +    printf ("Unexpected signal");
> +}

Use TEST_COMPARE, but I would just remove the check since printf is not really
async-signal-safe.

> +
> +/* Call pthread_mutex_timedlock()/pthread_mutex_unlock() repetitively, hoping
> +   that one of them returns EAGAIN or EINTR unexpectedly.  */
> +static void *
> +worker (void *arg)
> +{
> +  pthread_mutex_t *mtx = &(((struct worker_args *) arg)->mtx);

Move both the mutex and the iteration number to a global variable, it 
would simplify the code a lot.

> +  struct timespec abs_time;
> +  signal (SIGUSR1, signal_handler);

This is done concurrent signal handler set, which I think is not what you meant.
Move is to do_test function prior thread creation.

> +
> +  for (unsigned run = 0; run < ((struct worker_args *) arg)->runs; run++)
> +    {
> +      clock_gettime (CLOCK_REALTIME, &abs_time);
> +      abs_time.tv_nsec += TIMEOUT;
> +      if (abs_time.tv_nsec >= NANO_PER_SEC)
> +	{
> +	  abs_time.tv_sec++;
> +	  abs_time.tv_nsec -= NANO_PER_SEC;
> +	}

Use:

  struct timespec abs_time = timespec_add (xclock_now (CLOCK_REALTIME),
                                           make_timespec (0, 1000000));


> +
> +      int ret = pthread_mutex_timedlock (mtx, &abs_time);

Check also pthread_mutex_clocklock against CLOCK_REALTIME and CLOCK_MONOTONIC
(maybe by executing the same batch check with different worker threads).

> +
> +      if (ret == 0)
> +	pthread_mutex_unlock (mtx);

Use xpthread_mutex_unlock

> +
> +      if (ret == EAGAIN || ret == EINTR)
> +	{
> +	  printf ("Unexpected return %d\n", ret);
> +	  return (void *) 1;
> +	}
> +    }

Use

  TEST_VERIFY_EXIT (ret != EAGAIN && ret != EINTR);

Or just: 
 
  TEST_VERIFY_EXIT (ret == 0 || ret == ETIMEDOUT)

This will bail early in the case of failure and simplify the error
handling.

> +  return NULL;
> +}
> +
> +static int
> +do_test (void)
> +{
> +  pthread_t *workers =
> +    (pthread_t *) malloc (NUM_THREADS * sizeof (pthread_t));

Use xmalloc, but since NUM_THREADS is quite small and pthread_t is an
a word size on all supported ABI just allocate on the stack.

> +
> +  struct worker_args args;
> +  pthread_mutex_init (&(args.mtx), NULL);

Move it to a global variable, it would simplify the code a lot.

> +
> +  int *thread_ret = NULL, ret = 0;
> +
> +  /* Run the checks to catch an EAGAIN.  */
> +  /* As there is no way to ensure the error condition, just run the test many
> +     times hoping to catch the error.  */
> +  args.runs = 100;
> +  for (int run = 0; run < RETEST_TIMES; run++)
> +    {
> +      for (int i = 0; i < NUM_THREADS; i++)
> +	{
> +	  pthread_create (&workers[i], NULL, worker, (void *) &args);

Use xpthread_create and remove the brackets.

> +	}
> +      for (int i = 0; i < NUM_THREADS; i++)
> +	{
> +	  pthread_join (workers[i], (void **) &thread_ret);

Use xpthread_join.

> +	  if (thread_ret != NULL)
> +	    ++ret;
> +	}
> +      if (ret != 0)
> +	goto err;

There is no need to this check if you just bail out on a failure
in the worker thread.

> +    }
> +
> +  /* Run the test to check if we catch an EINTR.  */
> +  /* As there is no way to ensure the error condition, just run the test many
> +     times hoping to catch the error.  */
> +  pthread_t thread;
> +  args.runs = 1;
> +  for (int i = 0; i < RETEST_TIMES * 1000; i++)
> +    {
> +      if (pthread_mutex_lock (&(args.mtx)) != 0)
> +	{
> +	  printf ("Mutex lock failed\n");
> +	  goto err;
> +	}
> +      pthread_create (&thread, NULL, worker, (void *) &args);
> +      pthread_kill (thread, SIGUSR1);

This is most like won't interrupt the pthread mutex lock depending of the
scheduling and the underlying hardware, but I don't have a better alternative
than adding a small timeout before callling pthread_kill (which it is itself
flaky as well).


> +      pthread_mutex_unlock (&(args.mtx));
> +      pthread_join (thread, (void **) &thread_ret);

The the pthread function from support/xthread.h.

> +      if (thread_ret != NULL) 
> +	goto err;
> +    }
> +
> +  free (workers);
> +  return 0;
> +
> +err:
> +  free (workers);
> +  return 1;
> +}
> +
> +#include <support/test-driver.c>
> diff --git a/sysdeps/nptl/futex-internal.h b/sysdeps/nptl/futex-internal.h
> index e67803952f..f16d26d994 100644
> --- a/sysdeps/nptl/futex-internal.h
> +++ b/sysdeps/nptl/futex-internal.h
> @@ -424,10 +424,19 @@ __futex_clocklock64 (int *futex, clockid_t clockid,
>      {
>        while (atomic_exchange_acq (futex, 2) != 0)
>          {
> +	  /* At this point we tried to get the futex but failed and set its
> +	     value to 2.  However the futex value can be changed by other
> +	     thread before this calls the futex syscall.  If so the syscall
> +	     will return EAGAIN.  */
>            err = __futex_abstimed_wait64 ((unsigned int *) futex, 2, clockid,
>  					 abstime, private);
>            if (err == EINVAL || err == ETIMEDOUT || err == EOVERFLOW)
>              break;
> +	  /* If EAGAIN or EINTR is returned here the error code should be reset
> +	     as we will try again to acquire the futex and it may success.
> +	     Otherwise the mutex will be locked and the return will not be 0.  */
> +	  if (err == EAGAIN || err == EINTR)
> +	    err = 0;
>          }
>      }
>    return err;
> 

Indeed, it should be similar to what it is done on nptl/pthread_join_common.c.
Maybe something more simple as below, it generates less code:

  static __always_inline int
  __futex_clocklock64 (int *futex, clockid_t clockid,
                       const struct __timespec64 *abstime, int private)
  { 
    if (__glibc_unlikely (atomic_compare_and_exchange_bool_acq (futex, 1, 0)))
      { 
        while (atomic_exchange_acq (futex, 2) != 0)
          { 
            int err = __futex_abstimed_wait64 (
              (unsigned int *) futex, 2, clockid, abstime, private);
            if (err == EINVAL || err == ETIMEDOUT || err == EOVERFLOW)
              return err;
          }
      }
    return 0;
  }

(I am not sure if EINVAL will even be returned in current 
__futex_clocklock64, but it should be ok to test it).
  

Patch

diff --git a/nptl/Makefile b/nptl/Makefile
index a48426a396..91324e09f2 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -298,7 +298,7 @@  tests = tst-attr2 tst-attr3 tst-default-attr \
 	tst-thread-affinity-sched \
 	tst-pthread-defaultattr-free \
 	tst-pthread-attr-sigmask \
-
+	tst-pthread-timedlock-lockloop \
 
 tests-container =  tst-pthread-getattr
 
diff --git a/nptl/tst-pthread-timedlock-lockloop.c b/nptl/tst-pthread-timedlock-lockloop.c
new file mode 100644
index 0000000000..abcc5724cf
--- /dev/null
+++ b/nptl/tst-pthread-timedlock-lockloop.c
@@ -0,0 +1,136 @@ 
+/* Make sure pthrea_mutex_timedlock doesn't return spurious error codes.
+
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#define NANO_PER_SEC 1000000000LL
+#define TIMEOUT (NANO_PER_SEC / 1000LL)
+#define NUM_THREADS 50
+#define RETEST_TIMES 100
+
+struct worker_args
+{
+  pthread_mutex_t mtx;
+  int runs;
+};
+
+void
+signal_handler (int sig_num)
+{
+  if (sig_num != SIGUSR1)
+    printf ("Unexpected signal");
+}
+
+/* Call pthread_mutex_timedlock()/pthread_mutex_unlock() repetitively, hoping
+   that one of them returns EAGAIN or EINTR unexpectedly.  */
+static void *
+worker (void *arg)
+{
+  pthread_mutex_t *mtx = &(((struct worker_args *) arg)->mtx);
+  struct timespec abs_time;
+  signal (SIGUSR1, signal_handler);
+
+  for (unsigned run = 0; run < ((struct worker_args *) arg)->runs; run++)
+    {
+      clock_gettime (CLOCK_REALTIME, &abs_time);
+      abs_time.tv_nsec += TIMEOUT;
+      if (abs_time.tv_nsec >= NANO_PER_SEC)
+	{
+	  abs_time.tv_sec++;
+	  abs_time.tv_nsec -= NANO_PER_SEC;
+	}
+
+      int ret = pthread_mutex_timedlock (mtx, &abs_time);
+
+      if (ret == 0)
+	pthread_mutex_unlock (mtx);
+
+      if (ret == EAGAIN || ret == EINTR)
+	{
+	  printf ("Unexpected return %d\n", ret);
+	  return (void *) 1;
+	}
+    }
+  return NULL;
+}
+
+static int
+do_test (void)
+{
+  pthread_t *workers =
+    (pthread_t *) malloc (NUM_THREADS * sizeof (pthread_t));
+
+  struct worker_args args;
+  pthread_mutex_init (&(args.mtx), NULL);
+
+  int *thread_ret = NULL, ret = 0;
+
+  /* Run the checks to catch an EAGAIN.  */
+  /* As there is no way to ensure the error condition, just run the test many
+     times hoping to catch the error.  */
+  args.runs = 100;
+  for (int run = 0; run < RETEST_TIMES; run++)
+    {
+      for (int i = 0; i < NUM_THREADS; i++)
+	{
+	  pthread_create (&workers[i], NULL, worker, (void *) &args);
+	}
+      for (int i = 0; i < NUM_THREADS; i++)
+	{
+	  pthread_join (workers[i], (void **) &thread_ret);
+	  if (thread_ret != NULL)
+	    ++ret;
+	}
+      if (ret != 0)
+	goto err;
+    }
+
+  /* Run the test to check if we catch an EINTR.  */
+  /* As there is no way to ensure the error condition, just run the test many
+     times hoping to catch the error.  */
+  pthread_t thread;
+  args.runs = 1;
+  for (int i = 0; i < RETEST_TIMES * 1000; i++)
+    {
+      if (pthread_mutex_lock (&(args.mtx)) != 0)
+	{
+	  printf ("Mutex lock failed\n");
+	  goto err;
+	}
+      pthread_create (&thread, NULL, worker, (void *) &args);
+      pthread_kill (thread, SIGUSR1);
+      pthread_mutex_unlock (&(args.mtx));
+      pthread_join (thread, (void **) &thread_ret);
+      if (thread_ret != NULL)
+	goto err;
+    }
+
+  free (workers);
+  return 0;
+
+err:
+  free (workers);
+  return 1;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/nptl/futex-internal.h b/sysdeps/nptl/futex-internal.h
index e67803952f..f16d26d994 100644
--- a/sysdeps/nptl/futex-internal.h
+++ b/sysdeps/nptl/futex-internal.h
@@ -424,10 +424,19 @@  __futex_clocklock64 (int *futex, clockid_t clockid,
     {
       while (atomic_exchange_acq (futex, 2) != 0)
         {
+	  /* At this point we tried to get the futex but failed and set its
+	     value to 2.  However the futex value can be changed by other
+	     thread before this calls the futex syscall.  If so the syscall
+	     will return EAGAIN.  */
           err = __futex_abstimed_wait64 ((unsigned int *) futex, 2, clockid,
 					 abstime, private);
           if (err == EINVAL || err == ETIMEDOUT || err == EOVERFLOW)
             break;
+	  /* If EAGAIN or EINTR is returned here the error code should be reset
+	     as we will try again to acquire the futex and it may success.
+	     Otherwise the mutex will be locked and the return will not be 0.  */
+	  if (err == EAGAIN || err == EINTR)
+	    err = 0;
         }
     }
   return err;