[v2,3/4] Move libio lock single-thread optimization to generic libc-lock (BZ #27842)

Message ID 20220426191523.833171-4-adhemerval.zanella@linaro.org
State Superseded
Headers
Series Move libio lock single-thread optimization to generic libc-lock |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Adhemerval Zanella Netto April 26, 2022, 7:15 p.m. UTC
  This patch moves the single thread stdio optimization (d2e04918833d9)
to the libc-lock.  With generic support there is no need to add
per-function code to handle the single-thread case, and it allows to
removes the _IO_enable_locks (since once process goes multithread the
locks will be always used).

It also handles the memory streams requirement (de895ddcd7fc), since
the SINGLE_THREAD_P already contains te information whether the
process is multithread (so there is no need to disable the optimization
because such stream are listed in _IO_list_all).

Finally it also removed the flockfile uses a read-modify-write operation
on _flags2 outside a lock region (BZ #27842).

Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu.
---
 libio/Versions           |  3 ---
 libio/feof.c             |  2 --
 libio/ferror.c           |  2 --
 libio/fputc.c            |  2 --
 libio/genops.c           | 28 ----------------------------
 libio/getc.c             |  2 --
 libio/getchar.c          |  4 +---
 libio/iofopncook.c       |  2 --
 libio/ioungetc.c         |  2 --
 libio/libio.h            |  4 ----
 libio/memstream.c        |  3 ---
 libio/putc.c             |  2 --
 libio/wmemstream.c       |  3 ---
 nptl/pthread_create.c    |  3 ---
 stdio-common/flockfile.c |  1 -
 sysdeps/mach/libc-lock.h |  9 +++++----
 sysdeps/nptl/libc-lock.h |  7 ++++---
 17 files changed, 10 insertions(+), 69 deletions(-)
  

Comments

Florian Weimer April 27, 2022, 1:30 p.m. UTC | #1
* Adhemerval Zanella via Libc-alpha:

> diff --git a/sysdeps/nptl/libc-lock.h b/sysdeps/nptl/libc-lock.h
> index 6c2d6acfd1..abd84e71b4 100644
> --- a/sysdeps/nptl/libc-lock.h
> +++ b/sysdeps/nptl/libc-lock.h
> @@ -86,7 +86,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>  # define __libc_lock_lock_recursive(NAME) \
>    do {									      \
>      void *self = THREAD_SELF;						      \
> -    if ((NAME).owner != self)						      \
> +    if (!SINGLE_THREAD_P && (NAME).owner != self)						      \
>        {									      \
>  	lll_lock ((NAME).lock, LLL_PRIVATE);				      \
>  	(NAME).owner = self;						      \
> @@ -104,7 +104,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>    ({									      \
>      int result = 0;							      \
>      void *self = THREAD_SELF;						      \
> -    if ((NAME).owner != self)						      \
> +    if (!SINGLE_THREAD_P && (NAME).owner != self)						      \
>        {									      \
>  	if (lll_trylock ((NAME).lock) == 0)				      \
>  	  {								      \
> @@ -131,7 +131,8 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>      if (--(NAME).cnt == 0)						      \
>        {									      \
>  	(NAME).owner = NULL;						      \
> -	lll_unlock ((NAME).lock, LLL_PRIVATE);				      \
> +        if (!SINGLE_THREAD_P)                                                 \
> +	  lll_unlock ((NAME).lock, LLL_PRIVATE);			      \
>        }									      \
>    } while (0)
>  #else

I don't think this is correct if threads are created in the lock region.

Thanks,
Florian
  
Adhemerval Zanella Netto April 27, 2022, 4:32 p.m. UTC | #2
On 27/04/2022 10:30, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> diff --git a/sysdeps/nptl/libc-lock.h b/sysdeps/nptl/libc-lock.h
>> index 6c2d6acfd1..abd84e71b4 100644
>> --- a/sysdeps/nptl/libc-lock.h
>> +++ b/sysdeps/nptl/libc-lock.h
>> @@ -86,7 +86,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>>  # define __libc_lock_lock_recursive(NAME) \
>>    do {									      \
>>      void *self = THREAD_SELF;						      \
>> -    if ((NAME).owner != self)						      \
>> +    if (!SINGLE_THREAD_P && (NAME).owner != self)						      \
>>        {									      \
>>  	lll_lock ((NAME).lock, LLL_PRIVATE);				      \
>>  	(NAME).owner = self;						      \
>> @@ -104,7 +104,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>>    ({									      \
>>      int result = 0;							      \
>>      void *self = THREAD_SELF;						      \
>> -    if ((NAME).owner != self)						      \
>> +    if (!SINGLE_THREAD_P && (NAME).owner != self)						      \
>>        {									      \
>>  	if (lll_trylock ((NAME).lock) == 0)				      \
>>  	  {								      \
>> @@ -131,7 +131,8 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>>      if (--(NAME).cnt == 0)						      \
>>        {									      \
>>  	(NAME).owner = NULL;						      \
>> -	lll_unlock ((NAME).lock, LLL_PRIVATE);				      \
>> +        if (!SINGLE_THREAD_P)                                                 \
>> +	  lll_unlock ((NAME).lock, LLL_PRIVATE);			      \
>>        }									      \
>>    } while (0)
>>  #else
> 
> I don't think this is correct if threads are created in the lock region.

I was not sure about this one and I think we the main issue in fact there is
we can't use the single-thread optimization on unlock.  Maybe a better option
would to use a different scheme as proposed by 
https://hal.inria.fr/hal-01236734/document, where we can embedded lock and
cnt in only one variable (as the lll_lock already does).
  
Adhemerval Zanella Netto April 28, 2022, 4:39 p.m. UTC | #3
On 27/04/2022 13:32, Adhemerval Zanella wrote:
> 
> 
> On 27/04/2022 10:30, Florian Weimer wrote:
>> * Adhemerval Zanella via Libc-alpha:
>>
>>> diff --git a/sysdeps/nptl/libc-lock.h b/sysdeps/nptl/libc-lock.h
>>> index 6c2d6acfd1..abd84e71b4 100644
>>> --- a/sysdeps/nptl/libc-lock.h
>>> +++ b/sysdeps/nptl/libc-lock.h
>>> @@ -86,7 +86,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>>>  # define __libc_lock_lock_recursive(NAME) \
>>>    do {									      \
>>>      void *self = THREAD_SELF;						      \
>>> -    if ((NAME).owner != self)						      \
>>> +    if (!SINGLE_THREAD_P && (NAME).owner != self)						      \
>>>        {									      \
>>>  	lll_lock ((NAME).lock, LLL_PRIVATE);				      \
>>>  	(NAME).owner = self;						      \
>>> @@ -104,7 +104,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>>>    ({									      \
>>>      int result = 0;							      \
>>>      void *self = THREAD_SELF;						      \
>>> -    if ((NAME).owner != self)						      \
>>> +    if (!SINGLE_THREAD_P && (NAME).owner != self)						      \
>>>        {									      \
>>>  	if (lll_trylock ((NAME).lock) == 0)				      \
>>>  	  {								      \
>>> @@ -131,7 +131,8 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>>>      if (--(NAME).cnt == 0)						      \
>>>        {									      \
>>>  	(NAME).owner = NULL;						      \
>>> -	lll_unlock ((NAME).lock, LLL_PRIVATE);				      \
>>> +        if (!SINGLE_THREAD_P)                                                 \
>>> +	  lll_unlock ((NAME).lock, LLL_PRIVATE);			      \
>>>        }									      \
>>>    } while (0)
>>>  #else
>>
>> I don't think this is correct if threads are created in the lock region.
> 
> I was not sure about this one and I think we the main issue in fact there is
> we can't use the single-thread optimization on unlock.  Maybe a better option
> would to use a different scheme as proposed by 
> https://hal.inria.fr/hal-01236734/document, where we can embedded lock and
> cnt in only one variable (as the lll_lock already does).

Don't 99f841c441feeaa9a3d97fd91bb3d6ec8073c982 have the issue for pthread_mutex_lock ?
  
Florian Weimer April 28, 2022, 4:56 p.m. UTC | #4
* Adhemerval Zanella:

>>>>  	(NAME).owner = NULL;						      \
>>>> -	lll_unlock ((NAME).lock, LLL_PRIVATE);				      \
>>>> +        if (!SINGLE_THREAD_P)                                                 \
>>>> +	  lll_unlock ((NAME).lock, LLL_PRIVATE);			      \
>>>>        }									      \
>>>>    } while (0)
>>>>  #else
>>>
>>> I don't think this is correct if threads are created in the lock region.
>> 
>> I was not sure about this one and I think we the main issue in fact there is
>> we can't use the single-thread optimization on unlock.  Maybe a better option
>> would to use a different scheme as proposed by 
>> https://hal.inria.fr/hal-01236734/document, where we can embedded lock and
>> cnt in only one variable (as the lll_lock already does).
>
> Don't 99f841c441feeaa9a3d97fd91bb3d6ec8073c982 have the issue for pthread_mutex_lock ?

No, that optimization follows our documented guidance, namely:

| Most applications should perform the same actions whether or not
| @code{__libc_single_threaded} is true, except with less
| synchronization.  If this rule is followed, a process that
| subsequently becomes multi-threaded is already in a consistent state.

Thanks,
Florian
  
Adhemerval Zanella Netto April 28, 2022, 5:14 p.m. UTC | #5
On 28/04/2022 13:56, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>>>>>  	(NAME).owner = NULL;						      \
>>>>> -	lll_unlock ((NAME).lock, LLL_PRIVATE);				      \
>>>>> +        if (!SINGLE_THREAD_P)                                                 \
>>>>> +	  lll_unlock ((NAME).lock, LLL_PRIVATE);			      \
>>>>>        }									      \
>>>>>    } while (0)
>>>>>  #else
>>>>
>>>> I don't think this is correct if threads are created in the lock region.
>>>
>>> I was not sure about this one and I think we the main issue in fact there is
>>> we can't use the single-thread optimization on unlock.  Maybe a better option
>>> would to use a different scheme as proposed by 
>>> https://hal.inria.fr/hal-01236734/document, where we can embedded lock and
>>> cnt in only one variable (as the lll_lock already does).
>>
>> Don't 99f841c441feeaa9a3d97fd91bb3d6ec8073c982 have the issue for pthread_mutex_lock ?
> 
> No, that optimization follows our documented guidance, namely:
> 
> | Most applications should perform the same actions whether or not
> | @code{__libc_single_threaded} is true, except with less
> | synchronization.  If this rule is followed, a process that
> | subsequently becomes multi-threaded is already in a consistent state.
> 

So I wonder if

/* Lock the recursive named lock variable.  */
#define __libc_lock_lock_recursive(NAME) \
  do {                                                                        \
    void *self = THREAD_SELF;                                                 \
    if ((NAME).owner != self)                                                 \
      {									      \
	if (SINGLE_THREAD_P)                                                  \
	  (NAME).lock = 1;						      \
        else								      \
           lll_lock ((NAME).lock, LLL_PRIVATE);                               \
        (NAME).owner = self;                                                  \
      }                                                                       \
    ++(NAME).cnt;                                                             \
  } while (0)

/* Unlock the recursive named lock variable.  */
/* We do no error checking here.  */
#define __libc_lock_unlock_recursive(NAME) \
  do {                                                                        \
    if (--(NAME).cnt == 0)                                                    \
      {                                                                       \
        (NAME).owner = NULL;                                                  \
        if (SINGLE_THREAD_P)						      \
          (NAME).lock = 0;  						      \
        else                                                                  \
          lll_unlock ((NAME).lock, LLL_PRIVATE);                              \
      }                                                                       \
  } while (0)

Or if we are bounded to keep the current practice to check for single-thread and
skip locks internally.  It would be good to consolidate all the internal lock
usage and have the single-thread lock optimizations on all locks, not only on
pthread_mutex_lock.
  

Patch

diff --git a/libio/Versions b/libio/Versions
index b91a7bc914..b528fae670 100644
--- a/libio/Versions
+++ b/libio/Versions
@@ -159,9 +159,6 @@  libc {
     # Used by NPTL and librt
     __libc_fatal;
 
-    # Used by NPTL
-    _IO_enable_locks;
-
     __fseeko64;
     __ftello64;
   }
diff --git a/libio/feof.c b/libio/feof.c
index 7f79b2795e..41e7beeca7 100644
--- a/libio/feof.c
+++ b/libio/feof.c
@@ -32,8 +32,6 @@  _IO_feof (FILE *fp)
 {
   int result;
   CHECK_FILE (fp, EOF);
-  if (!_IO_need_lock (fp))
-    return _IO_feof_unlocked (fp);
   _IO_flockfile (fp);
   result = _IO_feof_unlocked (fp);
   _IO_funlockfile (fp);
diff --git a/libio/ferror.c b/libio/ferror.c
index d2489c54d3..a60efc3f0f 100644
--- a/libio/ferror.c
+++ b/libio/ferror.c
@@ -32,8 +32,6 @@  _IO_ferror (FILE *fp)
 {
   int result;
   CHECK_FILE (fp, EOF);
-  if (!_IO_need_lock (fp))
-    return _IO_ferror_unlocked (fp);
   _IO_flockfile (fp);
   result = _IO_ferror_unlocked (fp);
   _IO_funlockfile (fp);
diff --git a/libio/fputc.c b/libio/fputc.c
index d29a9d0aee..c3d66500d1 100644
--- a/libio/fputc.c
+++ b/libio/fputc.c
@@ -32,8 +32,6 @@  fputc (int c, FILE *fp)
 {
   int result;
   CHECK_FILE (fp, EOF);
-  if (!_IO_need_lock (fp))
-    return _IO_putc_unlocked (c, fp);
   _IO_acquire_lock (fp);
   result = _IO_putc_unlocked (c, fp);
   _IO_release_lock (fp);
diff --git a/libio/genops.c b/libio/genops.c
index ccfbcd149d..eca50bacbc 100644
--- a/libio/genops.c
+++ b/libio/genops.c
@@ -488,39 +488,11 @@  _IO_init (FILE *fp, int flags)
   _IO_init_internal (fp, flags);
 }
 
-static int stdio_needs_locking;
-
-/* In a single-threaded process most stdio locks can be omitted.  After
-   _IO_enable_locks is called, locks are not optimized away any more.
-   It must be first called while the process is still single-threaded.
-
-   This lock optimization can be disabled on a per-file basis by setting
-   _IO_FLAGS2_NEED_LOCK, because a file can have user-defined callbacks
-   or can be locked with flockfile and then a thread may be created
-   between a lock and unlock, so omitting the lock is not valid.
-
-   Here we have to make sure that the flag is set on all existing files
-   and files created later.  */
-void
-_IO_enable_locks (void)
-{
-  _IO_ITER i;
-
-  if (stdio_needs_locking)
-    return;
-  stdio_needs_locking = 1;
-  for (i = _IO_iter_begin (); i != _IO_iter_end (); i = _IO_iter_next (i))
-    _IO_iter_file (i)->_flags2 |= _IO_FLAGS2_NEED_LOCK;
-}
-libc_hidden_def (_IO_enable_locks)
-
 void
 _IO_old_init (FILE *fp, int flags)
 {
   fp->_flags = _IO_MAGIC|flags;
   fp->_flags2 = 0;
-  if (stdio_needs_locking)
-    fp->_flags2 |= _IO_FLAGS2_NEED_LOCK;
   fp->_IO_buf_base = NULL;
   fp->_IO_buf_end = NULL;
   fp->_IO_read_base = NULL;
diff --git a/libio/getc.c b/libio/getc.c
index 8c79cf702f..18647aa3e1 100644
--- a/libio/getc.c
+++ b/libio/getc.c
@@ -34,8 +34,6 @@  _IO_getc (FILE *fp)
 {
   int result;
   CHECK_FILE (fp, EOF);
-  if (!_IO_need_lock (fp))
-    return _IO_getc_unlocked (fp);
   _IO_acquire_lock (fp);
   result = _IO_getc_unlocked (fp);
   _IO_release_lock (fp);
diff --git a/libio/getchar.c b/libio/getchar.c
index e0f4f471b3..4ab1e7b065 100644
--- a/libio/getchar.c
+++ b/libio/getchar.c
@@ -33,10 +33,8 @@  int
 getchar (void)
 {
   int result;
-  if (!_IO_need_lock (stdin))
-    return _IO_getc_unlocked (stdin);
   _IO_acquire_lock (stdin);
   result = _IO_getc_unlocked (stdin);
   _IO_release_lock (stdin);
   return result;
-}
\ No newline at end of file
+}
diff --git a/libio/iofopncook.c b/libio/iofopncook.c
index 16bcc7f4a3..5ecca41717 100644
--- a/libio/iofopncook.c
+++ b/libio/iofopncook.c
@@ -162,8 +162,6 @@  _IO_cookie_init (struct _IO_cookie_file *cfile, int read_write,
   _IO_mask_flags (&cfile->__fp.file, read_write,
 		  _IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);
 
-  cfile->__fp.file._flags2 |= _IO_FLAGS2_NEED_LOCK;
-
   /* We use a negative number different from -1 for _fileno to mark that
      this special stream is not associated with a real file, but still has
      to be treated as such.  */
diff --git a/libio/ioungetc.c b/libio/ioungetc.c
index 92d5572e21..547953dd16 100644
--- a/libio/ioungetc.c
+++ b/libio/ioungetc.c
@@ -33,8 +33,6 @@  ungetc (int c, FILE *fp)
   CHECK_FILE (fp, EOF);
   if (c == EOF)
     return EOF;
-  if (!_IO_need_lock (fp))
-    return _IO_sputbackc (fp, (unsigned char) c);
   _IO_acquire_lock (fp);
   result = _IO_sputbackc (fp, (unsigned char) c);
   _IO_release_lock (fp);
diff --git a/libio/libio.h b/libio/libio.h
index 42ff6143af..e1e8ffdc54 100644
--- a/libio/libio.h
+++ b/libio/libio.h
@@ -88,7 +88,6 @@  typedef struct
 #define _IO_FLAGS2_USER_WBUF 8
 #define _IO_FLAGS2_NOCLOSE 32
 #define _IO_FLAGS2_CLOEXEC 64
-#define _IO_FLAGS2_NEED_LOCK 128
 
 /* _IO_pos_BAD is an off64_t value indicating error, unknown, or EOF.  */
 #define _IO_pos_BAD ((off64_t) -1)
@@ -212,9 +211,6 @@  extern int _IO_ftrylockfile (FILE *) __THROW;
 #define _IO_cleanup_region_end(_Doit) /**/
 #endif
 
-#define _IO_need_lock(_fp) \
-  (((_fp)->_flags2 & _IO_FLAGS2_NEED_LOCK) != 0)
-
 extern int _IO_vfscanf (FILE * __restrict, const char * __restrict,
 			__gnuc_va_list, int *__restrict);
 extern __ssize_t _IO_padn (FILE *, int, __ssize_t);
diff --git a/libio/memstream.c b/libio/memstream.c
index 7ab61876ba..15022b72ee 100644
--- a/libio/memstream.c
+++ b/libio/memstream.c
@@ -92,9 +92,6 @@  __open_memstream (char **bufloc, size_t *sizeloc)
   new_f->fp.bufloc = bufloc;
   new_f->fp.sizeloc = sizeloc;
 
-  /* Disable single thread optimization.  BZ 21735.  */
-  new_f->fp._sf._sbf._f._flags2 |= _IO_FLAGS2_NEED_LOCK;
-
   return (FILE *) &new_f->fp._sf._sbf;
 }
 libc_hidden_def (__open_memstream)
diff --git a/libio/putc.c b/libio/putc.c
index 44e30649c9..f4cc2024b6 100644
--- a/libio/putc.c
+++ b/libio/putc.c
@@ -25,8 +25,6 @@  _IO_putc (int c, FILE *fp)
 {
   int result;
   CHECK_FILE (fp, EOF);
-  if (!_IO_need_lock (fp))
-    return _IO_putc_unlocked (c, fp);
   _IO_acquire_lock (fp);
   result = _IO_putc_unlocked (c, fp);
   _IO_release_lock (fp);
diff --git a/libio/wmemstream.c b/libio/wmemstream.c
index 9366ef4aad..abaf421069 100644
--- a/libio/wmemstream.c
+++ b/libio/wmemstream.c
@@ -94,9 +94,6 @@  open_wmemstream (wchar_t **bufloc, size_t *sizeloc)
   new_f->fp.bufloc = bufloc;
   new_f->fp.sizeloc = sizeloc;
 
-  /* Disable single thread optimization.  BZ 21735.  */
-  new_f->fp._sf._sbf._f._flags2 |= _IO_FLAGS2_NEED_LOCK;
-
   return (FILE *) &new_f->fp._sf._sbf;
 }
 
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index e7a099acb7..4f45ea36bc 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -740,9 +740,6 @@  __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
         collect_default_sched (pd);
     }
 
-  if (__glibc_unlikely (__nptl_nthreads == 1))
-    _IO_enable_locks ();
-
   /* Pass the descriptor to the caller.  */
   *newthread = (pthread_t) pd;
 
diff --git a/stdio-common/flockfile.c b/stdio-common/flockfile.c
index a5decb450f..49f72c69ab 100644
--- a/stdio-common/flockfile.c
+++ b/stdio-common/flockfile.c
@@ -22,7 +22,6 @@ 
 void
 __flockfile (FILE *stream)
 {
-  stream->_flags2 |= _IO_FLAGS2_NEED_LOCK;
   _IO_lock_lock (*stream->_lock);
 }
 weak_alias (__flockfile, flockfile);
diff --git a/sysdeps/mach/libc-lock.h b/sysdeps/mach/libc-lock.h
index 225eb67f5a..ee38948d1e 100644
--- a/sysdeps/mach/libc-lock.h
+++ b/sysdeps/mach/libc-lock.h
@@ -106,9 +106,9 @@  typedef struct __libc_lock_recursive_opaque__ __libc_lock_recursive_t;
      __libc_lock_recursive_t *const __lock = &(NAME);   \
      void *__self = __libc_lock_owner_self ();   \
      int __r = 0;   \
-     if (__self == __lock->owner)   \
+     if (!SINGLE_THREAD_P && __self == __lock->owner)   \
        ++__lock->cnt;   \
-     else if ((__r = lll_trylock (__lock->lock)) == 0)   \
+     else if (!SINGLE_THREAD_P && (__r = lll_trylock (__lock->lock)) == 0)   \
        __lock->owner = __self, __lock->cnt = 1;   \
      __r;   \
    })
@@ -117,7 +117,7 @@  typedef struct __libc_lock_recursive_opaque__ __libc_lock_recursive_t;
   ({   \
      __libc_lock_recursive_t *const __lock = &(NAME);   \
      void *__self = __libc_lock_owner_self ();   \
-     if (__self != __lock->owner)   \
+     if (!SINGLE_THREAD_P && __self != __lock->owner)   \
        {   \
          lll_lock (__lock->lock, 0);   \
          __lock->owner = __self;   \
@@ -132,7 +132,8 @@  typedef struct __libc_lock_recursive_opaque__ __libc_lock_recursive_t;
      if (--__lock->cnt == 0)   \
        {   \
          __lock->owner = 0;   \
-         lll_unlock (__lock->lock, 0);   \
+         if (!SINGLE_THREAD_P) \
+           lll_unlock (__lock->lock, 0);   \
        }   \
    })
 
diff --git a/sysdeps/nptl/libc-lock.h b/sysdeps/nptl/libc-lock.h
index 6c2d6acfd1..abd84e71b4 100644
--- a/sysdeps/nptl/libc-lock.h
+++ b/sysdeps/nptl/libc-lock.h
@@ -86,7 +86,7 @@  typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
 # define __libc_lock_lock_recursive(NAME) \
   do {									      \
     void *self = THREAD_SELF;						      \
-    if ((NAME).owner != self)						      \
+    if (!SINGLE_THREAD_P && (NAME).owner != self)						      \
       {									      \
 	lll_lock ((NAME).lock, LLL_PRIVATE);				      \
 	(NAME).owner = self;						      \
@@ -104,7 +104,7 @@  typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
   ({									      \
     int result = 0;							      \
     void *self = THREAD_SELF;						      \
-    if ((NAME).owner != self)						      \
+    if (!SINGLE_THREAD_P && (NAME).owner != self)						      \
       {									      \
 	if (lll_trylock ((NAME).lock) == 0)				      \
 	  {								      \
@@ -131,7 +131,8 @@  typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
     if (--(NAME).cnt == 0)						      \
       {									      \
 	(NAME).owner = NULL;						      \
-	lll_unlock ((NAME).lock, LLL_PRIVATE);				      \
+        if (!SINGLE_THREAD_P)                                                 \
+	  lll_unlock ((NAME).lock, LLL_PRIVATE);			      \
       }									      \
   } while (0)
 #else