[v2,3/4] Move libio lock single-thread optimization to generic libc-lock (BZ #27842)
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
Commit Message
This patch moves the single-thread stdio optimization (d2e04918833d9)
to the generic libc-lock. With generic support there is no need to add
per-function code to handle the single-thread case, and it allows
removing _IO_enable_locks (since once a process goes multithreaded the
locks will always be used).
It also handles the memory streams requirement (de895ddcd7fc), since
SINGLE_THREAD_P already contains the information about whether the
process is multithreaded (so there is no need to disable the optimization
because such streams are listed in _IO_list_all).
Finally, it also removes the read-modify-write operation on _flags2
that flockfile performed outside a lock region (BZ #27842).
Checked on x86_64-linux-gnu, i686-linux-gnu, and aarch64-linux-gnu.
---
libio/Versions | 3 ---
libio/feof.c | 2 --
libio/ferror.c | 2 --
libio/fputc.c | 2 --
libio/genops.c | 28 ----------------------------
libio/getc.c | 2 --
libio/getchar.c | 4 +---
libio/iofopncook.c | 2 --
libio/ioungetc.c | 2 --
libio/libio.h | 4 ----
libio/memstream.c | 3 ---
libio/putc.c | 2 --
libio/wmemstream.c | 3 ---
nptl/pthread_create.c | 3 ---
stdio-common/flockfile.c | 1 -
sysdeps/mach/libc-lock.h | 9 +++++----
sysdeps/nptl/libc-lock.h | 7 ++++---
17 files changed, 10 insertions(+), 69 deletions(-)
Comments
* Adhemerval Zanella via Libc-alpha:
> diff --git a/sysdeps/nptl/libc-lock.h b/sysdeps/nptl/libc-lock.h
> index 6c2d6acfd1..abd84e71b4 100644
> --- a/sysdeps/nptl/libc-lock.h
> +++ b/sysdeps/nptl/libc-lock.h
> @@ -86,7 +86,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
> # define __libc_lock_lock_recursive(NAME) \
> do { \
> void *self = THREAD_SELF; \
> - if ((NAME).owner != self) \
> + if (!SINGLE_THREAD_P && (NAME).owner != self) \
> { \
> lll_lock ((NAME).lock, LLL_PRIVATE); \
> (NAME).owner = self; \
> @@ -104,7 +104,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
> ({ \
> int result = 0; \
> void *self = THREAD_SELF; \
> - if ((NAME).owner != self) \
> + if (!SINGLE_THREAD_P && (NAME).owner != self) \
> { \
> if (lll_trylock ((NAME).lock) == 0) \
> { \
> @@ -131,7 +131,8 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
> if (--(NAME).cnt == 0) \
> { \
> (NAME).owner = NULL; \
> - lll_unlock ((NAME).lock, LLL_PRIVATE); \
> + if (!SINGLE_THREAD_P) \
> + lll_unlock ((NAME).lock, LLL_PRIVATE); \
> } \
> } while (0)
> #else
I don't think this is correct if threads are created in the lock region.
Thanks,
Florian
On 27/04/2022 10:30, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
>
>> diff --git a/sysdeps/nptl/libc-lock.h b/sysdeps/nptl/libc-lock.h
>> index 6c2d6acfd1..abd84e71b4 100644
>> --- a/sysdeps/nptl/libc-lock.h
>> +++ b/sysdeps/nptl/libc-lock.h
>> @@ -86,7 +86,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>> # define __libc_lock_lock_recursive(NAME) \
>> do { \
>> void *self = THREAD_SELF; \
>> - if ((NAME).owner != self) \
>> + if (!SINGLE_THREAD_P && (NAME).owner != self) \
>> { \
>> lll_lock ((NAME).lock, LLL_PRIVATE); \
>> (NAME).owner = self; \
>> @@ -104,7 +104,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>> ({ \
>> int result = 0; \
>> void *self = THREAD_SELF; \
>> - if ((NAME).owner != self) \
>> + if (!SINGLE_THREAD_P && (NAME).owner != self) \
>> { \
>> if (lll_trylock ((NAME).lock) == 0) \
>> { \
>> @@ -131,7 +131,8 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>> if (--(NAME).cnt == 0) \
>> { \
>> (NAME).owner = NULL; \
>> - lll_unlock ((NAME).lock, LLL_PRIVATE); \
>> + if (!SINGLE_THREAD_P) \
>> + lll_unlock ((NAME).lock, LLL_PRIVATE); \
>> } \
>> } while (0)
>> #else
>
> I don't think this is correct if threads are created in the lock region.
I was not sure about this one, and I think the main issue is in fact that
we can't use the single-thread optimization on unlock. Maybe a better option
would be to use a different scheme as proposed by
https://hal.inria.fr/hal-01236734/document, where we can embed the lock and
cnt in only one variable (as lll_lock already does).
On 27/04/2022 13:32, Adhemerval Zanella wrote:
>
>
> On 27/04/2022 10:30, Florian Weimer wrote:
>> * Adhemerval Zanella via Libc-alpha:
>>
>>> diff --git a/sysdeps/nptl/libc-lock.h b/sysdeps/nptl/libc-lock.h
>>> index 6c2d6acfd1..abd84e71b4 100644
>>> --- a/sysdeps/nptl/libc-lock.h
>>> +++ b/sysdeps/nptl/libc-lock.h
>>> @@ -86,7 +86,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>>> # define __libc_lock_lock_recursive(NAME) \
>>> do { \
>>> void *self = THREAD_SELF; \
>>> - if ((NAME).owner != self) \
>>> + if (!SINGLE_THREAD_P && (NAME).owner != self) \
>>> { \
>>> lll_lock ((NAME).lock, LLL_PRIVATE); \
>>> (NAME).owner = self; \
>>> @@ -104,7 +104,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>>> ({ \
>>> int result = 0; \
>>> void *self = THREAD_SELF; \
>>> - if ((NAME).owner != self) \
>>> + if (!SINGLE_THREAD_P && (NAME).owner != self) \
>>> { \
>>> if (lll_trylock ((NAME).lock) == 0) \
>>> { \
>>> @@ -131,7 +131,8 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
>>> if (--(NAME).cnt == 0) \
>>> { \
>>> (NAME).owner = NULL; \
>>> - lll_unlock ((NAME).lock, LLL_PRIVATE); \
>>> + if (!SINGLE_THREAD_P) \
>>> + lll_unlock ((NAME).lock, LLL_PRIVATE); \
>>> } \
>>> } while (0)
>>> #else
>>
>> I don't think this is correct if threads are created in the lock region.
>
> I was not sure about this one and I think we the main issue in fact there is
> we can't use the single-thread optimization on unlock. Maybe a better option
> would to use a different scheme as proposed by
> https://hal.inria.fr/hal-01236734/document, where we can embedded lock and
> cnt in only one variable (as the lll_lock already does).
Doesn't 99f841c441feeaa9a3d97fd91bb3d6ec8073c982 have the same issue for pthread_mutex_lock?
* Adhemerval Zanella:
>>>> (NAME).owner = NULL; \
>>>> - lll_unlock ((NAME).lock, LLL_PRIVATE); \
>>>> + if (!SINGLE_THREAD_P) \
>>>> + lll_unlock ((NAME).lock, LLL_PRIVATE); \
>>>> } \
>>>> } while (0)
>>>> #else
>>>
>>> I don't think this is correct if threads are created in the lock region.
>>
>> I was not sure about this one and I think we the main issue in fact there is
>> we can't use the single-thread optimization on unlock. Maybe a better option
>> would to use a different scheme as proposed by
>> https://hal.inria.fr/hal-01236734/document, where we can embedded lock and
>> cnt in only one variable (as the lll_lock already does).
>
> Don't 99f841c441feeaa9a3d97fd91bb3d6ec8073c982 have the issue for pthread_mutex_lock ?
No, that optimization follows our documented guidance, namely:
| Most applications should perform the same actions whether or not
| @code{__libc_single_threaded} is true, except with less
| synchronization. If this rule is followed, a process that
| subsequently becomes multi-threaded is already in a consistent state.
Thanks,
Florian
On 28/04/2022 13:56, Florian Weimer wrote:
> * Adhemerval Zanella:
>
>>>>> (NAME).owner = NULL; \
>>>>> - lll_unlock ((NAME).lock, LLL_PRIVATE); \
>>>>> + if (!SINGLE_THREAD_P) \
>>>>> + lll_unlock ((NAME).lock, LLL_PRIVATE); \
>>>>> } \
>>>>> } while (0)
>>>>> #else
>>>>
>>>> I don't think this is correct if threads are created in the lock region.
>>>
>>> I was not sure about this one and I think we the main issue in fact there is
>>> we can't use the single-thread optimization on unlock. Maybe a better option
>>> would to use a different scheme as proposed by
>>> https://hal.inria.fr/hal-01236734/document, where we can embedded lock and
>>> cnt in only one variable (as the lll_lock already does).
>>
>> Don't 99f841c441feeaa9a3d97fd91bb3d6ec8073c982 have the issue for pthread_mutex_lock ?
>
> No, that optimization follows our documented guidance, namely:
>
> | Most applications should perform the same actions whether or not
> | @code{__libc_single_threaded} is true, except with less
> | synchronization. If this rule is followed, a process that
> | subsequently becomes multi-threaded is already in a consistent state.
>
So I wonder if
/* Lock the recursive named lock variable. */
#define __libc_lock_lock_recursive(NAME) \
do { \
void *self = THREAD_SELF; \
if ((NAME).owner != self) \
{ \
if (SINGLE_THREAD_P) \
(NAME).lock = 1; \
else \
lll_lock ((NAME).lock, LLL_PRIVATE); \
(NAME).owner = self; \
} \
++(NAME).cnt; \
} while (0)
/* Unlock the recursive named lock variable. */
/* We do no error checking here. */
#define __libc_lock_unlock_recursive(NAME) \
do { \
if (--(NAME).cnt == 0) \
{ \
(NAME).owner = NULL; \
if (SINGLE_THREAD_P) \
(NAME).lock = 0; \
else \
lll_unlock ((NAME).lock, LLL_PRIVATE); \
} \
} while (0)
Or whether we are bound to keep the current practice of checking for
single-thread and skipping locks internally. It would be good to consolidate
all the internal lock usage and have the single-thread lock optimization on
all locks, not only on pthread_mutex_lock.
@@ -159,9 +159,6 @@ libc {
# Used by NPTL and librt
__libc_fatal;
- # Used by NPTL
- _IO_enable_locks;
-
__fseeko64;
__ftello64;
}
@@ -32,8 +32,6 @@ _IO_feof (FILE *fp)
{
int result;
CHECK_FILE (fp, EOF);
- if (!_IO_need_lock (fp))
- return _IO_feof_unlocked (fp);
_IO_flockfile (fp);
result = _IO_feof_unlocked (fp);
_IO_funlockfile (fp);
@@ -32,8 +32,6 @@ _IO_ferror (FILE *fp)
{
int result;
CHECK_FILE (fp, EOF);
- if (!_IO_need_lock (fp))
- return _IO_ferror_unlocked (fp);
_IO_flockfile (fp);
result = _IO_ferror_unlocked (fp);
_IO_funlockfile (fp);
@@ -32,8 +32,6 @@ fputc (int c, FILE *fp)
{
int result;
CHECK_FILE (fp, EOF);
- if (!_IO_need_lock (fp))
- return _IO_putc_unlocked (c, fp);
_IO_acquire_lock (fp);
result = _IO_putc_unlocked (c, fp);
_IO_release_lock (fp);
@@ -488,39 +488,11 @@ _IO_init (FILE *fp, int flags)
_IO_init_internal (fp, flags);
}
-static int stdio_needs_locking;
-
-/* In a single-threaded process most stdio locks can be omitted. After
- _IO_enable_locks is called, locks are not optimized away any more.
- It must be first called while the process is still single-threaded.
-
- This lock optimization can be disabled on a per-file basis by setting
- _IO_FLAGS2_NEED_LOCK, because a file can have user-defined callbacks
- or can be locked with flockfile and then a thread may be created
- between a lock and unlock, so omitting the lock is not valid.
-
- Here we have to make sure that the flag is set on all existing files
- and files created later. */
-void
-_IO_enable_locks (void)
-{
- _IO_ITER i;
-
- if (stdio_needs_locking)
- return;
- stdio_needs_locking = 1;
- for (i = _IO_iter_begin (); i != _IO_iter_end (); i = _IO_iter_next (i))
- _IO_iter_file (i)->_flags2 |= _IO_FLAGS2_NEED_LOCK;
-}
-libc_hidden_def (_IO_enable_locks)
-
void
_IO_old_init (FILE *fp, int flags)
{
fp->_flags = _IO_MAGIC|flags;
fp->_flags2 = 0;
- if (stdio_needs_locking)
- fp->_flags2 |= _IO_FLAGS2_NEED_LOCK;
fp->_IO_buf_base = NULL;
fp->_IO_buf_end = NULL;
fp->_IO_read_base = NULL;
@@ -34,8 +34,6 @@ _IO_getc (FILE *fp)
{
int result;
CHECK_FILE (fp, EOF);
- if (!_IO_need_lock (fp))
- return _IO_getc_unlocked (fp);
_IO_acquire_lock (fp);
result = _IO_getc_unlocked (fp);
_IO_release_lock (fp);
@@ -33,10 +33,8 @@ int
getchar (void)
{
int result;
- if (!_IO_need_lock (stdin))
- return _IO_getc_unlocked (stdin);
_IO_acquire_lock (stdin);
result = _IO_getc_unlocked (stdin);
_IO_release_lock (stdin);
return result;
-}
\ No newline at end of file
+}
@@ -162,8 +162,6 @@ _IO_cookie_init (struct _IO_cookie_file *cfile, int read_write,
_IO_mask_flags (&cfile->__fp.file, read_write,
_IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);
- cfile->__fp.file._flags2 |= _IO_FLAGS2_NEED_LOCK;
-
/* We use a negative number different from -1 for _fileno to mark that
this special stream is not associated with a real file, but still has
to be treated as such. */
@@ -33,8 +33,6 @@ ungetc (int c, FILE *fp)
CHECK_FILE (fp, EOF);
if (c == EOF)
return EOF;
- if (!_IO_need_lock (fp))
- return _IO_sputbackc (fp, (unsigned char) c);
_IO_acquire_lock (fp);
result = _IO_sputbackc (fp, (unsigned char) c);
_IO_release_lock (fp);
@@ -88,7 +88,6 @@ typedef struct
#define _IO_FLAGS2_USER_WBUF 8
#define _IO_FLAGS2_NOCLOSE 32
#define _IO_FLAGS2_CLOEXEC 64
-#define _IO_FLAGS2_NEED_LOCK 128
/* _IO_pos_BAD is an off64_t value indicating error, unknown, or EOF. */
#define _IO_pos_BAD ((off64_t) -1)
@@ -212,9 +211,6 @@ extern int _IO_ftrylockfile (FILE *) __THROW;
#define _IO_cleanup_region_end(_Doit) /**/
#endif
-#define _IO_need_lock(_fp) \
- (((_fp)->_flags2 & _IO_FLAGS2_NEED_LOCK) != 0)
-
extern int _IO_vfscanf (FILE * __restrict, const char * __restrict,
__gnuc_va_list, int *__restrict);
extern __ssize_t _IO_padn (FILE *, int, __ssize_t);
@@ -92,9 +92,6 @@ __open_memstream (char **bufloc, size_t *sizeloc)
new_f->fp.bufloc = bufloc;
new_f->fp.sizeloc = sizeloc;
- /* Disable single thread optimization. BZ 21735. */
- new_f->fp._sf._sbf._f._flags2 |= _IO_FLAGS2_NEED_LOCK;
-
return (FILE *) &new_f->fp._sf._sbf;
}
libc_hidden_def (__open_memstream)
@@ -25,8 +25,6 @@ _IO_putc (int c, FILE *fp)
{
int result;
CHECK_FILE (fp, EOF);
- if (!_IO_need_lock (fp))
- return _IO_putc_unlocked (c, fp);
_IO_acquire_lock (fp);
result = _IO_putc_unlocked (c, fp);
_IO_release_lock (fp);
@@ -94,9 +94,6 @@ open_wmemstream (wchar_t **bufloc, size_t *sizeloc)
new_f->fp.bufloc = bufloc;
new_f->fp.sizeloc = sizeloc;
- /* Disable single thread optimization. BZ 21735. */
- new_f->fp._sf._sbf._f._flags2 |= _IO_FLAGS2_NEED_LOCK;
-
return (FILE *) &new_f->fp._sf._sbf;
}
@@ -740,9 +740,6 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
collect_default_sched (pd);
}
- if (__glibc_unlikely (__nptl_nthreads == 1))
- _IO_enable_locks ();
-
/* Pass the descriptor to the caller. */
*newthread = (pthread_t) pd;
@@ -22,7 +22,6 @@
void
__flockfile (FILE *stream)
{
- stream->_flags2 |= _IO_FLAGS2_NEED_LOCK;
_IO_lock_lock (*stream->_lock);
}
weak_alias (__flockfile, flockfile);
@@ -106,9 +106,9 @@ typedef struct __libc_lock_recursive_opaque__ __libc_lock_recursive_t;
__libc_lock_recursive_t *const __lock = &(NAME); \
void *__self = __libc_lock_owner_self (); \
int __r = 0; \
- if (__self == __lock->owner) \
+ if (!SINGLE_THREAD_P && __self == __lock->owner) \
++__lock->cnt; \
- else if ((__r = lll_trylock (__lock->lock)) == 0) \
+ else if (!SINGLE_THREAD_P && (__r = lll_trylock (__lock->lock)) == 0) \
__lock->owner = __self, __lock->cnt = 1; \
__r; \
})
@@ -117,7 +117,7 @@ typedef struct __libc_lock_recursive_opaque__ __libc_lock_recursive_t;
({ \
__libc_lock_recursive_t *const __lock = &(NAME); \
void *__self = __libc_lock_owner_self (); \
- if (__self != __lock->owner) \
+ if (!SINGLE_THREAD_P && __self != __lock->owner) \
{ \
lll_lock (__lock->lock, 0); \
__lock->owner = __self; \
@@ -132,7 +132,8 @@ typedef struct __libc_lock_recursive_opaque__ __libc_lock_recursive_t;
if (--__lock->cnt == 0) \
{ \
__lock->owner = 0; \
- lll_unlock (__lock->lock, 0); \
+ if (!SINGLE_THREAD_P) \
+ lll_unlock (__lock->lock, 0); \
} \
})
@@ -86,7 +86,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
# define __libc_lock_lock_recursive(NAME) \
do { \
void *self = THREAD_SELF; \
- if ((NAME).owner != self) \
+ if (!SINGLE_THREAD_P && (NAME).owner != self) \
{ \
lll_lock ((NAME).lock, LLL_PRIVATE); \
(NAME).owner = self; \
@@ -104,7 +104,7 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
({ \
int result = 0; \
void *self = THREAD_SELF; \
- if ((NAME).owner != self) \
+ if (!SINGLE_THREAD_P && (NAME).owner != self) \
{ \
if (lll_trylock ((NAME).lock) == 0) \
{ \
@@ -131,7 +131,8 @@ typedef struct { int lock; int cnt; void *owner; } __libc_lock_recursive_t;
if (--(NAME).cnt == 0) \
{ \
(NAME).owner = NULL; \
- lll_unlock ((NAME).lock, LLL_PRIVATE); \
+ if (!SINGLE_THREAD_P) \
+ lll_unlock ((NAME).lock, LLL_PRIVATE); \
} \
} while (0)
#else