[RFC] Fix: intl: use nestable locking for reentrancy

Message ID 20210812192459.22129-1-mathieu.desnoyers@efficios.com
State RFC, archived
Headers
Series [RFC] Fix: intl: use nestable locking for reentrancy |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Mathieu Desnoyers Aug. 12, 2021, 7:24 p.m. UTC
  malloc interposers calling failing dl API (e.g. dlopen/dlsym) end up
calling i18n translation. It can corrupt the i18n locking state when
malloc is called internally from within i18n code with i18n locking
held. This is an issue for libasan in gcc 8, 9, 10, 11.

This patch applies on top of glibc 2.31.

This rather crude patch is provided as RFC only: I'm not sure whether we
want to convert all intl locks to recursive locks, and whether a non-rw
recursive lock is appropriate.

Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 intl/bindtextdom.c | 6 +++---
 intl/dcigettext.c  | 8 ++++----
 intl/finddomain.c  | 8 ++++----
 intl/gettextP.h    | 2 +-
 intl/loadmsgcat.c  | 6 +++---
 intl/textdomain.c  | 6 +++---
 6 files changed, 18 insertions(+), 18 deletions(-)
  

Comments

Mathieu Desnoyers Aug. 19, 2021, 3:10 p.m. UTC | #1
----- On Aug 12, 2021, at 3:24 PM, Mathieu Desnoyers mathieu.desnoyers@efficios.com wrote:

> malloc interposers calling failing dl API (e.g. dlopen/dlsym) end up
> calling i18n translation. It can corrupt the i18n locking state when
> malloc is called internally from within i18n code with i18n locking
> held. This is an issue for libasan in gcc 8, 9, 10, 11.
> 
> This patch applies on top of glibc 2.31.
> 
> This rather crude patch is provided as RFC only: I'm not sure whether we
> want to convert all intl locks to recursive locks, and whether a non-rw
> recursive lock is appropriate.

Hi Carlos, Florian,

Before this issue falls off from my backlog pile, is this patch OK as is
as a glibc fix, in which case I could re-send it without the "RFC" tag,
or should I consider making changes either to this patch or to a patch
on top ? For instance, it's a bit weird to keep the "gl_rwlock_" macro
names mapped to non-rw recursive locks. However, renaming all "gl_rwlock_*"
macros to "gl_lock_*_recursive" implies more code churn, which might not
be welcome in a minimal fix.

Thanks,

Mathieu

> 
> Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> ---
> intl/bindtextdom.c | 6 +++---
> intl/dcigettext.c  | 8 ++++----
> intl/finddomain.c  | 8 ++++----
> intl/gettextP.h    | 2 +-
> intl/loadmsgcat.c  | 6 +++---
> intl/textdomain.c  | 6 +++---
> 6 files changed, 18 insertions(+), 18 deletions(-)
> 
> diff --git a/intl/bindtextdom.c b/intl/bindtextdom.c
> index 964d9012a1..06d1ea38d9 100644
> --- a/intl/bindtextdom.c
> +++ b/intl/bindtextdom.c
> @@ -32,9 +32,9 @@
> /* Handle multi-threaded applications.  */
> #ifdef _LIBC
> # include <libc-lock.h>
> -# define gl_rwlock_define __libc_rwlock_define
> -# define gl_rwlock_wrlock __libc_rwlock_wrlock
> -# define gl_rwlock_unlock __libc_rwlock_unlock
> +# define gl_rwlock_rdlock __libc_lock_lock_recursive
> +# define gl_rwlock_wrlock __libc_lock_lock_recursive
> +# define gl_rwlock_unlock __libc_lock_unlock_recursive
> #else
> # include "lock.h"
> #endif
> diff --git a/intl/dcigettext.c b/intl/dcigettext.c
> index bd332e71da..24e07008e9 100644
> --- a/intl/dcigettext.c
> +++ b/intl/dcigettext.c
> @@ -105,10 +105,10 @@ extern int errno;
> /* Handle multi-threaded applications.  */
> #ifdef _LIBC
> # include <libc-lock.h>
> -# define gl_rwlock_define_initialized __libc_rwlock_define_initialized
> -# define gl_rwlock_rdlock __libc_rwlock_rdlock
> -# define gl_rwlock_wrlock __libc_rwlock_wrlock
> -# define gl_rwlock_unlock __libc_rwlock_unlock
> +# define gl_rwlock_define_initialized __libc_lock_define_initialized_recursive
> +# define gl_rwlock_rdlock __libc_lock_lock_recursive
> +# define gl_rwlock_wrlock __libc_lock_lock_recursive
> +# define gl_rwlock_unlock __libc_lock_unlock_recursive
> #else
> # include "lock.h"
> #endif
> diff --git a/intl/finddomain.c b/intl/finddomain.c
> index f88cb89ba0..3fb96ea17f 100644
> --- a/intl/finddomain.c
> +++ b/intl/finddomain.c
> @@ -38,10 +38,10 @@
> /* Handle multi-threaded applications.  */
> #ifdef _LIBC
> # include <libc-lock.h>
> -# define gl_rwlock_define_initialized __libc_rwlock_define_initialized
> -# define gl_rwlock_rdlock __libc_rwlock_rdlock
> -# define gl_rwlock_wrlock __libc_rwlock_wrlock
> -# define gl_rwlock_unlock __libc_rwlock_unlock
> +# define gl_rwlock_define_initialized __libc_lock_define_initialized_recursive
> +# define gl_rwlock_rdlock __libc_lock_lock_recursive
> +# define gl_rwlock_wrlock __libc_lock_lock_recursive
> +# define gl_rwlock_unlock __libc_lock_unlock_recursive
> #else
> # include "lock.h"
> #endif
> diff --git a/intl/gettextP.h b/intl/gettextP.h
> index 5faee93bcc..bc81affb40 100644
> --- a/intl/gettextP.h
> +++ b/intl/gettextP.h
> @@ -31,7 +31,7 @@
> /* Handle multi-threaded applications.  */
> #ifdef _LIBC
> # include <libc-lock.h>
> -# define gl_rwlock_define __libc_rwlock_define
> +# define gl_rwlock_define __libc_lock_define_recursive
> #else
> # include "lock.h"
> #endif
> diff --git a/intl/loadmsgcat.c b/intl/loadmsgcat.c
> index 91c1ef156a..257c910d6d 100644
> --- a/intl/loadmsgcat.c
> +++ b/intl/loadmsgcat.c
> @@ -1250,7 +1250,7 @@ _nl_load_domain (struct loaded_l10nfile *domain_file,
>   domain->conversions = NULL;
>   domain->nconversions = 0;
> #ifdef _LIBC
> -  __libc_rwlock_init (domain->conversions_lock);
> +  __libc_lock_init_recursive (domain->conversions_lock);
> #else
>   gl_rwlock_init (domain->conversions_lock);
> #endif
> @@ -1265,7 +1265,7 @@ _nl_load_domain (struct loaded_l10nfile *domain_file,
>   if (__builtin_expect (nullentry == (char *) -1, 0))
>     {
> #ifdef _LIBC
> -      __libc_rwlock_fini (domain->conversions_lock);
> +      __libc_lock_fini_recursive (domain->conversions_lock);
> #endif
>       goto invalid;
>     }
> @@ -1303,7 +1303,7 @@ _nl_unload_domain (struct loaded_domain *domain)
> 	__gconv_close (convd->conv);
>     }
>   free (domain->conversions);
> -  __libc_rwlock_fini (domain->conversions_lock);
> +  __libc_lock_fini_recursive (domain->conversions_lock);
> 
>   free (domain->malloced);
> 
> diff --git a/intl/textdomain.c b/intl/textdomain.c
> index b0b67230aa..038fe6d0df 100644
> --- a/intl/textdomain.c
> +++ b/intl/textdomain.c
> @@ -31,9 +31,9 @@
> /* Handle multi-threaded applications.  */
> #ifdef _LIBC
> # include <libc-lock.h>
> -# define gl_rwlock_define __libc_rwlock_define
> -# define gl_rwlock_wrlock __libc_rwlock_wrlock
> -# define gl_rwlock_unlock __libc_rwlock_unlock
> +# define gl_rwlock_rdlock __libc_lock_lock_recursive
> +# define gl_rwlock_wrlock __libc_lock_lock_recursive
> +# define gl_rwlock_unlock __libc_lock_unlock_recursive
> #else
> # include "lock.h"
> #endif
> --
> 2.17.1
  
Florian Weimer Aug. 19, 2021, 4:11 p.m. UTC | #2
* Mathieu Desnoyers via Libc-alpha:

> ----- On Aug 12, 2021, at 3:24 PM, Mathieu Desnoyers mathieu.desnoyers@efficios.com wrote:
>
>> malloc interposers calling failing dl API (e.g. dlopen/dlsym) end up
>> calling i18n translation. It can corrupt the i18n locking state when
>> malloc is called internally from within i18n code with i18n locking
>> held. This is an issue for libasan in gcc 8, 9, 10, 11.
>> 
>> This patch applies on top of glibc 2.31.
>> 
>> This rather crude patch is provided as RFC only: I'm not sure whether we
>> want to convert all intl locks to recursive locks, and whether a non-rw
>> recursive lock is appropriate.
>
> Hi Carlos, Florian,
>
> Before this issue falls off from my backlog pile, is this patch OK as is
> as a glibc fix, in which case I could re-send it without the "RFC" tag,
> or should I consider making changes either to this patch or to a patch
> on top ? For instance, it's a bit weird to keep the "gl_rwlock_" macro
> names mapped to non-rw recursive locks. However, renaming all "gl_rwlock_*"
> macros to "gl_lock_*_recursive" implies more code churn, which might not
> be welcome in a minimal fix.

I have yet to see a case where a recursive lock reliably avoids
deadlocks in a callback-based interface.  The self-deadlock with a
single thread is gone, but more complex patterns of deadlocks remain.
The loader lock is particularly prone to this, and I think it would be
involved here as well.

Thanks,
Florian
  
Mathieu Desnoyers Aug. 19, 2021, 7:35 p.m. UTC | #3
----- On Aug 19, 2021, at 12:11 PM, Florian Weimer fweimer@redhat.com wrote:

> * Mathieu Desnoyers via Libc-alpha:
> 
>> ----- On Aug 12, 2021, at 3:24 PM, Mathieu Desnoyers
>> mathieu.desnoyers@efficios.com wrote:
>>
>>> malloc interposers calling failing dl API (e.g. dlopen/dlsym) end up
>>> calling i18n translation. It can corrupt the i18n locking state when
>>> malloc is called internally from within i18n code with i18n locking
>>> held. This is an issue for libasan in gcc 8, 9, 10, 11.
>>> 
>>> This patch applies on top of glibc 2.31.
>>> 
>>> This rather crude patch is provided as RFC only: I'm not sure whether we
>>> want to convert all intl locks to recursive locks, and whether a non-rw
>>> recursive lock is appropriate.
>>
>> Hi Carlos, Florian,
>>
>> Before this issue falls off from my backlog pile, is this patch OK as is
>> as a glibc fix, in which case I could re-send it without the "RFC" tag,
>> or should I consider making changes either to this patch or to a patch
>> on top ? For instance, it's a bit weird to keep the "gl_rwlock_" macro
>> names mapped to non-rw recursive locks. However, renaming all "gl_rwlock_*"
>> macros to "gl_lock_*_recursive" implies more code churn, which might not
>> be welcome in a minimal fix.
> 
> I have yet to see a case where a recursive lock reliably avoids
> deadlocks in a callback-based interface.  The self-deadlock with a
> single thread is gone, but more complex patterns of deadlocks remain.

Agreed.

> The loader lock is particularly prone to this, and I think it would be
> involved here as well.

What makes you think the loader lock is involved here ?

The specific issue I am trying to solve is caused by i18n textdomain code using
malloc with i18n locks held. When a malloc interposer library ends up failing on
dlsym, the call to __dcgettext takes the lock recursively.

There are of course many possible solutions to this issue, some requiring rather
more work than others. A few ideas that come to mind:

1) Redesign the i18n textdomain code to move all memory allocation outside of the
   i18n lock,

2) Redesign textdomain/gettext data structures to allow RCU updates/lookups, and
   introduce RCU synchronization into glibc.

3) Modify __dcgettext to detect when it is used recursively with the textdomain
   code through a TLS variable. When this is detected, skip the lookup entirely
   and return the input string. (this is what I do in my workaround .so)

4) Use a recursive lock rather than non-recursive RW lock (proposed patch). It has
   some downsides though: it requires that all interposed functions (e.g. malloc/free...)
   are placed in the textdomain algorithm in code locations where the protected data
   structures are in a consistent state for lookups.

As you point out, recursive locks is not such a good design, because their recursive
nature do not propagate across a lock dependency chain. Therefore, as soon as those locks are
nested with other locks, deadlocks still happen, but they are harder to reproduce, which is
quite bad.

Given that this is an issue that occurs in the field today, I suspect we may want to have
two distinct discussions: what should we do for a fix which can be shipped in existing
distributions, and what should we do moving forward with the next glibc release ?

Thanks,

Mathieu
  
Florian Weimer Aug. 23, 2021, 8:56 a.m. UTC | #4
* Mathieu Desnoyers:

>> The loader lock is particularly prone to this, and I think it would be
>> involved here as well.
>
> What makes you think the loader lock is involved here ?

You mentioned dlopen.  I expect the loader locks are involved when the
i18n code is called for the dlopen/dlerror message translation.

> The specific issue I am trying to solve is caused by i18n textdomain
> code using malloc with i18n locks held. When a malloc interposer
> library ends up failing on dlsym, the call to __dcgettext takes the
> lock recursively.
>
> There are of course many possible solutions to this issue, some
> requiring rather more work than others. A few ideas that come to mind:

We could also add a variant of dlsym that doesn't translate (or even
construct) the error message.  That might have other benefits, too, like
never needing malloc in the first place.  Without something like that,
calling dlsym from malloc will never work reliably.

I still think the i18n code involvement is a bit of a red herring here,
and the core issue is something else.

Thanks,
Florian
  
Mathieu Desnoyers Aug. 30, 2021, 2:36 p.m. UTC | #5
----- On Aug 23, 2021, at 4:56 AM, Florian Weimer fweimer@redhat.com wrote:

> * Mathieu Desnoyers:
> 
>>> The loader lock is particularly prone to this, and I think it would be
>>> involved here as well.
>>
>> What makes you think the loader lock is involved here ?
> 
> You mentioned dlopen.  I expect the loader locks are involved when the
> i18n code is called for the dlopen/dlerror message translation.

Indeed, but aren't those already recursive locks ?

> 
>> The specific issue I am trying to solve is caused by i18n textdomain
>> code using malloc with i18n locks held. When a malloc interposer
>> library ends up failing on dlsym, the call to __dcgettext takes the
>> lock recursively.
>>
>> There are of course many possible solutions to this issue, some
>> requiring rather more work than others. A few ideas that come to mind:
> 
> We could also add a variant of dlsym that doesn't translate (or even
> construct) the error message.  That might have other benefits, too, like
> never needing malloc in the first place.  Without something like that,
> calling dlsym from malloc will never work reliably.
> 
> I still think the i18n code involvement is a bit of a red herring here,
> and the core issue is something else.

I've faced similar issues in LTTng-UST. I have considered implementing my
own in-library memory allocator to minimize the amount of dependencies on
glibc's memory allocator, to minimize the coupling with atfork handling
and symbol interposition.

From a glibc perspective, there are a few relevant questions:

1) When glibc internally uses the memory allocator, should this use be
   interposable by an external library ?

2) What glibc functions are allowed to be used from the interposed
   functions ? And what other functions are internally used by those
   functions ?

Depending on the answer to those questions, we may then want to discuss
whether glibc should implement a separate memory allocator for its internal
use, or if we should instead make sure that reentrancy is guaranteed for
all interposition scenarios.

As you are proposing, one possible approach would be to extend glibc's API
to provide new symbols designed for interposition reentrancy. This would
solve the problem for future glibc, but not for the existing glibc versions.

If the intent is that the existing glibc i18n and dynamic loader API should
be reentrant for interposition scenarios, we may want to detect those reentrant
uses with a IE TLS variable, and use a simpler fallback in those situations.
This would have the benefit of fixing existing library versions, but the downside
is tracking an additional nesting counter on the common path. Another possible
approach to allow reentrancy is the nestable locking as I proposed in this patch,
with its known downsides wrt nesting with other locks.

Thanks,

Mathieu
  

Patch

diff --git a/intl/bindtextdom.c b/intl/bindtextdom.c
index 964d9012a1..06d1ea38d9 100644
--- a/intl/bindtextdom.c
+++ b/intl/bindtextdom.c
@@ -32,9 +32,9 @@ 
 /* Handle multi-threaded applications.  */
 #ifdef _LIBC
 # include <libc-lock.h>
-# define gl_rwlock_define __libc_rwlock_define
-# define gl_rwlock_wrlock __libc_rwlock_wrlock
-# define gl_rwlock_unlock __libc_rwlock_unlock
+# define gl_rwlock_rdlock __libc_lock_lock_recursive
+# define gl_rwlock_wrlock __libc_lock_lock_recursive
+# define gl_rwlock_unlock __libc_lock_unlock_recursive
 #else
 # include "lock.h"
 #endif
diff --git a/intl/dcigettext.c b/intl/dcigettext.c
index bd332e71da..24e07008e9 100644
--- a/intl/dcigettext.c
+++ b/intl/dcigettext.c
@@ -105,10 +105,10 @@  extern int errno;
 /* Handle multi-threaded applications.  */
 #ifdef _LIBC
 # include <libc-lock.h>
-# define gl_rwlock_define_initialized __libc_rwlock_define_initialized
-# define gl_rwlock_rdlock __libc_rwlock_rdlock
-# define gl_rwlock_wrlock __libc_rwlock_wrlock
-# define gl_rwlock_unlock __libc_rwlock_unlock
+# define gl_rwlock_define_initialized __libc_lock_define_initialized_recursive
+# define gl_rwlock_rdlock __libc_lock_lock_recursive
+# define gl_rwlock_wrlock __libc_lock_lock_recursive
+# define gl_rwlock_unlock __libc_lock_unlock_recursive
 #else
 # include "lock.h"
 #endif
diff --git a/intl/finddomain.c b/intl/finddomain.c
index f88cb89ba0..3fb96ea17f 100644
--- a/intl/finddomain.c
+++ b/intl/finddomain.c
@@ -38,10 +38,10 @@ 
 /* Handle multi-threaded applications.  */
 #ifdef _LIBC
 # include <libc-lock.h>
-# define gl_rwlock_define_initialized __libc_rwlock_define_initialized
-# define gl_rwlock_rdlock __libc_rwlock_rdlock
-# define gl_rwlock_wrlock __libc_rwlock_wrlock
-# define gl_rwlock_unlock __libc_rwlock_unlock
+# define gl_rwlock_define_initialized __libc_lock_define_initialized_recursive
+# define gl_rwlock_rdlock __libc_lock_lock_recursive
+# define gl_rwlock_wrlock __libc_lock_lock_recursive
+# define gl_rwlock_unlock __libc_lock_unlock_recursive
 #else
 # include "lock.h"
 #endif
diff --git a/intl/gettextP.h b/intl/gettextP.h
index 5faee93bcc..bc81affb40 100644
--- a/intl/gettextP.h
+++ b/intl/gettextP.h
@@ -31,7 +31,7 @@ 
 /* Handle multi-threaded applications.  */
 #ifdef _LIBC
 # include <libc-lock.h>
-# define gl_rwlock_define __libc_rwlock_define
+# define gl_rwlock_define __libc_lock_define_recursive
 #else
 # include "lock.h"
 #endif
diff --git a/intl/loadmsgcat.c b/intl/loadmsgcat.c
index 91c1ef156a..257c910d6d 100644
--- a/intl/loadmsgcat.c
+++ b/intl/loadmsgcat.c
@@ -1250,7 +1250,7 @@  _nl_load_domain (struct loaded_l10nfile *domain_file,
   domain->conversions = NULL;
   domain->nconversions = 0;
 #ifdef _LIBC
-  __libc_rwlock_init (domain->conversions_lock);
+  __libc_lock_init_recursive (domain->conversions_lock);
 #else
   gl_rwlock_init (domain->conversions_lock);
 #endif
@@ -1265,7 +1265,7 @@  _nl_load_domain (struct loaded_l10nfile *domain_file,
   if (__builtin_expect (nullentry == (char *) -1, 0))
     {
 #ifdef _LIBC
-      __libc_rwlock_fini (domain->conversions_lock);
+      __libc_lock_fini_recursive (domain->conversions_lock);
 #endif
       goto invalid;
     }
@@ -1303,7 +1303,7 @@  _nl_unload_domain (struct loaded_domain *domain)
 	__gconv_close (convd->conv);
     }
   free (domain->conversions);
-  __libc_rwlock_fini (domain->conversions_lock);
+  __libc_lock_fini_recursive (domain->conversions_lock);
 
   free (domain->malloced);
 
diff --git a/intl/textdomain.c b/intl/textdomain.c
index b0b67230aa..038fe6d0df 100644
--- a/intl/textdomain.c
+++ b/intl/textdomain.c
@@ -31,9 +31,9 @@ 
 /* Handle multi-threaded applications.  */
 #ifdef _LIBC
 # include <libc-lock.h>
-# define gl_rwlock_define __libc_rwlock_define
-# define gl_rwlock_wrlock __libc_rwlock_wrlock
-# define gl_rwlock_unlock __libc_rwlock_unlock
+# define gl_rwlock_rdlock __libc_lock_lock_recursive
+# define gl_rwlock_wrlock __libc_lock_lock_recursive
+# define gl_rwlock_unlock __libc_lock_unlock_recursive
 #else
 # include "lock.h"
 #endif