[12/16] libdw: Make libdw_find_split_unit thread-safe
Commit Message
From: Heather McIntyre <hsm2@rice.edu>
* (try_split_file): Use eu_tsearch.
Add lock for cu->split.
Signed-off-by: Heather S. McIntyre <hsm2@rice.edu>
Signed-off-by: Mark Wielaard <mark@klomp.org>
---
libdw/libdw_find_split_unit.c | 43 +++++++++++++++++++++++++++++------
1 file changed, 36 insertions(+), 7 deletions(-)
Comments
Hi Heather,
On Tue, Oct 10, 2023 at 03:42:56PM +0200, Mark Wielaard wrote:
> From: Heather McIntyre <hsm2@rice.edu>
>
> * (try_split_file): Use eu_tsearch.
> Add lock for cu->split.
>
> Signed-off-by: Heather S. McIntyre <hsm2@rice.edu>
> Signed-off-by: Mark Wielaard <mark@klomp.org>
> ---
> libdw/libdw_find_split_unit.c | 43 +++++++++++++++++++++++++++++------
> 1 file changed, 36 insertions(+), 7 deletions(-)
>
> diff --git a/libdw/libdw_find_split_unit.c b/libdw/libdw_find_split_unit.c
> index 533f8072..a288e9bd 100644
> --- a/libdw/libdw_find_split_unit.c
> +++ b/libdw/libdw_find_split_unit.c
> @@ -34,13 +34,19 @@
> #include "libelfP.h"
>
> #include <limits.h>
> -#include <search.h>
> +#include <eu-search.h>
> #include <stdlib.h>
> #include <string.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <fcntl.h>
>
> +/* __libdw_link_skel_split() modifies "skel->split = split"
> + and "split->split = skel".
> + "cu->split" is read at multiple locations and conditionally updated.
> + Mutual exclusion is enforced to prevent a race. */
> +rwlock_define(static, cu_split_lock);
__libdw_link_skel_split is defined in libdw/libdwP.h and is called in
try_split_file. (It is also called in src/readelf.c but that is a
terrible hack, so ignore that for now.)
A Dwarf_CU is created (see libdw/libdw_findcu.c) with split set to
(Dwarf_CU *) -1 to indicate the split dwarf is not yet set. If
cu->split is NULL it means the split dwarf was searched for, but not
found.
> static void
> try_split_file (Dwarf_CU *cu, const char *dwo_path)
> {
> @@ -57,7 +63,7 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> if (split->unit_type == DW_UT_split_compile
> && cu->unit_id8 == split->unit_id8)
> {
> - if (tsearch (split->dbg, &cu->dbg->split_tree,
> + if (eu_tsearch (split->dbg, &cu->dbg->split_tree,
> __libdw_finddbg_cb) == NULL)
> {
> /* Something went wrong. Don't link. */
> @@ -65,9 +71,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> break;
> }
>
> + rwlock_wrlock(cu_split_lock);
> +
> /* Link skeleton and split compile units. */
> __libdw_link_skel_split (cu, split);
>
> + rwlock_unlock(cu_split_lock);
> +
Is this locking wide enough? It seems like multiple threads can race
to this code and set different Dwarfs (which means they'll leak). See
below.
> /* We have everything we need from this ELF
> file. And we are going to close the fd to
> not run out of file descriptors. */
> @@ -75,8 +85,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> break;
> }
> }
> +
> + rwlock_rdlock(cu_split_lock);
> +
> if (cu->split == (Dwarf_CU *) -1)
> dwarf_end (split_dwarf);
> +
> + rwlock_unlock(cu_split_lock);
This means __libdw_link_skel_split wasn't called above (so the
split_dwarf created by dwarf_begin didn't contain the expected CU).
> }
> /* Always close, because we don't want to run out of file
> descriptors. See also the elf_fcntl ELF_C_FDDONE call
> @@ -89,9 +104,13 @@ Dwarf_CU *
> internal_function
> __libdw_find_split_unit (Dwarf_CU *cu)
> {
> + rwlock_rdlock(cu_split_lock);
> + Dwarf_CU *cu_split_local = cu->split;
> + rwlock_unlock(cu_split_lock);
> +
> /* Only try once. */
> - if (cu->split != (Dwarf_CU *) -1)
> - return cu->split;
> + if (cu_split_local != (Dwarf_CU *) -1)
> + return cu_split_local;
>
> /* We need a skeleton unit with a comp_dir and [GNU_]dwo_name attributes.
> The split unit will be the first in the dwo file and should have the
> @@ -116,7 +135,11 @@ __libdw_find_split_unit (Dwarf_CU *cu)
> free (dwo_path);
> }
>
> - if (cu->split == (Dwarf_CU *) -1)
> + rwlock_rdlock(cu_split_lock);
> + cu_split_local = cu->split;
> + rwlock_unlock(cu_split_lock);
> +
> + if (cu_split_local == (Dwarf_CU *) -1)
> {
> /* Try compdir plus dwo_name. */
> Dwarf_Attribute compdir;
It looks like multiple threads can end up here after having seen
cu->split/cu_split_local == (Dwarf_CU *) -1) Which means multiple
threads will enter try_split_file and might all call
__libdw_link_skel_split as mentioned above.
> @@ -138,9 +161,15 @@ __libdw_find_split_unit (Dwarf_CU *cu)
> }
> }
>
> + rwlock_wrlock(cu_split_lock);
> +
> /* If we found nothing, make sure we don't try again. */
> if (cu->split == (Dwarf_CU *) -1)
> - cu->split = NULL;
> + {
> + cu->split = NULL;
> + cu_split_local = cu->split;
> + }
>
> - return cu->split;
> + rwlock_unlock(cu_split_lock);
> + return cu_split_local;
> }
I think it would be better to keep the lock during the whole
__libdw_find_split_unit call.
Cheers,
Mark
Hi Mark,
As per your suggestion, I have placed the lock around the entire
__libdw_find_split_unit function call.
On Wed, Oct 11, 2023 at 12:17 PM Mark Wielaard <mark@klomp.org> wrote:
> Hi Heather,
>
> On Tue, Oct 10, 2023 at 03:42:56PM +0200, Mark Wielaard wrote:
> > From: Heather McIntyre <hsm2@rice.edu>
> >
> > * (try_split_file): Use eu_tsearch.
> > Add lock for cu->split.
> >
> > Signed-off-by: Heather S. McIntyre <hsm2@rice.edu>
> > Signed-off-by: Mark Wielaard <mark@klomp.org>
> > ---
> > libdw/libdw_find_split_unit.c | 43 +++++++++++++++++++++++++++++------
> > 1 file changed, 36 insertions(+), 7 deletions(-)
> >
> > diff --git a/libdw/libdw_find_split_unit.c
> b/libdw/libdw_find_split_unit.c
> > index 533f8072..a288e9bd 100644
> > --- a/libdw/libdw_find_split_unit.c
> > +++ b/libdw/libdw_find_split_unit.c
> > @@ -34,13 +34,19 @@
> > #include "libelfP.h"
> >
> > #include <limits.h>
> > -#include <search.h>
> > +#include <eu-search.h>
> > #include <stdlib.h>
> > #include <string.h>
> > #include <sys/types.h>
> > #include <sys/stat.h>
> > #include <fcntl.h>
> >
> > +/* __libdw_link_skel_split() modifies "skel->split = split"
> > + and "split->split = skel".
> > + "cu->split" is read at multiple locations and conditionally updated.
> > + Mutual exclusion is enforced to prevent a race. */
> > +rwlock_define(static, cu_split_lock);
>
> __libdw_link_skel_split is defined in libdw/libdwP.h and is called in
> try_split_file. (It is also called in src/readelf.c but that is a
> terrible hack, so ignore that for now.)
>
> A Dwarf_CU is created (see libdw/libdw_findcu.c) with split set to
> (Dwarf_CU *) -1 to indicate the split dwarf is not yet set. If
> cu->split is NULL it means the split dwarf was searched for, but not
> found.
>
> > static void
> > try_split_file (Dwarf_CU *cu, const char *dwo_path)
> > {
> > @@ -57,7 +63,7 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> > if (split->unit_type == DW_UT_split_compile
> > && cu->unit_id8 == split->unit_id8)
> > {
> > - if (tsearch (split->dbg, &cu->dbg->split_tree,
> > + if (eu_tsearch (split->dbg, &cu->dbg->split_tree,
> > __libdw_finddbg_cb) == NULL)
> > {
> > /* Something went wrong. Don't link. */
> > @@ -65,9 +71,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> > break;
> > }
> >
> > + rwlock_wrlock(cu_split_lock);
> > +
> > /* Link skeleton and split compile units. */
> > __libdw_link_skel_split (cu, split);
> >
> > + rwlock_unlock(cu_split_lock);
> > +
>
> Is this locking wide enough? It seems like multiple threads can race
> to this code and set different Dwarfs (which means they'll leak). See
> below.
>
> > /* We have everything we need from this ELF
> > file. And we are going to close the fd to
> > not run out of file descriptors. */
> > @@ -75,8 +85,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> > break;
> > }
> > }
> > +
> > + rwlock_rdlock(cu_split_lock);
> > +
> > if (cu->split == (Dwarf_CU *) -1)
> > dwarf_end (split_dwarf);
> > +
> > + rwlock_unlock(cu_split_lock);
>
> This means __libdw_link_skel_split wasn't called above (so the
> split_dwarf created by dwarf_begin didn't contain the expected CU).
>
> > }
> > /* Always close, because we don't want to run out of file
> > descriptors. See also the elf_fcntl ELF_C_FDDONE call
> > @@ -89,9 +104,13 @@ Dwarf_CU *
> > internal_function
> > __libdw_find_split_unit (Dwarf_CU *cu)
> > {
> > + rwlock_rdlock(cu_split_lock);
> > + Dwarf_CU *cu_split_local = cu->split;
> > + rwlock_unlock(cu_split_lock);
> > +
> > /* Only try once. */
> > - if (cu->split != (Dwarf_CU *) -1)
> > - return cu->split;
> > + if (cu_split_local != (Dwarf_CU *) -1)
> > + return cu_split_local;
> >
> > /* We need a skeleton unit with a comp_dir and [GNU_]dwo_name
> attributes.
> > The split unit will be the first in the dwo file and should have
> the
> > @@ -116,7 +135,11 @@ __libdw_find_split_unit (Dwarf_CU *cu)
> > free (dwo_path);
> > }
> >
> > - if (cu->split == (Dwarf_CU *) -1)
> > + rwlock_rdlock(cu_split_lock);
> > + cu_split_local = cu->split;
> > + rwlock_unlock(cu_split_lock);
> > +
> > + if (cu_split_local == (Dwarf_CU *) -1)
> > {
> > /* Try compdir plus dwo_name. */
> > Dwarf_Attribute compdir;
>
> It looks like multiple threads can end up here after having seen
> cu->split/cu_split_local == (Dwarf_CU *) -1) Which means multiple
> threads will enter try_split_file and might all call
> __libdw_link_skel_split as mentioned above.
>
> > @@ -138,9 +161,15 @@ __libdw_find_split_unit (Dwarf_CU *cu)
> > }
> > }
> >
> > + rwlock_wrlock(cu_split_lock);
> > +
> > /* If we found nothing, make sure we don't try again. */
> > if (cu->split == (Dwarf_CU *) -1)
> > - cu->split = NULL;
> > + {
> > + cu->split = NULL;
> > + cu_split_local = cu->split;
> > + }
> >
> > - return cu->split;
> > + rwlock_unlock(cu_split_lock);
> > + return cu_split_local;
> > }
>
> I think it would be better to keep the lock during the whole
> __libdw_find_split_unit call.
>
> Cheers,
>
> Mark
>
@@ -34,13 +34,19 @@
#include "libelfP.h"
#include <limits.h>
-#include <search.h>
+#include <eu-search.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+/* __libdw_link_skel_split() modifies "skel->split = split"
+ and "split->split = skel".
+ "cu->split" is read at multiple locations and conditionally updated.
+ Mutual exclusion is enforced to prevent a race. */
+rwlock_define(static, cu_split_lock);
+
static void
try_split_file (Dwarf_CU *cu, const char *dwo_path)
{
@@ -57,7 +63,7 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
if (split->unit_type == DW_UT_split_compile
&& cu->unit_id8 == split->unit_id8)
{
- if (tsearch (split->dbg, &cu->dbg->split_tree,
+ if (eu_tsearch (split->dbg, &cu->dbg->split_tree,
__libdw_finddbg_cb) == NULL)
{
/* Something went wrong. Don't link. */
@@ -65,9 +71,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
break;
}
+ rwlock_wrlock(cu_split_lock);
+
/* Link skeleton and split compile units. */
__libdw_link_skel_split (cu, split);
+ rwlock_unlock(cu_split_lock);
+
/* We have everything we need from this ELF
file. And we are going to close the fd to
not run out of file descriptors. */
@@ -75,8 +85,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
break;
}
}
+
+ rwlock_rdlock(cu_split_lock);
+
if (cu->split == (Dwarf_CU *) -1)
dwarf_end (split_dwarf);
+
+ rwlock_unlock(cu_split_lock);
}
/* Always close, because we don't want to run out of file
descriptors. See also the elf_fcntl ELF_C_FDDONE call
@@ -89,9 +104,13 @@ Dwarf_CU *
internal_function
__libdw_find_split_unit (Dwarf_CU *cu)
{
+ rwlock_rdlock(cu_split_lock);
+ Dwarf_CU *cu_split_local = cu->split;
+ rwlock_unlock(cu_split_lock);
+
/* Only try once. */
- if (cu->split != (Dwarf_CU *) -1)
- return cu->split;
+ if (cu_split_local != (Dwarf_CU *) -1)
+ return cu_split_local;
/* We need a skeleton unit with a comp_dir and [GNU_]dwo_name attributes.
The split unit will be the first in the dwo file and should have the
@@ -116,7 +135,11 @@ __libdw_find_split_unit (Dwarf_CU *cu)
free (dwo_path);
}
- if (cu->split == (Dwarf_CU *) -1)
+ rwlock_rdlock(cu_split_lock);
+ cu_split_local = cu->split;
+ rwlock_unlock(cu_split_lock);
+
+ if (cu_split_local == (Dwarf_CU *) -1)
{
/* Try compdir plus dwo_name. */
Dwarf_Attribute compdir;
@@ -138,9 +161,15 @@ __libdw_find_split_unit (Dwarf_CU *cu)
}
}
+ rwlock_wrlock(cu_split_lock);
+
/* If we found nothing, make sure we don't try again. */
if (cu->split == (Dwarf_CU *) -1)
- cu->split = NULL;
+ {
+ cu->split = NULL;
+ cu_split_local = cu->split;
+ }
- return cu->split;
+ rwlock_unlock(cu_split_lock);
+ return cu_split_local;
}