[12/16] libdw: Make libdw_find_split_unit thread-safe

Message ID 20231010134300.53830-12-mark@klomp.org
State Changes Requested
Headers
Series [01/16] lib: Add new once_define and once macros to eu-config.h |

Commit Message

Mark Wielaard Oct. 10, 2023, 1:42 p.m. UTC
  From: Heather McIntyre <hsm2@rice.edu>

	* (try_split_file): Use eu_tsearch.
	Add lock for cu->split.

Signed-off-by: Heather S. McIntyre <hsm2@rice.edu>
Signed-off-by: Mark Wielaard <mark@klomp.org>
---
 libdw/libdw_find_split_unit.c | 43 +++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 7 deletions(-)
  

Comments

Mark Wielaard Oct. 11, 2023, 5:17 p.m. UTC | #1
Hi Heather,

On Tue, Oct 10, 2023 at 03:42:56PM +0200, Mark Wielaard wrote:
> From: Heather McIntyre <hsm2@rice.edu>
> 
> 	* (try_split_file): Use eu_tsearch.
> 	Add lock for cu->split.
> 
> Signed-off-by: Heather S. McIntyre <hsm2@rice.edu>
> Signed-off-by: Mark Wielaard <mark@klomp.org>
> ---
>  libdw/libdw_find_split_unit.c | 43 +++++++++++++++++++++++++++++------
>  1 file changed, 36 insertions(+), 7 deletions(-)
> 
> diff --git a/libdw/libdw_find_split_unit.c b/libdw/libdw_find_split_unit.c
> index 533f8072..a288e9bd 100644
> --- a/libdw/libdw_find_split_unit.c
> +++ b/libdw/libdw_find_split_unit.c
> @@ -34,13 +34,19 @@
>  #include "libelfP.h"
>  
>  #include <limits.h>
> -#include <search.h>
> +#include <eu-search.h>
>  #include <stdlib.h>
>  #include <string.h>
>  #include <sys/types.h>
>  #include <sys/stat.h>
>  #include <fcntl.h>
>  
> +/* __libdw_link_skel_split() modifies "skel->split = split"
> +   and "split->split = skel".
> +   "cu->split" is read at multiple locations and conditionally updated.
> +   Mutual exclusion is enforced to prevent a race. */
> +rwlock_define(static, cu_split_lock);

__libdw_link_skel_split is defined in libdw/libdwP.h and is called in
try_split_file. (It is also called in src/readelf.c but that is a
terrible hack, so ignore that for now.)

A Dwarf_CU is created (see libdw/libdw_findcu.c) with split set to
(Dwarf_CU *) -1 to indicate the split dwarf is not yet set. If
cu->split is NULL it means the split dwarf was searched for, but not
found.

>  static void
>  try_split_file (Dwarf_CU *cu, const char *dwo_path)
>  {
> @@ -57,7 +63,7 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
>  	      if (split->unit_type == DW_UT_split_compile
>  		  && cu->unit_id8 == split->unit_id8)
>  		{
> -		  if (tsearch (split->dbg, &cu->dbg->split_tree,
> +		  if (eu_tsearch (split->dbg, &cu->dbg->split_tree,
>  			       __libdw_finddbg_cb) == NULL)
>  		    {
>  		      /* Something went wrong.  Don't link.  */
> @@ -65,9 +71,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
>  		      break;
>  		    }
>  
> +		  rwlock_wrlock(cu_split_lock);
> +
>  		  /* Link skeleton and split compile units.  */
>  		  __libdw_link_skel_split (cu, split);
>  
> +		  rwlock_unlock(cu_split_lock);
> +

Is this locking wide enough? It seems like multiple threads can race
to this code and set different Dwarfs (which means they'll leak). See
below.

>  		  /* We have everything we need from this ELF
>  		     file.  And we are going to close the fd to
>  		     not run out of file descriptors.  */
> @@ -75,8 +85,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
>  		  break;
>  		}
>  	    }
> +
> +	  rwlock_rdlock(cu_split_lock);
> +
>  	  if (cu->split == (Dwarf_CU *) -1)
>  	    dwarf_end (split_dwarf);
> +
> +	  rwlock_unlock(cu_split_lock);

This means __libdw_link_skel_split wasn't called above (so the
split_dwarf created by dwarf_begin didn't contain the expected CU).

>  	}
>        /* Always close, because we don't want to run out of file
>  	 descriptors.  See also the elf_fcntl ELF_C_FDDONE call
> @@ -89,9 +104,13 @@ Dwarf_CU *
>  internal_function
>  __libdw_find_split_unit (Dwarf_CU *cu)
>  {
> +  rwlock_rdlock(cu_split_lock);
> +  Dwarf_CU *cu_split_local = cu->split;
> +  rwlock_unlock(cu_split_lock);
> +
>    /* Only try once.  */
> -  if (cu->split != (Dwarf_CU *) -1)
> -    return cu->split;
> +  if (cu_split_local != (Dwarf_CU *) -1)
> +    return cu_split_local;
>  
>    /* We need a skeleton unit with a comp_dir and [GNU_]dwo_name attributes.
>       The split unit will be the first in the dwo file and should have the
> @@ -116,7 +135,11 @@ __libdw_find_split_unit (Dwarf_CU *cu)
>  	      free (dwo_path);
>  	    }
>  
> -	  if (cu->split == (Dwarf_CU *) -1)
> +	  rwlock_rdlock(cu_split_lock);
> +	  cu_split_local = cu->split;
> +	  rwlock_unlock(cu_split_lock);
> +
> +	  if (cu_split_local == (Dwarf_CU *) -1)
>  	    {
>  	      /* Try compdir plus dwo_name.  */
>  	      Dwarf_Attribute compdir;

It looks like multiple threads can end up here after having seen
cu->split/cu_split_local == (Dwarf_CU *) -1) Which means multiple
threads will enter try_split_file and might all call
__libdw_link_skel_split as mentioned above.

> @@ -138,9 +161,15 @@ __libdw_find_split_unit (Dwarf_CU *cu)
>  	}
>      }
>  
> +  rwlock_wrlock(cu_split_lock);
> +
>    /* If we found nothing, make sure we don't try again.  */
>    if (cu->split == (Dwarf_CU *) -1)
> -    cu->split = NULL;
> +    {
> +      cu->split = NULL;
> +      cu_split_local = cu->split;
> +    }
>  
> -  return cu->split;
> +  rwlock_unlock(cu_split_lock);
> +  return cu_split_local;
>  }

I think it would be better to keep the lock during the whole
__libdw_find_split_unit call.

Cheers,

Mark
  
Heather McIntyre Oct. 17, 2023, 8:01 p.m. UTC | #2
Hi Mark,

As per your suggestion, I have placed the lock around the entire
__libdw_find_split_unit function call.

On Wed, Oct 11, 2023 at 12:17 PM Mark Wielaard <mark@klomp.org> wrote:

> Hi Heather,
>
> On Tue, Oct 10, 2023 at 03:42:56PM +0200, Mark Wielaard wrote:
> > From: Heather McIntyre <hsm2@rice.edu>
> >
> >       * (try_split_file): Use eu_tsearch.
> >       Add lock for cu->split.
> >
> > Signed-off-by: Heather S. McIntyre <hsm2@rice.edu>
> > Signed-off-by: Mark Wielaard <mark@klomp.org>
> > ---
> >  libdw/libdw_find_split_unit.c | 43 +++++++++++++++++++++++++++++------
> >  1 file changed, 36 insertions(+), 7 deletions(-)
> >
> > diff --git a/libdw/libdw_find_split_unit.c
> b/libdw/libdw_find_split_unit.c
> > index 533f8072..a288e9bd 100644
> > --- a/libdw/libdw_find_split_unit.c
> > +++ b/libdw/libdw_find_split_unit.c
> > @@ -34,13 +34,19 @@
> >  #include "libelfP.h"
> >
> >  #include <limits.h>
> > -#include <search.h>
> > +#include <eu-search.h>
> >  #include <stdlib.h>
> >  #include <string.h>
> >  #include <sys/types.h>
> >  #include <sys/stat.h>
> >  #include <fcntl.h>
> >
> > +/* __libdw_link_skel_split() modifies "skel->split = split"
> > +   and "split->split = skel".
> > +   "cu->split" is read at multiple locations and conditionally updated.
> > +   Mutual exclusion is enforced to prevent a race. */
> > +rwlock_define(static, cu_split_lock);
>
> __libdw_link_skel_split is defined in libdw/libdwP.h and is called in
> try_split_file. (It is also called in src/readelf.c but that is a
> terrible hack, so ignore that for now.)
>
> A Dwarf_CU is created (see libdw/libdw_findcu.c) with split set to
> (Dwarf_CU *) -1 to indicate the split dwarf is not yet set. If
> cu->split is NULL it means the split dwarf was searched for, but not
> found.
>
> >  static void
> >  try_split_file (Dwarf_CU *cu, const char *dwo_path)
> >  {
> > @@ -57,7 +63,7 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> >             if (split->unit_type == DW_UT_split_compile
> >                 && cu->unit_id8 == split->unit_id8)
> >               {
> > -               if (tsearch (split->dbg, &cu->dbg->split_tree,
> > +               if (eu_tsearch (split->dbg, &cu->dbg->split_tree,
> >                              __libdw_finddbg_cb) == NULL)
> >                   {
> >                     /* Something went wrong.  Don't link.  */
> > @@ -65,9 +71,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> >                     break;
> >                   }
> >
> > +               rwlock_wrlock(cu_split_lock);
> > +
> >                 /* Link skeleton and split compile units.  */
> >                 __libdw_link_skel_split (cu, split);
> >
> > +               rwlock_unlock(cu_split_lock);
> > +
>
> Is this locking wide enough? It seems like multiple threads can race
> to this code and set different Dwarfs (which means they'll leak). See
> below.
>
> >                 /* We have everything we need from this ELF
> >                    file.  And we are going to close the fd to
> >                    not run out of file descriptors.  */
> > @@ -75,8 +85,13 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> >                 break;
> >               }
> >           }
> > +
> > +       rwlock_rdlock(cu_split_lock);
> > +
> >         if (cu->split == (Dwarf_CU *) -1)
> >           dwarf_end (split_dwarf);
> > +
> > +       rwlock_unlock(cu_split_lock);
>
> This means __libdw_link_skel_split wasn't called above (so the
> split_dwarf created by dwarf_begin didn't contain the expected CU).
>
> >       }
> >        /* Always close, because we don't want to run out of file
> >        descriptors.  See also the elf_fcntl ELF_C_FDDONE call
> > @@ -89,9 +104,13 @@ Dwarf_CU *
> >  internal_function
> >  __libdw_find_split_unit (Dwarf_CU *cu)
> >  {
> > +  rwlock_rdlock(cu_split_lock);
> > +  Dwarf_CU *cu_split_local = cu->split;
> > +  rwlock_unlock(cu_split_lock);
> > +
> >    /* Only try once.  */
> > -  if (cu->split != (Dwarf_CU *) -1)
> > -    return cu->split;
> > +  if (cu_split_local != (Dwarf_CU *) -1)
> > +    return cu_split_local;
> >
> >    /* We need a skeleton unit with a comp_dir and [GNU_]dwo_name
> attributes.
> >       The split unit will be the first in the dwo file and should have
> the
> > @@ -116,7 +135,11 @@ __libdw_find_split_unit (Dwarf_CU *cu)
> >             free (dwo_path);
> >           }
> >
> > -       if (cu->split == (Dwarf_CU *) -1)
> > +       rwlock_rdlock(cu_split_lock);
> > +       cu_split_local = cu->split;
> > +       rwlock_unlock(cu_split_lock);
> > +
> > +       if (cu_split_local == (Dwarf_CU *) -1)
> >           {
> >             /* Try compdir plus dwo_name.  */
> >             Dwarf_Attribute compdir;
>
> It looks like multiple threads can end up here after having seen
> cu->split/cu_split_local == (Dwarf_CU *) -1) Which means multiple
> threads will enter try_split_file and might all call
> __libdw_link_skel_split as mentioned above.
>
> > @@ -138,9 +161,15 @@ __libdw_find_split_unit (Dwarf_CU *cu)
> >       }
> >      }
> >
> > +  rwlock_wrlock(cu_split_lock);
> > +
> >    /* If we found nothing, make sure we don't try again.  */
> >    if (cu->split == (Dwarf_CU *) -1)
> > -    cu->split = NULL;
> > +    {
> > +      cu->split = NULL;
> > +      cu_split_local = cu->split;
> > +    }
> >
> > -  return cu->split;
> > +  rwlock_unlock(cu_split_lock);
> > +  return cu_split_local;
> >  }
>
> I think it would be better to keep the lock during the whole
> __libdw_find_split_unit call.
>
> Cheers,
>
> Mark
>
  

Patch

diff --git a/libdw/libdw_find_split_unit.c b/libdw/libdw_find_split_unit.c
index 533f8072..a288e9bd 100644
--- a/libdw/libdw_find_split_unit.c
+++ b/libdw/libdw_find_split_unit.c
@@ -34,13 +34,19 @@ 
 #include "libelfP.h"
 
 #include <limits.h>
-#include <search.h>
+#include <eu-search.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 
+/* __libdw_link_skel_split() modifies "skel->split = split"
+   and "split->split = skel".
+   "cu->split" is read at multiple locations and conditionally updated.
+   Mutual exclusion is enforced to prevent a race. */
+rwlock_define(static, cu_split_lock);
+
 static void
 try_split_file (Dwarf_CU *cu, const char *dwo_path)
 {
@@ -57,7 +63,7 @@  try_split_file (Dwarf_CU *cu, const char *dwo_path)
 	      if (split->unit_type == DW_UT_split_compile
 		  && cu->unit_id8 == split->unit_id8)
 		{
-		  if (tsearch (split->dbg, &cu->dbg->split_tree,
+		  if (eu_tsearch (split->dbg, &cu->dbg->split_tree,
 			       __libdw_finddbg_cb) == NULL)
 		    {
 		      /* Something went wrong.  Don't link.  */
@@ -65,9 +71,13 @@  try_split_file (Dwarf_CU *cu, const char *dwo_path)
 		      break;
 		    }
 
+		  rwlock_wrlock(cu_split_lock);
+
 		  /* Link skeleton and split compile units.  */
 		  __libdw_link_skel_split (cu, split);
 
+		  rwlock_unlock(cu_split_lock);
+
 		  /* We have everything we need from this ELF
 		     file.  And we are going to close the fd to
 		     not run out of file descriptors.  */
@@ -75,8 +85,13 @@  try_split_file (Dwarf_CU *cu, const char *dwo_path)
 		  break;
 		}
 	    }
+
+	  rwlock_rdlock(cu_split_lock);
+
 	  if (cu->split == (Dwarf_CU *) -1)
 	    dwarf_end (split_dwarf);
+
+	  rwlock_unlock(cu_split_lock);
 	}
       /* Always close, because we don't want to run out of file
 	 descriptors.  See also the elf_fcntl ELF_C_FDDONE call
@@ -89,9 +104,13 @@  Dwarf_CU *
 internal_function
 __libdw_find_split_unit (Dwarf_CU *cu)
 {
+  rwlock_rdlock(cu_split_lock);
+  Dwarf_CU *cu_split_local = cu->split;
+  rwlock_unlock(cu_split_lock);
+
   /* Only try once.  */
-  if (cu->split != (Dwarf_CU *) -1)
-    return cu->split;
+  if (cu_split_local != (Dwarf_CU *) -1)
+    return cu_split_local;
 
   /* We need a skeleton unit with a comp_dir and [GNU_]dwo_name attributes.
      The split unit will be the first in the dwo file and should have the
@@ -116,7 +135,11 @@  __libdw_find_split_unit (Dwarf_CU *cu)
 	      free (dwo_path);
 	    }
 
-	  if (cu->split == (Dwarf_CU *) -1)
+	  rwlock_rdlock(cu_split_lock);
+	  cu_split_local = cu->split;
+	  rwlock_unlock(cu_split_lock);
+
+	  if (cu_split_local == (Dwarf_CU *) -1)
 	    {
 	      /* Try compdir plus dwo_name.  */
 	      Dwarf_Attribute compdir;
@@ -138,9 +161,15 @@  __libdw_find_split_unit (Dwarf_CU *cu)
 	}
     }
 
+  rwlock_wrlock(cu_split_lock);
+
   /* If we found nothing, make sure we don't try again.  */
   if (cu->split == (Dwarf_CU *) -1)
-    cu->split = NULL;
+    {
+      cu->split = NULL;
+      cu_split_local = cu->split;
+    }
 
-  return cu->split;
+  rwlock_unlock(cu_split_lock);
+  return cu_split_local;
 }