[1/9,v2] libelf: Fix deadlock in __libelf_readall

Message ID 20240717223409.1788719-2-amerey@redhat.com
State Superseded
Headers
Series Fix thread-safety for elfutils |

Commit Message

Aaron Merey July 17, 2024, 10:34 p.m. UTC
  From: Heather McIntyre <hsm2@rice.edu>

Apply locking during __libelf_readall.

Signed-off-by: Heather S. McIntyre <hsm2@rice.edu>
Signed-off-by: Aaron Merey <amerey@redhat.com>
Signed-off-by: Mark Wielaard <mark@klomp.org>

---
v2 changes:

Add locking for all early returns in __libelf_readall.

libelf_{acquire,release}_all have been renamed to
libelf_{acquire,release}_all_children.  These functions also no longer
acquire/release the parent's lock.  This is done in order to simplify
lock management in __libelf_readall.

 libelf/common.h      | 16 ++++++++--------
 libelf/elf_readall.c |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)
  

Comments

Mark Wielaard July 18, 2024, 5:23 p.m. UTC | #1
On Wed, 2024-07-17 at 18:34 -0400, Aaron Merey wrote:
> From: Heather McIntyre <hsm2@rice.edu>
> 
> Apply locking during __libelf_readall.
> 
> Signed-off-by: Heather S. McIntyre <hsm2@rice.edu>
> Signed-off-by: Aaron Merey <amerey@redhat.com>
> Signed-off-by: Mark Wielaard <mark@klomp.org>
> 
> ---
> v2 changes:
> 
> Add locking for all early returns in __libelf_readall.
> 
> libelf_{acquire,release}_all have been renamed to
> libelf_{acquire,release}_all_children.  These functions also no longer
> acquire/release the parent's lock.  This is done in order to simplify
> lock management in __libelf_readall.

OK, so now we have __libelf_readall take the lock on the Elf handle at
the start. If there is a bad fildes we immediately unlock and return.
If we don't have read everything yet (checking map_address) we call
libelf_acquire_all_children. Every (error) path ends up at the end of
the lexical block where we call libelf_release_all_children. Then we
call rwlock_unlock and return.

So assuming libelf_acquire_all_children/libelf_release_all_children do
as promised, this looks fine.

A comment (about a comment) below. And a nitpick about the locking
order of the (recursive) children.

>  libelf/common.h      | 16 ++++++++--------
>  libelf/elf_readall.c |  4 ++--
>  2 files changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/libelf/common.h b/libelf/common.h
> index 9b2a856d..b7226ee8 100644
> --- a/libelf/common.h
> +++ b/libelf/common.h
> @@ -92,10 +92,8 @@ allocate_elf (int fildes, void *map_address, int64_t offset, size_t maxsize,
>  /* Acquire lock for the descriptor and all children.  */

This ^ comment is outdated now. It should say something like:

  /* Caller must hold a lock for the descriptor. If there are
     children a lock will be acquired for each of them
     (recursively).  */

>  static void
>  __attribute__ ((unused))
> -libelf_acquire_all (Elf *elf)
> +libelf_acquire_all_children (Elf *elf)
>  {
> -  rwlock_wrlock (elf->lock);
> -
>    if (elf->kind == ELF_K_AR)
>      {
>        Elf *child = elf->state.ar.children;
> @@ -103,7 +101,9 @@ libelf_acquire_all (Elf *elf)
>        while (child != NULL)
>  	{
>  	  if (child->ref_count != 0)
> -	    libelf_acquire_all (child);
> +	    libelf_acquire_all_children (child);
> +
> +	  rwlock_wrlock(child->lock);

So, if my suggestion for the function comment is correct, this should
first call rwlock_wrlock (child->lock) and then
libelf_acquire_all_children (child).

>  	  child = child->next;
>  	}
>      }
> @@ -112,7 +112,7 @@ libelf_acquire_all (Elf *elf)
>  /* Release own lock and those of the children.  */

^ likewise.

>  static void
>  __attribute__ ((unused))
> -libelf_release_all (Elf *elf)
> +libelf_release_all_children (Elf *elf)
>  {
>    if (elf->kind == ELF_K_AR)
>      {
> @@ -121,12 +121,12 @@ libelf_release_all (Elf *elf)
>        while (child != NULL)
>  	{
>  	  if (child->ref_count != 0)
> -	    libelf_release_all (child);
> +	    libelf_release_all_children (child);
> +
> +	  rwlock_unlock (child->lock);
>  	  child = child->next;

Here the sequence seems correct, first release the locks on all
children, then release the lock on the child descriptor itself.

I don't think this can actually lead to deadlock because the top-level
Elf descriptor lock is held and I don't think children can be held by
two different top-level Elf handles. But it feels better to acquire the
locks in the prescribed order (and unlock in the reverse).

>  	}
>      }
> -
> -  rwlock_unlock (elf->lock);
>  }
>  
>  
> diff --git a/libelf/elf_readall.c b/libelf/elf_readall.c
> index d0f9a28c..4ef8fe97 100644
> --- a/libelf/elf_readall.c
> +++ b/libelf/elf_readall.c
> @@ -84,7 +84,7 @@ __libelf_readall (Elf *elf)
>  
>        /* If this is an archive and we have derived descriptors get the
>  	 locks for all of them.  */
> -      libelf_acquire_all (elf);
> +      libelf_acquire_all_children (elf);
>  
>        if (elf->maximum_size == ~((size_t) 0))
>  	{
> @@ -141,7 +141,7 @@ __libelf_readall (Elf *elf)
>  	__libelf_seterrno (ELF_E_NOMEM);
>  
>        /* Free the locks on the children.  */
> -      libelf_release_all (elf);
> +      libelf_release_all_children (elf);
>      }
>  
>    rwlock_unlock (elf->lock);
  

Patch

diff --git a/libelf/common.h b/libelf/common.h
index 9b2a856d..b7226ee8 100644
--- a/libelf/common.h
+++ b/libelf/common.h
@@ -92,10 +92,8 @@  allocate_elf (int fildes, void *map_address, int64_t offset, size_t maxsize,
 /* Acquire lock for the descriptor and all children.  */
 static void
 __attribute__ ((unused))
-libelf_acquire_all (Elf *elf)
+libelf_acquire_all_children (Elf *elf)
 {
-  rwlock_wrlock (elf->lock);
-
   if (elf->kind == ELF_K_AR)
     {
       Elf *child = elf->state.ar.children;
@@ -103,7 +101,9 @@  libelf_acquire_all (Elf *elf)
       while (child != NULL)
 	{
 	  if (child->ref_count != 0)
-	    libelf_acquire_all (child);
+	    libelf_acquire_all_children (child);
+
+	  rwlock_wrlock(child->lock);
 	  child = child->next;
 	}
     }
@@ -112,7 +112,7 @@  libelf_acquire_all (Elf *elf)
 /* Release own lock and those of the children.  */
 static void
 __attribute__ ((unused))
-libelf_release_all (Elf *elf)
+libelf_release_all_children (Elf *elf)
 {
   if (elf->kind == ELF_K_AR)
     {
@@ -121,12 +121,12 @@  libelf_release_all (Elf *elf)
       while (child != NULL)
 	{
 	  if (child->ref_count != 0)
-	    libelf_release_all (child);
+	    libelf_release_all_children (child);
+
+	  rwlock_unlock (child->lock);
 	  child = child->next;
 	}
     }
-
-  rwlock_unlock (elf->lock);
 }
 
 
diff --git a/libelf/elf_readall.c b/libelf/elf_readall.c
index d0f9a28c..4ef8fe97 100644
--- a/libelf/elf_readall.c
+++ b/libelf/elf_readall.c
@@ -84,7 +84,7 @@  __libelf_readall (Elf *elf)
 
       /* If this is an archive and we have derived descriptors get the
 	 locks for all of them.  */
-      libelf_acquire_all (elf);
+      libelf_acquire_all_children (elf);
 
       if (elf->maximum_size == ~((size_t) 0))
 	{
@@ -141,7 +141,7 @@  __libelf_readall (Elf *elf)
 	__libelf_seterrno (ELF_E_NOMEM);
 
       /* Free the locks on the children.  */
-      libelf_release_all (elf);
+      libelf_release_all_children (elf);
     }
 
   rwlock_unlock (elf->lock);