Fix double-checked locking in _res_hconf_reorder_addrs [BZ #19074]

Message ID 5613BF47.9000503@redhat.com
State Superseded
Headers

Commit Message

Florian Weimer Oct. 6, 2015, 12:32 p.m. UTC
  This addresses a data race in libresolv.  The race is not entirely
benign, it could cause programs to access a partially initialized
ifaddrs array.

I made sure this compiles on x86_64, but this code is difficult to test.

A potential follow-up optimization would be to move the socket creation
under the lock.  No need to create a socket if we lose the race to the lock.

Florian
  

Comments

Siddhesh Poyarekar Oct. 6, 2015, 2:08 p.m. UTC | #1
On Tuesday 06 October 2015 06:02 PM, Florian Weimer wrote:
> This addresses a data race in libresolv.  The race is not entirely
> benign, it could cause programs to access a partially initialized
> ifaddrs array.
> 
> I made sure this compiles on x86_64, but this code is difficult to test.
> 
> A potential follow-up optimization would be to move the socket creation
> under the lock.  No need to create a socket if we lose the race to the lock.

The code needs verbose comments to describe the rationale for the
ordering semantics you've decided on.

>  #if IS_IN (libc)
>  # define fgets_unlocked __fgets_unlocked
> @@ -393,6 +394,7 @@ _res_hconf_reorder_addrs (struct hostent *hp)
>    int i, j;
>    /* Number of interfaces.  */

Expand the comment to describe at a high level what code paths can
concurrently access this and what the effect would be/ought to be.

>    static int num_ifs = -1;
> +  int num_ifs_local;
>    /* We need to protect the dynamic buffer handling.  */
>    __libc_lock_define_initialized (static, lock);
>  
> @@ -404,7 +406,8 @@ _res_hconf_reorder_addrs (struct hostent *hp)
>    if (hp->h_addrtype != AF_INET)
>      return;
>  
> -  if (num_ifs <= 0)
> +  num_ifs_local = atomic_load_acquire (&num_ifs);
> +  if (num_ifs_local <= 0)

Why is acquire necessary/sufficient?

>      {
>        struct ifreq *ifr, *cur_ifr;
>        int sd, num, i;
> @@ -422,7 +425,7 @@ _res_hconf_reorder_addrs (struct hostent *hp)
>        __libc_lock_lock (lock);
>  
>        /* Recheck, somebody else might have done the work by now.  */
> -      if (num_ifs <= 0)
> +      if (atomic_load_acquire (&num_ifs) <= 0)

Likewise.

>  	{
>  	  int new_num_ifs = 0;
>  
> @@ -472,7 +475,8 @@ _res_hconf_reorder_addrs (struct hostent *hp)
>  	  /* Release lock, preserve error value, and close socket.  */
>  	  errno = save;
>  
> -	  num_ifs = new_num_ifs;
> +	  atomic_store_release (&num_ifs, new_num_ifs);

Likewise.

> +	  num_ifs_local = new_num_ifs;
>  	}
>  
>        __libc_lock_unlock (lock);
> @@ -480,7 +484,7 @@ _res_hconf_reorder_addrs (struct hostent *hp)
>        __close (sd);
>      }
>  
> -  if (num_ifs == 0)
> +  if (num_ifs_local == 0)
>      return;
>  
>    /* Find an address for which we have a direct connection.  */
> @@ -488,7 +492,7 @@ _res_hconf_reorder_addrs (struct hostent *hp)
>      {
>        struct in_addr *haddr = (struct in_addr *) hp->h_addr_list[i];
>  
> -      for (j = 0; j < num_ifs; ++j)
> +      for (j = 0; j < num_ifs_local; ++j)
>  	{
>  	  u_int32_t if_addr    = ifaddrs[j].u.ipv4.addr;
>  	  u_int32_t if_netmask = ifaddrs[j].u.ipv4.mask;
>
  
Florian Weimer Oct. 6, 2015, 2:33 p.m. UTC | #2
On 10/06/2015 04:08 PM, Siddhesh Poyarekar wrote:
> On Tuesday 06 October 2015 06:02 PM, Florian Weimer wrote:
>> This addresses a data race in libresolv.  The race is not entirely
>> benign, it could cause programs to access a partially initialized
>> ifaddrs array.
>>
>> I made sure this compiles on x86_64, but this code is difficult to test.
>>
>> A potential follow-up optimization would be to move the socket creation
>> under the lock.  No need to create a socket if we lose the race to the lock.
> 
> The code needs verbose comments to describe the rationale for the
> ordering semantics you've decided on.

Do we have documentation on the wiki for common concurrency idioms?
It's probably best to document things that work and way, and stick to
that, instead of using ad-hoc schemes all over the place.  (I think the
code I showed is sufficiently close to the usual double-checked locking
idiom, except that the guarded section may run multiple times.)

Florian
  
Siddhesh Poyarekar Oct. 6, 2015, 4:16 p.m. UTC | #3
On Tuesday 06 October 2015 08:03 PM, Florian Weimer wrote:
> Do we have documentation on the wiki for common concurrency idioms?

Not that I know.

> It's probably best to document things that work and way, and stick to
> that, instead of using ad-hoc schemes all over the place.  (I think the
> code I showed is sufficiently close to the usual double-checked locking
> idiom, except that the guarded section may run multiple times.)

A wiki page may be useful, but it's not a replacement for a code comment
that explains why it works, which is specific to the code block in question.

Siddhesh
  
Torvald Riegel Oct. 7, 2015, 10:28 a.m. UTC | #4
On Tue, 2015-10-06 at 16:33 +0200, Florian Weimer wrote:
> On 10/06/2015 04:08 PM, Siddhesh Poyarekar wrote:
> > On Tuesday 06 October 2015 06:02 PM, Florian Weimer wrote:
> >> This addresses a data race in libresolv.  The race is not entirely
> >> benign, it could cause programs to access a partially initialized
> >> ifaddrs array.
> >>
> >> I made sure this compiles on x86_64, but this code is difficult to test.
> >>
> >> A potential follow-up optimization would be to move the socket creation
> >> under the lock.  No need to create a socket if we lose the race to the lock.
> > 
> > The code needs verbose comments to describe the rationale for the
> > ordering semantics you've decided on.
> 
> Do we have documentation on the wiki for common concurrency idioms?

Not yet.  A section on the Concurrency might be a good place for that.

If you should write this page, check whether you really need the acquire
MO in the load in the critical section, or whether relaxed MO is
sufficient ;)

> It's probably best to document things that work and way, and stick to
> that, instead of using ad-hoc schemes all over the place.

I agree that it's good to follow common patterns.  Nonetheless, you
still need to say why the pattern applies, and make sure that all
readers see the pattern and can check that the pattern is applied
properly.  This can be brief -- but if people in the community feel like
they need more details in the comments, it should be provided in most
cases, I think.

>  (I think the
> code I showed is sufficiently close to the usual double-checked locking
> idiom, except that the guarded section may run multiple times.)

Are you referring to that the critical section may be entered
unnecessarily?  That is common for double-checked locking.  Or are you
referring to something else?
  

Patch

2015-10-06  Florian Weimer  <fweimer@redhat.com>

	[BZ #19074]
	* resolv/res_hconf.c (_res_hconf_reorder_addrs): Use atomics to
	load and store num_ifs.

diff --git a/NEWS b/NEWS
index 3852e7f..412effe 100644
--- a/NEWS
+++ b/NEWS
@@ -49,7 +49,7 @@  Version 2.22
   18533, 18534, 18536, 18539, 18540, 18542, 18544, 18545, 18546, 18547,
   18549, 18553, 18557, 18558, 18569, 18583, 18585, 18586, 18592, 18593,
   18594, 18602, 18612, 18613, 18619, 18633, 18635, 18641, 18643, 18648,
-  18657, 18676, 18694, 18696, 18887.
+  18657, 18676, 18694, 18696, 18887, 19074.
 
 * Cache information can be queried via sysconf() function on s390 e.g. with
   _SC_LEVEL1_ICACHE_SIZE as argument.
diff --git a/resolv/res_hconf.c b/resolv/res_hconf.c
index 692d948..11e2f2d 100644
--- a/resolv/res_hconf.c
+++ b/resolv/res_hconf.c
@@ -45,6 +45,7 @@ 
 #include "ifreq.h"
 #include "res_hconf.h"
 #include <wchar.h>
+#include <atomic.h>
 
 #if IS_IN (libc)
 # define fgets_unlocked __fgets_unlocked
@@ -393,6 +394,7 @@  _res_hconf_reorder_addrs (struct hostent *hp)
   int i, j;
   /* Number of interfaces.  */
   static int num_ifs = -1;
+  int num_ifs_local;
   /* We need to protect the dynamic buffer handling.  */
   __libc_lock_define_initialized (static, lock);
 
@@ -404,7 +406,8 @@  _res_hconf_reorder_addrs (struct hostent *hp)
   if (hp->h_addrtype != AF_INET)
     return;
 
-  if (num_ifs <= 0)
+  num_ifs_local = atomic_load_acquire (&num_ifs);
+  if (num_ifs_local <= 0)
     {
       struct ifreq *ifr, *cur_ifr;
       int sd, num, i;
@@ -422,7 +425,7 @@  _res_hconf_reorder_addrs (struct hostent *hp)
       __libc_lock_lock (lock);
 
       /* Recheck, somebody else might have done the work by now.  */
-      if (num_ifs <= 0)
+      if (atomic_load_acquire (&num_ifs) <= 0)
 	{
 	  int new_num_ifs = 0;
 
@@ -472,7 +475,8 @@  _res_hconf_reorder_addrs (struct hostent *hp)
 	  /* Release lock, preserve error value, and close socket.  */
 	  errno = save;
 
-	  num_ifs = new_num_ifs;
+	  atomic_store_release (&num_ifs, new_num_ifs);
+	  num_ifs_local = new_num_ifs;
 	}
 
       __libc_lock_unlock (lock);
@@ -480,7 +484,7 @@  _res_hconf_reorder_addrs (struct hostent *hp)
       __close (sd);
     }
 
-  if (num_ifs == 0)
+  if (num_ifs_local == 0)
     return;
 
   /* Find an address for which we have a direct connection.  */
@@ -488,7 +492,7 @@  _res_hconf_reorder_addrs (struct hostent *hp)
     {
       struct in_addr *haddr = (struct in_addr *) hp->h_addr_list[i];
 
-      for (j = 0; j < num_ifs; ++j)
+      for (j = 0; j < num_ifs_local; ++j)
 	{
 	  u_int32_t if_addr    = ifaddrs[j].u.ipv4.addr;
 	  u_int32_t if_netmask = ifaddrs[j].u.ipv4.mask;
-- 
2.4.3