Linux: Implement opensock using Netlink sockets

Message ID 20180618105015.5E8B340292859@oldenburg.str.redhat.com
State Rejected
Headers

Commit Message

Florian Weimer June 18, 2018, 10:50 a.m. UTC
  inet/tst-inet6_scopeid_pton uses __opensock indirectly, to call ioctl
with SIOCGIFINDEX, and it still works after this change.

2018-06-18  Florian Weimer  <fweimer@redhat.com>

	* sysdeps/unix/sysv/linux/opensock.c (__opensock): Unconditionally
	return a Netlink socket.
	* sysdeps/unix/sysv/linux/s390/opensock.c: Remove file.
  

Comments

Adhemerval Zanella June 18, 2018, 1:56 p.m. UTC | #1
On 18/06/2018 07:50, Florian Weimer wrote:
> inet/tst-inet6_scopeid_pton uses __opensock indirectly, to call ioctl
> with SIOCGIFINDEX, and it still works after this change.
> 
> 2018-06-18  Florian Weimer  <fweimer@redhat.com>
> 
> 	* sysdeps/unix/sysv/linux/opensock.c (__opensock): Unconditionally
> 	return a Netlink socket.
> 	* sysdeps/unix/sysv/linux/s390/opensock.c: Remove file.

Similar to a previous attempt to use AF_NETLINK sockets as default [1],
I am seeing some regression on testcases with Linux 4.4.0-116-generic:

FAIL: inet/bug-if1
FAIL: inet/test_ifindex
FAIL: inet/tst-inet6_scopeid_pton

$ ./testrun.sh inet/bug-if1
errno = 95 (Operation not supported), expected 6 (No such device or address)

$ strace -f ./testrun.sh inet/bug-if1 --direct
[...]
socket(AF_NETLINK, SOCK_DGRAM|SOCK_CLOEXEC, NETLINK_ROUTE) = 3
ioctl(3, SIOCGIFNAME, {ifr_index=0})    = -1 EOPNOTSUPP (Operation not supported)
[...]


$ ./testrun.sh inet/test_ifindex
Idx            Name | Idx           Name
  1              lo |   0         (null)      fail
  2            eth0 |   0         (null)      fail
  3          virbr0 |   0         (null)      fail
  4      virbr0-nic |   0         (null)      fail
  5          lxcbr0 |   0         (null)      fail
  6         docker0 |   0         (null)      fail

$ strace -f ./testrun.sh inet/test_ifindex --direct
[...]
socket(AF_NETLINK, SOCK_DGRAM|SOCK_CLOEXEC, NETLINK_ROUTE) = 3
ioctl(3, SIOCGIFINDEX, {ifr_name="lxcbr0"}) = -1 EOPNOTSUPP (Operation not supported)
[...]


$ ./testrun.sh inet/tst-inet6_scopeid_pton
error: unexpected failure for fe80::1%lo
error: unexpected result for for fe80::1%lo
  expected: 1
  actual:   2
error: unexpected getaddrinfo failure for fe80::1%lo (AF_UNSPEC)
error: unexpected getaddrinfo failure for fe80::1%lo (AF_INET6)
error: unexpected failure for ff02::1%lo
error: unexpected result for for ff02::1%lo
  expected: 1
  actual:   2
error: unexpected getaddrinfo failure for ff02::1%lo (AF_UNSPEC)
error: unexpected getaddrinfo failure for ff02::1%lo (AF_INET6)
error: unexpected failure for ff01::1%lo
error: unexpected result for for ff01::1%lo
  expected: 1
  actual:   2
error: unexpected getaddrinfo failure for ff01::1%lo (AF_UNSPEC)
error: unexpected getaddrinfo failure for ff01::1%lo (AF_INET6)
error: 12 test failures

$ strace -f ./testrun.sh inet/tst-inet6_scopeid_pton --direct
[...]
socket(AF_NETLINK, SOCK_DGRAM|SOCK_CLOEXEC, NETLINK_ROUTE) = 3
ioctl(3, SIOCGIFINDEX, {ifr_name="lo"}) = -1 EOPNOTSUPP (Operation not supported)
[...]

[1] https://sourceware.org/ml/libc-alpha/2017-08/msg01091.html

> 
> diff --git a/sysdeps/unix/sysv/linux/opensock.c b/sysdeps/unix/sysv/linux/opensock.c
> index fd9445bc3b..f5d7322c35 100644
> --- a/sysdeps/unix/sysv/linux/opensock.c
> +++ b/sysdeps/unix/sysv/linux/opensock.c
> @@ -15,100 +15,15 @@
>     License along with the GNU C Library; if not, see
>     <http://www.gnu.org/licenses/>.  */
>  
> -#include <assert.h>
> -#include <errno.h>
> -#include <stdio.h>
> -#include <string.h>
> -#include <unistd.h>
>  #include <sys/socket.h>
> +#include <linux/netlink.h>
>  
>  /* Return a socket of any type.  The socket can be used in subsequent
>     ioctl calls to talk to the kernel.  */
>  int
>  __opensock (void)
>  {
> -  static int last_family;	/* Available socket family we will use.  */
> -  static int last_type;
> -  static const struct
> -  {
> -    int family;
> -    const char procname[15];
> -  } afs[] =
> -    {
> -      { AF_UNIX, "net/unix" },
> -      { AF_INET, "" },
> -      { AF_INET6, "net/if_inet6" },
> -      { AF_AX25, "net/ax25" },
> -      { AF_NETROM, "net/nr" },
> -      { AF_ROSE, "net/rose" },
> -      { AF_IPX, "net/ipx" },
> -      { AF_APPLETALK, "net/appletalk" },
> -      { AF_ECONET, "sys/net/econet" },
> -      { AF_ASH, "sys/net/ash" },
> -      { AF_X25, "net/x25" },
> -#ifdef NEED_AF_IUCV
> -      { AF_IUCV, "net/iucv" }
> -#endif
> -    };
> -#define nafs (sizeof (afs) / sizeof (afs[0]))
> -  char fname[sizeof "/proc/" + 14];
> -  int result;
> -  int has_proc;
> -  size_t cnt;
> -
> -  /* We already know which family to use from the last call.  Use it
> -     again.  */
> -  if (last_family != 0)
> -    {
> -      assert (last_type != 0);
> -
> -      result = __socket (last_family, last_type | SOCK_CLOEXEC, 0);
> -      if (result != -1 || errno != EAFNOSUPPORT)
> -	/* Maybe the socket type isn't supported anymore (module is
> -	   unloaded).  In this case again try to find the type.  */
> -	return result;
> -
> -      /* Reset the values.  They seem not valid anymore.  */
> -      last_family = 0;
> -      last_type = 0;
> -    }
> -
> -  /* Check whether the /proc filesystem is available.  */
> -  has_proc = __access ("/proc/net", R_OK) != -1;
> -  strcpy (fname, "/proc/");
> -
> -  /* Iterate over the interface families and find one which is
> -     available.  */
> -  for (cnt = 0; cnt < nafs; ++cnt)
> -    {
> -      int type = SOCK_DGRAM;
> -
> -      if (has_proc && afs[cnt].procname[0] != '\0')
> -	{
> -	  strcpy (fname + 6, afs[cnt].procname);
> -	  if (__access (fname, R_OK) == -1)
> -	    /* The /proc entry is not available.  I.e., we cannot
> -	       create a socket of this type (without loading the
> -	       module).  Don't look for it since this might trigger
> -	       loading the module.  */
> -	    continue;
> -	}
> -
> -      if (afs[cnt].family == AF_NETROM || afs[cnt].family == AF_X25)
> -	type = SOCK_SEQPACKET;
> -
> -      result = __socket (afs[cnt].family, type | SOCK_CLOEXEC, 0);
> -      if (result != -1)
> -	{
> -	  /* Found an available family.  */
> -	  last_type = type;
> -	  last_family = afs[cnt].family;
> -	  return result;
> -	}
> -    }
> -
> -  /* None of the protocol families is available.  It is unclear what kind
> -     of error is returned.  ENOENT seems like a reasonable choice.  */
> -  __set_errno (ENOENT);
> -  return -1;
> +  /* Netlink support is always part of the kernel, so we just use a
> +     netlink socket.  */
> +  return __socket (PF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_ROUTE);
>  }
> diff --git a/sysdeps/unix/sysv/linux/s390/opensock.c b/sysdeps/unix/sysv/linux/s390/opensock.c
> deleted file mode 100644
> index f099d651ff..0000000000
> --- a/sysdeps/unix/sysv/linux/s390/opensock.c
> +++ /dev/null
> @@ -1,2 +0,0 @@
> -#define NEED_AF_IUCV 1
> -#include "../opensock.c"
>
  
Florian Weimer June 18, 2018, 2:25 p.m. UTC | #2
On 06/18/2018 03:56 PM, Adhemerval Zanella wrote:
> 
> 
> On 18/06/2018 07:50, Florian Weimer wrote:
>> inet/tst-inet6_scopeid_pton uses __opensock indirectly, to call ioctl
>> with SIOCGIFINDEX, and it still works after this change.
>>
>> 2018-06-18  Florian Weimer  <fweimer@redhat.com>
>>
>> 	* sysdeps/unix/sysv/linux/opensock.c (__opensock): Unconditionally
>> 	return a Netlink socket.
>> 	* sysdeps/unix/sysv/linux/s390/opensock.c: Remove file.
> 
> Similar to a previous attempt to use AF_NETLINK sockets as default [1],
> I am seeing some regression on testcases with Linux 4.4.0-116-generic:
> 
> FAIL: inet/bug-if1
> FAIL: inet/test_ifindex
> FAIL: inet/tst-inet6_scopeid_pton
> 
> $ ./testrun.sh inet/bug-if1
> errno = 95 (Operation not supported), expected 6 (No such device or address)
> 
> $ strace -f ./testrun.sh inet/bug-if1 --direct
> [...]
> socket(AF_NETLINK, SOCK_DGRAM|SOCK_CLOEXEC, NETLINK_ROUTE) = 3
> ioctl(3, SIOCGIFNAME, {ifr_index=0})    = -1 EOPNOTSUPP (Operation not supported)
> [...]

Huh.  Any idea why this happens?  I don't see how this is possible based 
on the kernel sources.

Ahh, it's been fixed since then.  Netlink sockets were one of the few 
which did not perform ENOIOCTLCMD fallback.  This was changed in kernel 4.6:

commit 025c68186e07afaededa84143f1a22f273cd3f67
Author: David Decotigny <decot@googlers.com>
Date:   Mon Mar 21 10:15:35 2016 -0700

     netlink: add support for NIC driver ioctls

     By returning -ENOIOCTLCMD, sock_do_ioctl() falls back to calling
     dev_ioctl(), which provides support for NIC driver ioctls, which
     includes ethtool support. This is similar to the way ioctls are
     handled in udp.c or tcp.c.

     This removes the requirement that ethtool for example be tied to the
     support of a specific L3 protocol (ethtool uses an AF_INET socket
     today).

It's unfortunate that it took that long to make this change.

Thanks,
Florian
  
Ben Hutchings June 18, 2018, 4:46 p.m. UTC | #3
On Mon, 2018-06-18 at 12:50 +0200, Florian Weimer wrote:
> inet/tst-inet6_scopeid_pton uses __opensock indirectly, to call ioctl
> with SIOCGIFINDEX, and it still works after this change.
> 
> 2018-06-18  Florian Weimer  <fweimer@redhat.com>
> 
> 	* sysdeps/unix/sysv/linux/opensock.c (__opensock): Unconditionally
> 	return a Netlink socket.
> 	* sysdeps/unix/sysv/linux/s390/opensock.c: Remove file.
[...]

I agree that we can assume the kernel implements AF_NETLINK, but the
available socket address families might be restricted by a security
policy (e.g. systemd's RestrictAddressFamilies property).  I'm not sure
whether it's safe to assume that they will always allow AF_NETLINK.

Ben.
  
Adhemerval Zanella June 18, 2018, 5:09 p.m. UTC | #4
On 18/06/2018 11:25, Florian Weimer wrote:
> On 06/18/2018 03:56 PM, Adhemerval Zanella wrote:
>>
>>
>> On 18/06/2018 07:50, Florian Weimer wrote:
>>> inet/tst-inet6_scopeid_pton uses __opensock indirectly, to call ioctl
>>> with SIOCGIFINDEX, and it still works after this change.
>>>
>>> 2018-06-18  Florian Weimer  <fweimer@redhat.com>
>>>
>>>     * sysdeps/unix/sysv/linux/opensock.c (__opensock): Unconditionally
>>>     return a Netlink socket.
>>>     * sysdeps/unix/sysv/linux/s390/opensock.c: Remove file.
>>
>> Similar to a previous attempt to use AF_NETLINK sockets as default [1],
>> I am seeing some regression on testcases with Linux 4.4.0-116-generic:
>>
>> FAIL: inet/bug-if1
>> FAIL: inet/test_ifindex
>> FAIL: inet/tst-inet6_scopeid_pton
>>
>> $ ./testrun.sh inet/bug-if1
>> errno = 95 (Operation not supported), expected 6 (No such device or address)
>>
>> $ strace -f ./testrun.sh inet/bug-if1 --direct
>> [...]
>> socket(AF_NETLINK, SOCK_DGRAM|SOCK_CLOEXEC, NETLINK_ROUTE) = 3
>> ioctl(3, SIOCGIFNAME, {ifr_index=0})    = -1 EOPNOTSUPP (Operation not supported)
>> [...]
> 
> Huh.  Any idea why this happens?  I don't see how this is possible based on the kernel sources.
> 
> Ahh, it's been fixed since then.  Netlink sockets were one of the few which did not perform ENOIOCTLCMD fallback.  This was changed in kernel 4.6:
> 
> commit 025c68186e07afaededa84143f1a22f273cd3f67
> Author: David Decotigny <decot@googlers.com>
> Date:   Mon Mar 21 10:15:35 2016 -0700
> 
>     netlink: add support for NIC driver ioctls
> 
>     By returning -ENOIOCTLCMD, sock_do_ioctl() falls back to calling
>     dev_ioctl(), which provides support for NIC driver ioctls, which
>     includes ethtool support. This is similar to the way ioctls are
>     handled in udp.c or tcp.c.
> 
>     This removes the requirement that ethtool for example be tied to the
>     support of a specific L3 protocol (ethtool uses an AF_INET socket
>     today).
> 
> It's unfortunate that it took that long to make this change.
> 
> Thanks,
> Florian

Indeed, so I think we will need the fallback options (maybe we can restrict
to some options).
  
Florian Weimer June 18, 2018, 5:09 p.m. UTC | #5
* Adhemerval Zanella:

>> Ahh, it's been fixed since then.  Netlink sockets were one of the
>> few which did not perform ENOIOCTLCMD fallback.  This was changed in
>> kernel 4.6:
>> 
>> commit 025c68186e07afaededa84143f1a22f273cd3f67
>> Author: David Decotigny <decot@googlers.com>
>> Date:   Mon Mar 21 10:15:35 2016 -0700
>> 
>>     netlink: add support for NIC driver ioctls
>> 
>>     By returning -ENOIOCTLCMD, sock_do_ioctl() falls back to calling
>>     dev_ioctl(), which provides support for NIC driver ioctls, which
>>     includes ethtool support. This is similar to the way ioctls are
>>     handled in udp.c or tcp.c.
>> 
>>     This removes the requirement that ethtool for example be tied to the
>>     support of a specific L3 protocol (ethtool uses an AF_INET socket
>>     today).
>> 
>> It's unfortunate that it took that long to make this change.
>> 
>> Thanks,
>> Florian
>
> Indeed, so I think we will need the fallback options (maybe we can restrict
> to some options).

The real question is whether it is important to avoid module loading.
If we could just try

AF_NETLINK
AF_UNIX
AF_INET
AF_INET6

in this order, irrespective of the module load status, the code would
already be much, much simpler.
  
Florian Weimer June 18, 2018, 5:10 p.m. UTC | #6
* Ben Hutchings:

> On Mon, 2018-06-18 at 12:50 +0200, Florian Weimer wrote:
>> inet/tst-inet6_scopeid_pton uses __opensock indirectly, to call ioctl
>> with SIOCGIFINDEX, and it still works after this change.
>> 
>> 2018-06-18  Florian Weimer  <fweimer@redhat.com>
>> 
>> 	* sysdeps/unix/sysv/linux/opensock.c (__opensock): Unconditionally
>> 	return a Netlink socket.
>> 	* sysdeps/unix/sysv/linux/s390/opensock.c: Remove file.
> [...]
>
> I agree that we can assume the kernel implements AF_NETLINK, but the
> available socket address families might be restricted by a security
> policy (e.g. systemd's RestrictAddressFamilies property).  I'm not sure
> whether it's safe to assume that they will always allow AF_NETLINK.

If it actually worked, the AF_NETLINK approach seems cleaner in this
regard.  The current code goes hunting for information in /proc (to
avoid loading unwanted kernel modules), which requires a much larger
footprint and is harder to filter.
  
Adhemerval Zanella June 18, 2018, 5:17 p.m. UTC | #7
On 18/06/2018 14:09, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>>> Ahh, it's been fixed since then.  Netlink sockets were one of the
>>> few which did not perform ENOIOCTLCMD fallback.  This was changed in
>>> kernel 4.6:
>>>
>>> commit 025c68186e07afaededa84143f1a22f273cd3f67
>>> Author: David Decotigny <decot@googlers.com>
>>> Date:   Mon Mar 21 10:15:35 2016 -0700
>>>
>>>     netlink: add support for NIC driver ioctls
>>>
>>>     By returning -ENOIOCTLCMD, sock_do_ioctl() falls back to calling
>>>     dev_ioctl(), which provides support for NIC driver ioctls, which
>>>     includes ethtool support. This is similar to the way ioctls are
>>>     handled in udp.c or tcp.c.
>>>
>>>     This removes the requirement that ethtool for example be tied to the
>>>     support of a specific L3 protocol (ethtool uses an AF_INET socket
>>>     today).
>>>
>>> It's unfortunate that it took that long to make this change.
>>>
>>> Thanks,
>>> Florian
>>
>> Indeed, so I think we will need the fallback options (maybe we can restrict
>> to some options).
> 
> The real question is whether it is important to avoid module loading.
> If we could just try
> 
> AF_NETLINK
> AF_UNIX
> AF_INET
> AF_INET6
> 
> in this order, irrespective of the module load status, the code would
> already be much, much simpler.
> 

I think it should cover the expected kernel support (it would be unusual to
use the getifaddr functions with a kernel without tcpip support).

The issues is we will need to ensure the returned socket works with
SIOCGIFNAME (maybe by issuing a ioctl to make it sure).
  
Florian Weimer June 18, 2018, 5:27 p.m. UTC | #8
* Adhemerval Zanella:

>> The real question is whether it is important to avoid module loading.
>> If we could just try
>> 
>> AF_NETLINK
>> AF_UNIX
>> AF_INET
>> AF_INET6
>> 
>> in this order, irrespective of the module load status, the code would
>> already be much, much simpler.
>> 
>
> I think it should cover the expected kernel support (it would be unusual to
> use the getifaddr functions with a kernel without tcpip support).
>
> The issues is we will need to ensure the returned socket works with
> SIOCGIFNAME (maybe by issuing a ioctl to make it sure).

Hmm, yes.  For AF_NETLINK, such a check would definitely be needed.
I'll have to check if this leads to the desired simplification.
  

Patch

diff --git a/sysdeps/unix/sysv/linux/opensock.c b/sysdeps/unix/sysv/linux/opensock.c
index fd9445bc3b..f5d7322c35 100644
--- a/sysdeps/unix/sysv/linux/opensock.c
+++ b/sysdeps/unix/sysv/linux/opensock.c
@@ -15,100 +15,15 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <assert.h>
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
 #include <sys/socket.h>
+#include <linux/netlink.h>
 
 /* Return a socket of any type.  The socket can be used in subsequent
    ioctl calls to talk to the kernel.  */
 int
 __opensock (void)
 {
-  static int last_family;	/* Available socket family we will use.  */
-  static int last_type;
-  static const struct
-  {
-    int family;
-    const char procname[15];
-  } afs[] =
-    {
-      { AF_UNIX, "net/unix" },
-      { AF_INET, "" },
-      { AF_INET6, "net/if_inet6" },
-      { AF_AX25, "net/ax25" },
-      { AF_NETROM, "net/nr" },
-      { AF_ROSE, "net/rose" },
-      { AF_IPX, "net/ipx" },
-      { AF_APPLETALK, "net/appletalk" },
-      { AF_ECONET, "sys/net/econet" },
-      { AF_ASH, "sys/net/ash" },
-      { AF_X25, "net/x25" },
-#ifdef NEED_AF_IUCV
-      { AF_IUCV, "net/iucv" }
-#endif
-    };
-#define nafs (sizeof (afs) / sizeof (afs[0]))
-  char fname[sizeof "/proc/" + 14];
-  int result;
-  int has_proc;
-  size_t cnt;
-
-  /* We already know which family to use from the last call.  Use it
-     again.  */
-  if (last_family != 0)
-    {
-      assert (last_type != 0);
-
-      result = __socket (last_family, last_type | SOCK_CLOEXEC, 0);
-      if (result != -1 || errno != EAFNOSUPPORT)
-	/* Maybe the socket type isn't supported anymore (module is
-	   unloaded).  In this case again try to find the type.  */
-	return result;
-
-      /* Reset the values.  They seem not valid anymore.  */
-      last_family = 0;
-      last_type = 0;
-    }
-
-  /* Check whether the /proc filesystem is available.  */
-  has_proc = __access ("/proc/net", R_OK) != -1;
-  strcpy (fname, "/proc/");
-
-  /* Iterate over the interface families and find one which is
-     available.  */
-  for (cnt = 0; cnt < nafs; ++cnt)
-    {
-      int type = SOCK_DGRAM;
-
-      if (has_proc && afs[cnt].procname[0] != '\0')
-	{
-	  strcpy (fname + 6, afs[cnt].procname);
-	  if (__access (fname, R_OK) == -1)
-	    /* The /proc entry is not available.  I.e., we cannot
-	       create a socket of this type (without loading the
-	       module).  Don't look for it since this might trigger
-	       loading the module.  */
-	    continue;
-	}
-
-      if (afs[cnt].family == AF_NETROM || afs[cnt].family == AF_X25)
-	type = SOCK_SEQPACKET;
-
-      result = __socket (afs[cnt].family, type | SOCK_CLOEXEC, 0);
-      if (result != -1)
-	{
-	  /* Found an available family.  */
-	  last_type = type;
-	  last_family = afs[cnt].family;
-	  return result;
-	}
-    }
-
-  /* None of the protocol families is available.  It is unclear what kind
-     of error is returned.  ENOENT seems like a reasonable choice.  */
-  __set_errno (ENOENT);
-  return -1;
+  /* Netlink support is always part of the kernel, so we just use a
+     netlink socket.  */
+  return __socket (PF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_ROUTE);
 }
diff --git a/sysdeps/unix/sysv/linux/s390/opensock.c b/sysdeps/unix/sysv/linux/s390/opensock.c
deleted file mode 100644
index f099d651ff..0000000000
--- a/sysdeps/unix/sysv/linux/s390/opensock.c
+++ /dev/null
@@ -1,2 +0,0 @@ 
-#define NEED_AF_IUCV 1
-#include "../opensock.c"