Simplify sendmmsg code

Message ID alpine.DEB.2.20.1705091552540.16186@digraph.polyomino.org.uk
State New, archived
Headers

Commit Message

Joseph Myers May 9, 2017, 3:54 p.m. UTC
  Now we can assume a kernel with sendmmsg support, this patch
simplifies the implementation to be similar to that for accept4:
either using socketcall or the syscall according to whether the
syscall is known to be available, without further fallback
implementations.  The __ASSUME_SENDMMSG macro is kept (now defined
unconditionally), since it's used in resolv/res_send.c.

Tested for x86_64 and x86.

2017-05-09  Joseph Myers  <joseph@codesourcery.com>

	* sysdeps/unix/sysv/linux/kernel-features.h
	(__ASSUME_SENDMMSG_SYSCALL): Define unconditionally.
	(__ASSUME_SENDMMSG): Likewise.
	(__ASSUME_SENDMMSG_SOCKETCALL): Remove macro.
	* sysdeps/unix/sysv/linux/sendmmsg.c (__sendmmsg): Define using
	sendmmsg syscall if that can be assumed to be present, socketcall
	otherwise, with no fallback for runtime failure.
  

Comments

Adhemerval Zanella May 9, 2017, 6:41 p.m. UTC | #1
On 09/05/2017 12:54, Joseph Myers wrote:
> Now we can assume a kernel with sendmmsg support, this patch
> simplifies the implementation to be similar to that for accept4:
> either using socketcall or the syscall according to whether the
> syscall is known to be available, without further fallback
> implementations.  The __ASSUME_SENDMMSG macro is kept (now defined
> unconditionally), since it's used in resolv/res_send.c.

I think we can add a sysdeps/posix sendmmsg based on sendmsg so we can
safely remove this macro usage on resolv code.  I will work on this
after your patch inclusion.

> 
> Tested for x86_64 and x86.
> 
> 2017-05-09  Joseph Myers  <joseph@codesourcery.com>
> 
> 	* sysdeps/unix/sysv/linux/kernel-features.h
> 	(__ASSUME_SENDMMSG_SYSCALL): Define unconditionally.
> 	(__ASSUME_SENDMMSG): Likewise.
> 	(__ASSUME_SENDMMSG_SOCKETCALL): Remove macro.
> 	* sysdeps/unix/sysv/linux/sendmmsg.c (__sendmmsg): Define using
> 	sendmmsg syscall if that can be assumed to be present, socketcall
> 	otherwise, with no fallback for runtime failure.

LGTM, thanks.

> 
> diff --git a/sysdeps/unix/sysv/linux/kernel-features.h b/sysdeps/unix/sysv/linux/kernel-features.h
> index 74ac627..c9212b4 100644
> --- a/sysdeps/unix/sysv/linux/kernel-features.h
> +++ b/sysdeps/unix/sysv/linux/kernel-features.h
> @@ -105,13 +105,8 @@
>  
>  /* Support for sendmmsg functionality was added in 3.0.  The macros
>     defined correspond to those for accept4 and recvmmsg.  */
> -#if __LINUX_KERNEL_VERSION >= 0x030000
> -# ifdef __ASSUME_SOCKETCALL
> -#  define __ASSUME_SENDMMSG_SOCKETCALL	1
> -# endif
> -# define __ASSUME_SENDMMSG_SYSCALL	1
> -# define __ASSUME_SENDMMSG	1
> -#endif
> +#define __ASSUME_SENDMMSG_SYSCALL	1
> +#define __ASSUME_SENDMMSG	1
>  
>  /* On most architectures, most socket syscalls are supported for all
>     supported kernel versions, but on some socketcall architectures
> diff --git a/sysdeps/unix/sysv/linux/sendmmsg.c b/sysdeps/unix/sysv/linux/sendmmsg.c
> index 2dd0eba..c559623 100644
> --- a/sysdeps/unix/sysv/linux/sendmmsg.c
> +++ b/sysdeps/unix/sysv/linux/sendmmsg.c
> @@ -21,73 +21,22 @@
>  
>  #include <sysdep-cancel.h>
>  #include <sys/syscall.h>
> +#include <socketcall.h>
>  #include <kernel-features.h>
>  
> -/* Do not use the sendmmsg syscall on socketcall architectures unless
> -   it was added at the same time as the socketcall support or can be
> -   assumed to be present.  */
> -#if defined __ASSUME_SOCKETCALL \
> -    && !defined __ASSUME_SENDMMSG_SYSCALL_WITH_SOCKETCALL \
> -    && !defined __ASSUME_SENDMMSG_SYSCALL
> -# undef __NR_sendmmsg
> -#endif
> -
> -#ifdef __NR_sendmmsg
> -int
> -__sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
> -{
> -  return SYSCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags);
> -}
> -libc_hidden_def (__sendmmsg)
> -weak_alias (__sendmmsg, sendmmsg)
> -#elif defined __NR_socketcall
> -# include <socketcall.h>
> -# ifdef __ASSUME_SENDMMSG_SOCKETCALL
>  int
>  __sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
>  {
> +  /* Do not use the sendmmsg syscall on socketcall architectures unless
> +     it was added at the same time as the socketcall support or can be
> +     assumed to be present.  */
> +#if defined __ASSUME_SOCKETCALL \
> +    && !defined __ASSUME_SENDMMSG_SYSCALL_WITH_SOCKETCALL \
> +    && !defined __ASSUME_SENDMMSG_SYSCALL
>    return SOCKETCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags);
> +#else
> +  return SYSCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags);
> +#endif
>  }
> -# else
> -static int have_sendmmsg;
> -
> -int
> -__sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
> -{
> -  if (__glibc_likely (have_sendmmsg >= 0))
> -    {
> -      int ret = SOCKETCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags);
> -      /* The kernel returns -EINVAL for unknown socket operations.
> -	 We need to convert that error to an ENOSYS error.  */
> -      if (__builtin_expect (ret < 0, 0)
> -	  && have_sendmmsg == 0
> -	  && errno == EINVAL)
> -	{
> -	  /* Try another call, this time with an invalid file
> -	     descriptor and all other parameters cleared.  This call
> -	     will not cause any harm and it will return
> -	     immediately.  */
> -	  ret = SOCKETCALL_CANCEL (invalid, -1);
> -	  if (errno == EINVAL)
> -	    {
> -	      have_sendmmsg = -1;
> -	      __set_errno (ENOSYS);
> -	    }
> -	  else
> -	    {
> -	      have_sendmmsg = 1;
> -	      __set_errno (EINVAL);
> -	    }
> -	  return -1;
> -	}
> -      return ret;
> -    }
> -  __set_errno (ENOSYS);
> -  return -1;
> -}
> -# endif /* __ASSUME_SENDMMSG_SOCKETCALL  */
>  libc_hidden_def (__sendmmsg)
>  weak_alias (__sendmmsg, sendmmsg)
> -#else
> -# include <socket/sendmmsg.c>
> -#endif
>
  
Florian Weimer May 9, 2017, 8:28 p.m. UTC | #2
On 05/09/2017 08:41 PM, Adhemerval Zanella wrote:
> I think we can add a sysdeps/posix sendmmsg based on sendmsg so we can
> safely remove this macro usage on resolv code.  I will work on this
> after your patch inclusion.

The use of sendmmsg in the resolv code is problematic.  We need to use 
two separate sockets with different source ports to improve 
compatibility anyway.  This might also help firewalls with tearing down 
state more quickly (single request/response very likely does not bump 
the UDP timeout, but two requests and two responses might switch into 
stream mode with a prolonged timeout; maybe that's why we originally hit 
the Netfilter table overflow in some of the glibc resolver tests).

I'm not sure how quickly I will get to fixing this (I hope for 2.26), so 
please do whatever you feel is necessary in the meantime.

Thanks,
Florian
  
Adhemerval Zanella May 9, 2017, 8:58 p.m. UTC | #3
On 09/05/2017 17:28, Florian Weimer wrote:
> On 05/09/2017 08:41 PM, Adhemerval Zanella wrote:
>> I think we can add a sysdeps/posix sendmmsg based on sendmsg so we can
>> safely remove this macro usage on resolv code.  I will work on this
>> after your patch inclusion.
> 
> The use of sendmmsg in the resolv code is problematic.  We need to use two separate sockets with different source ports to improve compatibility anyway.  This might also help firewalls with tearing down state more quickly (single request/response very likely does not bump the UDP timeout, but two requests and two responses might switch into stream mode with a prolonged timeout; maybe that's why we originally hit the Netfilter table overflow in some of the glibc resolver tests).
> 
> I'm not sure how quickly I will get to fixing this (I hope for 2.26), so please do whatever you feel is necessary in the meantime.
> 
> Thanks,
> Florian

So if I understood correctly, the idea is to *remove* sendmmsg on resolv code then?
Because now with __ASSUME_SENDMMSG being used as default for Linux this code will
be used on all architectures.

In any case, my understanding for sendmmsg is it is a GNU extension (similar to
pwritev), so the idea is to have a fallback usable code on sysdeps/{posix,unix}
to we do not need to actually tests its usability in the rest of glibc code.
That's why I think that if the idea is actually get rid of sendmmsg usage in
resolv, we can add a generic implementation for sendmmsg and then remove its
usage on two cleanup patches.
  
Florian Weimer May 10, 2017, 7:41 a.m. UTC | #4
On 05/09/2017 10:58 PM, Adhemerval Zanella wrote:

> So if I understood correctly, the idea is to *remove* sendmmsg on resolv code then?
> Because now with __ASSUME_SENDMMSG being used as default for Linux this code will
> be used on all architectures.

Yes, once we have two sockets, we need to call sendmsg for each one 
separately.  But that's an *algorithmic* change, it's not simply about 
replacing one call to sendmmsg with two to sendmsg.  But that would be 
fine as an intermediate step as well if you want to simplify things in 
this way.

> In any case, my understanding for sendmmsg is it is a GNU extension (similar to
> pwritev), so the idea is to have a fallback usable code on sysdeps/{posix,unix}
> to we do not need to actually tests its usability in the rest of glibc code.

As far as I understand things, both sendmmsg and pwritev cannot be 
emulated in userspace because the fallback implementation would not have 
the atomicity guarantees provided by the kernel implementation.  IMHO, 
the generic versions should just return ENOSYS (or not linked in at all, 
so that configure scripts can note their absence).

Thanks,
Florian
  
Adhemerval Zanella May 10, 2017, 12:14 p.m. UTC | #5
On 10/05/2017 04:41, Florian Weimer wrote:
> On 05/09/2017 10:58 PM, Adhemerval Zanella wrote:
> 
>> So if I understood correctly, the idea is to *remove* sendmmsg on resolv code then?
>> Because now with __ASSUME_SENDMMSG being used as default for Linux this code will
>> be used on all architectures.
> 
> Yes, once we have two sockets, we need to call sendmsg for each one separately.  But that's an *algorithmic* change, it's not simply about replacing one call to sendmmsg with two to sendmsg.  But that would be fine as an intermediate step as well if you want to simplify things in this way.
> 

Right, so I think we an proceed with the removal of sendmmsg and later change the
two socket utilization.

>> In any case, my understanding for sendmmsg is it is a GNU extension (similar to
>> pwritev), so the idea is to have a fallback usable code on sysdeps/{posix,unix}
>> to we do not need to actually tests its usability in the rest of glibc code.
> 
> As far as I understand things, both sendmmsg and pwritev cannot be emulated in userspace because the fallback implementation would not have the atomicity guarantees provided by the kernel implementation.  IMHO, the generic versions should just return ENOSYS (or not linked in at all, so that configure scripts can note their absence).

Indeed the emulation brings a lot of issue, but it allows us to use the system
specific syscalls on generic code without relying on additional checks for
functionality existence.  However I think we can to Linux optimizations in
generic code less intrusive way if necessary with a different approach, maybe
by implementing the functionally in a more constrained way and reimplement 
in the target system.

What I would like to avoid is multiple build option for the same implementation
which are not actually tested and makes us with non exercised code paths
(in a lot of time dead code).
  
Florian Weimer May 12, 2017, 9:17 a.m. UTC | #6
On 05/10/2017 02:14 PM, Adhemerval Zanella wrote:
> Indeed the emulation brings a lot of issue, but it allows us to use the system
> specific syscalls on generic code without relying on additional checks for
> functionality existence.  However I think we can to Linux optimizations in
> generic code less intrusive way if necessary with a different approach, maybe
> by implementing the functionally in a more constrained way and reimplement
> in the target system.

Yes, optimizations are fine.  But I think it is wrong to expose the 
fallback implementation to applications as an implementation of the 
system call, when in fact the semantics are different.  Every time we do 
this, it comes back to haunt as eventually (see posix_fallocate or 
euidaccess for examples).

Thanks,
Florian
  
Adhemerval Zanella May 12, 2017, 1:52 p.m. UTC | #7
On 12/05/2017 06:17, Florian Weimer wrote:
> On 05/10/2017 02:14 PM, Adhemerval Zanella wrote:
>> Indeed the emulation brings a lot of issue, but it allows us to use the system
>> specific syscalls on generic code without relying on additional checks for
>> functionality existence.  However I think we can to Linux optimizations in
>> generic code less intrusive way if necessary with a different approach, maybe
>> by implementing the functionally in a more constrained way and reimplement
>> in the target system.
> 
> Yes, optimizations are fine.  But I think it is wrong to expose the fallback implementation to applications as an implementation of the system call, when in fact the semantics are different.  Every time we do this, it comes back to haunt as eventually (see posix_fallocate or euidaccess for examples).

I tend to agree with you, but I think we should at least lay ground on how we 
will handle possible future GNU extensions and/or new syscall wrapper.  My 
understanding is we should aim to provide fallback implementation iff the
semantics of the implementation matches the one we aim to emulate, otherwise
we should return ENOTSUP/EINVAL.

That is what I am aiming for p{read,write}v2 and the p{read,write}v.  For 
former my current proposal returns ENOTSUPP for all flags different than
zero and for former we still use p{read,pwrite} with an aligned buffer
(p{read,write} may still be emulated, although not current in Linux).
  
Joseph Myers May 12, 2017, 2:12 p.m. UTC | #8
On Fri, 12 May 2017, Florian Weimer wrote:

> On 05/10/2017 02:14 PM, Adhemerval Zanella wrote:
> > Indeed the emulation brings a lot of issue, but it allows us to use the
> > system
> > specific syscalls on generic code without relying on additional checks for
> > functionality existence.  However I think we can to Linux optimizations in
> > generic code less intrusive way if necessary with a different approach,
> > maybe
> > by implementing the functionally in a more constrained way and reimplement
> > in the target system.
> 
> Yes, optimizations are fine.  But I think it is wrong to expose the fallback
> implementation to applications as an implementation of the system call, when
> in fact the semantics are different.  Every time we do this, it comes back to
> haunt as eventually (see posix_fallocate or euidaccess for examples).

Or the pselect emulation (for Linux, now only used on MicroBlaze); see bug 
9813.
  

Patch

diff --git a/sysdeps/unix/sysv/linux/kernel-features.h b/sysdeps/unix/sysv/linux/kernel-features.h
index 74ac627..c9212b4 100644
--- a/sysdeps/unix/sysv/linux/kernel-features.h
+++ b/sysdeps/unix/sysv/linux/kernel-features.h
@@ -105,13 +105,8 @@ 
 
 /* Support for sendmmsg functionality was added in 3.0.  The macros
    defined correspond to those for accept4 and recvmmsg.  */
-#if __LINUX_KERNEL_VERSION >= 0x030000
-# ifdef __ASSUME_SOCKETCALL
-#  define __ASSUME_SENDMMSG_SOCKETCALL	1
-# endif
-# define __ASSUME_SENDMMSG_SYSCALL	1
-# define __ASSUME_SENDMMSG	1
-#endif
+#define __ASSUME_SENDMMSG_SYSCALL	1
+#define __ASSUME_SENDMMSG	1
 
 /* On most architectures, most socket syscalls are supported for all
    supported kernel versions, but on some socketcall architectures
diff --git a/sysdeps/unix/sysv/linux/sendmmsg.c b/sysdeps/unix/sysv/linux/sendmmsg.c
index 2dd0eba..c559623 100644
--- a/sysdeps/unix/sysv/linux/sendmmsg.c
+++ b/sysdeps/unix/sysv/linux/sendmmsg.c
@@ -21,73 +21,22 @@ 
 
 #include <sysdep-cancel.h>
 #include <sys/syscall.h>
+#include <socketcall.h>
 #include <kernel-features.h>
 
-/* Do not use the sendmmsg syscall on socketcall architectures unless
-   it was added at the same time as the socketcall support or can be
-   assumed to be present.  */
-#if defined __ASSUME_SOCKETCALL \
-    && !defined __ASSUME_SENDMMSG_SYSCALL_WITH_SOCKETCALL \
-    && !defined __ASSUME_SENDMMSG_SYSCALL
-# undef __NR_sendmmsg
-#endif
-
-#ifdef __NR_sendmmsg
-int
-__sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
-{
-  return SYSCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags);
-}
-libc_hidden_def (__sendmmsg)
-weak_alias (__sendmmsg, sendmmsg)
-#elif defined __NR_socketcall
-# include <socketcall.h>
-# ifdef __ASSUME_SENDMMSG_SOCKETCALL
 int
 __sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
 {
+  /* Do not use the sendmmsg syscall on socketcall architectures unless
+     it was added at the same time as the socketcall support or can be
+     assumed to be present.  */
+#if defined __ASSUME_SOCKETCALL \
+    && !defined __ASSUME_SENDMMSG_SYSCALL_WITH_SOCKETCALL \
+    && !defined __ASSUME_SENDMMSG_SYSCALL
   return SOCKETCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags);
+#else
+  return SYSCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags);
+#endif
 }
-# else
-static int have_sendmmsg;
-
-int
-__sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
-{
-  if (__glibc_likely (have_sendmmsg >= 0))
-    {
-      int ret = SOCKETCALL_CANCEL (sendmmsg, fd, vmessages, vlen, flags);
-      /* The kernel returns -EINVAL for unknown socket operations.
-	 We need to convert that error to an ENOSYS error.  */
-      if (__builtin_expect (ret < 0, 0)
-	  && have_sendmmsg == 0
-	  && errno == EINVAL)
-	{
-	  /* Try another call, this time with an invalid file
-	     descriptor and all other parameters cleared.  This call
-	     will not cause any harm and it will return
-	     immediately.  */
-	  ret = SOCKETCALL_CANCEL (invalid, -1);
-	  if (errno == EINVAL)
-	    {
-	      have_sendmmsg = -1;
-	      __set_errno (ENOSYS);
-	    }
-	  else
-	    {
-	      have_sendmmsg = 1;
-	      __set_errno (EINVAL);
-	    }
-	  return -1;
-	}
-      return ret;
-    }
-  __set_errno (ENOSYS);
-  return -1;
-}
-# endif /* __ASSUME_SENDMMSG_SOCKETCALL  */
 libc_hidden_def (__sendmmsg)
 weak_alias (__sendmmsg, sendmmsg)
-#else
-# include <socket/sendmmsg.c>
-#endif