[v5,4/5] x86-64: Add the clone3 wrapper

Message ID 20210515123442.1432385-5-hjl.tools@gmail.com
State Superseded
Headers
Series Add an internal wrapper for clone, clone2 and clone3 |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

H.J. Lu May 15, 2021, 12:34 p.m. UTC
  extern int clone3 (struct clone_args *__cl_args,
		   int (*__func) (void *__arg), void *__arg);
---
 sysdeps/unix/sysv/linux/x86_64/clone3.S | 92 +++++++++++++++++++++++++
 sysdeps/unix/sysv/linux/x86_64/sysdep.h |  2 +
 2 files changed, 94 insertions(+)
 create mode 100644 sysdeps/unix/sysv/linux/x86_64/clone3.S
  

Comments

Florian Weimer May 20, 2021, 2:53 p.m. UTC | #1
* H. J. Lu:

> extern int clone3 (struct clone_args *__cl_args,
> 		   int (*__func) (void *__arg), void *__arg);
> ---
>  sysdeps/unix/sysv/linux/x86_64/clone3.S | 92 +++++++++++++++++++++++++
>  sysdeps/unix/sysv/linux/x86_64/sysdep.h |  2 +
>  2 files changed, 94 insertions(+)
>  create mode 100644 sysdeps/unix/sysv/linux/x86_64/clone3.S
>
> diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S
> new file mode 100644
> index 0000000000..f7d4036a6a

> +        .text
> +ENTRY (__clone3)
> +	/* Sanity check arguments.  */
> +	movq	$-EINVAL, %rax
> +	testq	%rdi, %rdi		/* No NULL cl_args pointer.  */
> +	jz	SYSCALL_ERROR_LABEL
> +	testq	%rsi, %rsi		/* No NULL function pointer.  */
> +	jz	SYSCALL_ERROR_LABEL

I think some of these register uses aren't x32-compatible.  Isn't the
upper half of the register undefined there?

> +	/* Save the function pointer in R8 which is preserved by the
> +	   syscall.  */
> +	movq	%rsi, %r8
> +
> +	/* Put sizeof (struct clone_args) in ESI.  */
> +	movl	$CLONE_ARGS_SIZE , %esi

If this is in preparation for the public wrapper, the size should
actually be an argument.  Sorry, I didn't realize this was the direction.

> +L(thread_start):
> +	cfi_startproc
> +	/* Clearing frame pointer is insufficient, use CFI.  */
> +	cfi_undefined (rip)
> +	/* Clear the frame pointer.  The ABI suggests this be done, to mark
> +	   the outermost frame obviously.  */
> +	xorl	%ebp, %ebp
> +
> +	/* Set up arguments for the function call.  */
> +	movq	%rdx, %rdi	/* Argument.  */
> +	call	*%r8		/* Call function.  */
> +	/* Call exit with return value from function call. */
> +	movq	%rax, %rdi
> +	movl	$SYS_ify(exit), %eax
> +	syscall
> +	cfi_endproc
> +
> +	cfi_startproc
> +PSEUDO_END (__clone3)

If this is a public wrapper, should it align %rsp to 16 bytes
at the point of the call, to follow the x86-64 calling convention?

Thanks,
Florian
  
Noah Goldstein May 20, 2021, 6:35 p.m. UTC | #2
On Sat, May 15, 2021 at 9:23 AM H.J. Lu via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> extern int clone3 (struct clone_args *__cl_args,
>                    int (*__func) (void *__arg), void *__arg);
> ---
>  sysdeps/unix/sysv/linux/x86_64/clone3.S | 92 +++++++++++++++++++++++++
>  sysdeps/unix/sysv/linux/x86_64/sysdep.h |  2 +
>  2 files changed, 94 insertions(+)
>  create mode 100644 sysdeps/unix/sysv/linux/x86_64/clone3.S
>
> diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S
> new file mode 100644
> index 0000000000..f7d4036a6a
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/x86_64/clone3.S
> @@ -0,0 +1,92 @@
> +/* The clone3 syscall wrapper.  Linux/x86-64 version.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* clone3() is even more special than fork() as it mucks with stacks
> +   and invokes a function in the right context after its all over.  */
> +
> +#include <sysdep.h>
> +#include <clone-offsets.h>
> +
> +/* The userland implementation is:
> +   int clone3 (struct clone_args *cl_args, int (*func)(void *arg),
> +              void *arg);
> +   the kernel entry is:
> +   int clone3 (struct clone_args *cl_args, size_t size);
> +
> +   The parameters are passed in registers from userland:
> +   rdi: cl_args
> +   rsi: func
> +   rdx: arg
> +
> +   The kernel expects:
> +   rax: system call number
> +   rdi: cl_args
> +   rsi: size  */
> +
> +        .text
> +ENTRY (__clone3)
> +       /* Sanity check arguments.  */
> +       movq    $-EINVAL, %rax

Can this be movl?

> +       testq   %rdi, %rdi              /* No NULL cl_args pointer.  */
> +       jz      SYSCALL_ERROR_LABEL
> +       testq   %rsi, %rsi              /* No NULL function pointer.  */
> +       jz      SYSCALL_ERROR_LABEL
> +
> +       /* Save the function pointer in R8 which is preserved by the
> +          syscall.  */
> +       movq    %rsi, %r8
> +
> +       /* Put sizeof (struct clone_args) in ESI.  */
> +       movl    $CLONE_ARGS_SIZE , %esi
> +
> +       /* Do the system call.  */
> +       movl    $SYS_ify(clone3), %eax
> +
> +       /* End FDE now, because in the child the unwind info will be
> +          wrong.  */
> +       cfi_endproc
> +       syscall
> +
> +       test    %RAX_LP, %RAX_LP
> +       jl      SYSCALL_ERROR_LABEL
> +       jz      L(thread_start)
> +

Is the expectation to go to L(thread_start)?  If so, I
think jnz L(ret) and fallthrough is probably
better.

> +       ret
> +
> +L(thread_start):
> +       cfi_startproc
> +       /* Clearing frame pointer is insufficient, use CFI.  */
> +       cfi_undefined (rip)
> +       /* Clear the frame pointer.  The ABI suggests this be done, to mark
> +          the outermost frame obviously.  */
> +       xorl    %ebp, %ebp
> +
> +       /* Set up arguments for the function call.  */
> +       movq    %rdx, %rdi      /* Argument.  */
> +       call    *%r8            /* Call function.  */
> +       /* Call exit with return value from function call. */
> +       movq    %rax, %rdi
> +       movl    $SYS_ify(exit), %eax
> +       syscall
> +       cfi_endproc
> +
> +       cfi_startproc
> +PSEUDO_END (__clone3)
> +
> +libc_hidden_def (__clone3)
> +weak_alias (__clone3, clone3)
> diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> index dbad2c788a..f26ffc68ae 100644
> --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> @@ -377,6 +377,8 @@
>  # define HAVE_GETCPU_VSYSCALL          "__vdso_getcpu"
>  # define HAVE_CLOCK_GETRES64_VSYSCALL   "__vdso_clock_getres"
>
> +# define HAVE_CLONE3_WAPPER                    1
> +
>  # define SINGLE_THREAD_BY_GLOBAL               1
>
>  #endif /* __ASSEMBLER__ */
> --
> 2.31.1
>
  
Noah Goldstein May 20, 2021, 6:39 p.m. UTC | #3
On Thu, May 20, 2021 at 2:35 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Sat, May 15, 2021 at 9:23 AM H.J. Lu via Libc-alpha
> <libc-alpha@sourceware.org> wrote:
> >
> > extern int clone3 (struct clone_args *__cl_args,
> >                    int (*__func) (void *__arg), void *__arg);
> > ---
> >  sysdeps/unix/sysv/linux/x86_64/clone3.S | 92 +++++++++++++++++++++++++
> >  sysdeps/unix/sysv/linux/x86_64/sysdep.h |  2 +
> >  2 files changed, 94 insertions(+)
> >  create mode 100644 sysdeps/unix/sysv/linux/x86_64/clone3.S
> >
> > diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S
> > new file mode 100644
> > index 0000000000..f7d4036a6a
> > --- /dev/null
> > +++ b/sysdeps/unix/sysv/linux/x86_64/clone3.S
> > @@ -0,0 +1,92 @@
> > +/* The clone3 syscall wrapper.  Linux/x86-64 version.
> > +   Copyright (C) 2021 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +/* clone3() is even more special than fork() as it mucks with stacks
> > +   and invokes a function in the right context after its all over.  */
> > +
> > +#include <sysdep.h>
> > +#include <clone-offsets.h>
> > +
> > +/* The userland implementation is:
> > +   int clone3 (struct clone_args *cl_args, int (*func)(void *arg),
> > +              void *arg);
> > +   the kernel entry is:
> > +   int clone3 (struct clone_args *cl_args, size_t size);
> > +
> > +   The parameters are passed in registers from userland:
> > +   rdi: cl_args
> > +   rsi: func
> > +   rdx: arg
> > +
> > +   The kernel expects:
> > +   rax: system call number
> > +   rdi: cl_args
> > +   rsi: size  */
> > +
> > +        .text
> > +ENTRY (__clone3)
> > +       /* Sanity check arguments.  */
> > +       movq    $-EINVAL, %rax
>
> Can this be movl?
>
> > +       testq   %rdi, %rdi              /* No NULL cl_args pointer.  */
> > +       jz      SYSCALL_ERROR_LABEL
> > +       testq   %rsi, %rsi              /* No NULL function pointer.  */
> > +       jz      SYSCALL_ERROR_LABEL
> > +
> > +       /* Save the function pointer in R8 which is preserved by the
> > +          syscall.  */
> > +       movq    %rsi, %r8
> > +
> > +       /* Put sizeof (struct clone_args) in ESI.  */
> > +       movl    $CLONE_ARGS_SIZE , %esi
> > +
> > +       /* Do the system call.  */
> > +       movl    $SYS_ify(clone3), %eax
> > +
> > +       /* End FDE now, because in the child the unwind info will be
> > +          wrong.  */
> > +       cfi_endproc
> > +       syscall
> > +
> > +       test    %RAX_LP, %RAX_LP
> > +       jl      SYSCALL_ERROR_LABEL
> > +       jz      L(thread_start)
> > +
>
> Is expectation to go to L(thread_start)? If so
> think jnz L(ret) and fallthrough is probably
> better.

Or better, take the error check branch off
the critical path with jnz L(error_or_ret), then do the jl
in L(error_or_ret).

>
> > +       ret
> > +
> > +L(thread_start):
> > +       cfi_startproc
> > +       /* Clearing frame pointer is insufficient, use CFI.  */
> > +       cfi_undefined (rip)
> > +       /* Clear the frame pointer.  The ABI suggests this be done, to mark
> > +          the outermost frame obviously.  */
> > +       xorl    %ebp, %ebp
> > +
> > +       /* Set up arguments for the function call.  */
> > +       movq    %rdx, %rdi      /* Argument.  */
> > +       call    *%r8            /* Call function.  */
> > +       /* Call exit with return value from function call. */
> > +       movq    %rax, %rdi
> > +       movl    $SYS_ify(exit), %eax
> > +       syscall
> > +       cfi_endproc
> > +
> > +       cfi_startproc
> > +PSEUDO_END (__clone3)
> > +
> > +libc_hidden_def (__clone3)
> > +weak_alias (__clone3, clone3)
> > diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > index dbad2c788a..f26ffc68ae 100644
> > --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > @@ -377,6 +377,8 @@
> >  # define HAVE_GETCPU_VSYSCALL          "__vdso_getcpu"
> >  # define HAVE_CLOCK_GETRES64_VSYSCALL   "__vdso_clock_getres"
> >
> > +# define HAVE_CLONE3_WAPPER                    1
> > +
> >  # define SINGLE_THREAD_BY_GLOBAL               1
> >
> >  #endif /* __ASSEMBLER__ */
> > --
> > 2.31.1
> >
  
H.J. Lu May 22, 2021, 1:38 a.m. UTC | #4
On Thu, May 20, 2021 at 7:53 AM Florian Weimer <fweimer@redhat.com> wrote:
>
> * H. J. Lu:
>
> > extern int clone3 (struct clone_args *__cl_args,
> >                  int (*__func) (void *__arg), void *__arg);
> > ---
> >  sysdeps/unix/sysv/linux/x86_64/clone3.S | 92 +++++++++++++++++++++++++
> >  sysdeps/unix/sysv/linux/x86_64/sysdep.h |  2 +
> >  2 files changed, 94 insertions(+)
> >  create mode 100644 sysdeps/unix/sysv/linux/x86_64/clone3.S
> >
> > diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S
> > new file mode 100644
> > index 0000000000..f7d4036a6a
>
> > +        .text
> > +ENTRY (__clone3)
> > +     /* Sanity check arguments.  */
> > +     movq    $-EINVAL, %rax
> > +     testq   %rdi, %rdi              /* No NULL cl_args pointer.  */
> > +     jz      SYSCALL_ERROR_LABEL
> > +     testq   %rsi, %rsi              /* No NULL function pointer.  */
> > +     jz      SYSCALL_ERROR_LABEL
>
> I think some of these register aren't x32-compatible.  Isn't the upper
> half undefined?

All pointers passed in registers are zero-extended to 64 bits.
I changed it to use REG_LP macros to avoid the REX prefix.

> > +     /* Save the function pointer in R8 which is preserved by the
> > +        syscall.  */
> > +     movq    %rsi, %r8
> > +
> > +     /* Put sizeof (struct clone_args) in ESI.  */
> > +     movl    $CLONE_ARGS_SIZE , %esi
>
> If this is in preparation of the public wrapper, this should actually be
> an argument.  Sorry didn't realize this was the direction.

Fixed.

> > +L(thread_start):
> > +     cfi_startproc
> > +     /* Clearing frame pointer is insufficient, use CFI.  */
> > +     cfi_undefined (rip)
> > +     /* Clear the frame pointer.  The ABI suggests this be done, to mark
> > +        the outermost frame obviously.  */
> > +     xorl    %ebp, %ebp
> > +
> > +     /* Set up arguments for the function call.  */
> > +     movq    %rdx, %rdi      /* Argument.  */
> > +     call    *%r8            /* Call function.  */
> > +     /* Call exit with return value from function call. */
> > +     movq    %rax, %rdi
> > +     movl    $SYS_ify(exit), %eax
> > +     syscall
> > +     cfi_endproc
> > +
> > +     cfi_startproc
> > +PSEUDO_END (__clone3)
>
> If this is a public wrapper, should it round up %rsp to 16 bytes

Fixed.

> at the point of the caller, to follow the x86-64 calling convention?
>
> Thanks,
> Florian
>

Thanks.
  
H.J. Lu May 22, 2021, 1:52 a.m. UTC | #5
On Thu, May 20, 2021 at 11:39 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Thu, May 20, 2021 at 2:35 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Sat, May 15, 2021 at 9:23 AM H.J. Lu via Libc-alpha
> > <libc-alpha@sourceware.org> wrote:
> > >
> > > extern int clone3 (struct clone_args *__cl_args,
> > >                    int (*__func) (void *__arg), void *__arg);
> > > ---
> > >  sysdeps/unix/sysv/linux/x86_64/clone3.S | 92 +++++++++++++++++++++++++
> > >  sysdeps/unix/sysv/linux/x86_64/sysdep.h |  2 +
> > >  2 files changed, 94 insertions(+)
> > >  create mode 100644 sysdeps/unix/sysv/linux/x86_64/clone3.S
> > >
> > > diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S
> > > new file mode 100644
> > > index 0000000000..f7d4036a6a
> > > --- /dev/null
> > > +++ b/sysdeps/unix/sysv/linux/x86_64/clone3.S
> > > @@ -0,0 +1,92 @@
> > > +/* The clone3 syscall wrapper.  Linux/x86-64 version.
> > > +   Copyright (C) 2021 Free Software Foundation, Inc.
> > > +   This file is part of the GNU C Library.
> > > +
> > > +   The GNU C Library is free software; you can redistribute it and/or
> > > +   modify it under the terms of the GNU Lesser General Public
> > > +   License as published by the Free Software Foundation; either
> > > +   version 2.1 of the License, or (at your option) any later version.
> > > +
> > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > +   Lesser General Public License for more details.
> > > +
> > > +   You should have received a copy of the GNU Lesser General Public
> > > +   License along with the GNU C Library; if not, see
> > > +   <https://www.gnu.org/licenses/>.  */
> > > +
> > > +/* clone3() is even more special than fork() as it mucks with stacks
> > > +   and invokes a function in the right context after its all over.  */
> > > +
> > > +#include <sysdep.h>
> > > +#include <clone-offsets.h>
> > > +
> > > +/* The userland implementation is:
> > > +   int clone3 (struct clone_args *cl_args, int (*func)(void *arg),
> > > +              void *arg);
> > > +   the kernel entry is:
> > > +   int clone3 (struct clone_args *cl_args, size_t size);
> > > +
> > > +   The parameters are passed in registers from userland:
> > > +   rdi: cl_args
> > > +   rsi: func
> > > +   rdx: arg
> > > +
> > > +   The kernel expects:
> > > +   rax: system call number
> > > +   rdi: cl_args
> > > +   rsi: size  */
> > > +
> > > +        .text
> > > +ENTRY (__clone3)
> > > +       /* Sanity check arguments.  */
> > > +       movq    $-EINVAL, %rax
> >
> > Can this be movl?

Yes.  Fixed.

> > > +       testq   %rdi, %rdi              /* No NULL cl_args pointer.  */
> > > +       jz      SYSCALL_ERROR_LABEL
> > > +       testq   %rsi, %rsi              /* No NULL function pointer.  */
> > > +       jz      SYSCALL_ERROR_LABEL
> > > +
> > > +       /* Save the function pointer in R8 which is preserved by the
> > > +          syscall.  */
> > > +       movq    %rsi, %r8
> > > +
> > > +       /* Put sizeof (struct clone_args) in ESI.  */
> > > +       movl    $CLONE_ARGS_SIZE , %esi
> > > +
> > > +       /* Do the system call.  */
> > > +       movl    $SYS_ify(clone3), %eax
> > > +
> > > +       /* End FDE now, because in the child the unwind info will be
> > > +          wrong.  */
> > > +       cfi_endproc
> > > +       syscall
> > > +
> > > +       test    %RAX_LP, %RAX_LP
> > > +       jl      SYSCALL_ERROR_LABEL
> > > +       jz      L(thread_start)
> > > +
> >
> > Is expectation to go to L(thread_start)? If so
> > think jnz L(ret) and fallthrough is probably
> > better.
>
> Or better take the error check branch off
> the critical path with jnz L(error_or_ret) then jl
> in L(error_or_ret)

I don't think the clone wrapper is on the critical path.
Since the same code is executed by both child and parent,
I check the error return first.

> >
> > > +       ret
> > > +
> > > +L(thread_start):
> > > +       cfi_startproc
> > > +       /* Clearing frame pointer is insufficient, use CFI.  */
> > > +       cfi_undefined (rip)
> > > +       /* Clear the frame pointer.  The ABI suggests this be done, to mark
> > > +          the outermost frame obviously.  */
> > > +       xorl    %ebp, %ebp
> > > +
> > > +       /* Set up arguments for the function call.  */
> > > +       movq    %rdx, %rdi      /* Argument.  */
> > > +       call    *%r8            /* Call function.  */
> > > +       /* Call exit with return value from function call. */
> > > +       movq    %rax, %rdi
> > > +       movl    $SYS_ify(exit), %eax
> > > +       syscall
> > > +       cfi_endproc
> > > +
> > > +       cfi_startproc
> > > +PSEUDO_END (__clone3)
> > > +
> > > +libc_hidden_def (__clone3)
> > > +weak_alias (__clone3, clone3)
> > > diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > > index dbad2c788a..f26ffc68ae 100644
> > > --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > > +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
> > > @@ -377,6 +377,8 @@
> > >  # define HAVE_GETCPU_VSYSCALL          "__vdso_getcpu"
> > >  # define HAVE_CLOCK_GETRES64_VSYSCALL   "__vdso_clock_getres"
> > >
> > > +# define HAVE_CLONE3_WAPPER                    1
> > > +
> > >  # define SINGLE_THREAD_BY_GLOBAL               1
> > >
> > >  #endif /* __ASSEMBLER__ */
> > > --
> > > 2.31.1
> > >

Thanks.
  

Patch

diff --git a/sysdeps/unix/sysv/linux/x86_64/clone3.S b/sysdeps/unix/sysv/linux/x86_64/clone3.S
new file mode 100644
index 0000000000..f7d4036a6a
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/clone3.S
@@ -0,0 +1,92 @@ 
+/* The clone3 syscall wrapper.  Linux/x86-64 version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* clone3() is even more special than fork() as it mucks with stacks
+   and invokes a function in the right context after it's all over.  */
+
+#include <sysdep.h>
+#include <clone-offsets.h>
+
+/* The userland implementation is:
+   int clone3 (struct clone_args *cl_args, int (*func)(void *arg),
+	       void *arg);
+   the kernel entry is:
+   int clone3 (struct clone_args *cl_args, size_t size);
+
+   The parameters are passed in registers from userland:
+   rdi: cl_args
+   rsi: func
+   rdx: arg
+
+   The kernel expects:
+   rax: system call number
+   rdi: cl_args
+   rsi: size  */
+
+        .text
+ENTRY (__clone3)
+	/* Sanity check arguments.  RAX holds -EINVAL for the error path.  */
+	movq	$-EINVAL, %rax
+	testq	%rdi, %rdi		/* No NULL cl_args pointer.  */
+	jz	SYSCALL_ERROR_LABEL
+	testq	%rsi, %rsi		/* No NULL function pointer.  */
+	jz	SYSCALL_ERROR_LABEL
+
+	/* Save the function pointer in R8, which is preserved by the syscall;
+	   RSI is reused below for the size argument.  */
+	movq	%rsi, %r8
+
+	/* Put sizeof (struct clone_args) in ESI as the kernel's size argument.  */
+	movl	$CLONE_ARGS_SIZE , %esi
+
+	/* Load the clone3 system call number; the syscall itself follows.  */
+	movl	$SYS_ify(clone3), %eax
+
+	/* End the FDE before the syscall, because in the child the unwind
+	   info would be wrong.  */
+	cfi_endproc
+	syscall
+
+	test	%RAX_LP, %RAX_LP
+	jl	SYSCALL_ERROR_LABEL	/* Negative: the syscall failed.  */
+	jz	L(thread_start)		/* Zero: continue on the child path.  */
+
+	ret				/* Positive: return it to the caller.  */
+
+L(thread_start):
+	cfi_startproc
+	/* Clearing the frame pointer is insufficient; mark RIP undefined via CFI.  */
+	cfi_undefined (rip)
+	/* Clear the frame pointer.  The ABI suggests doing this to mark the
+	   outermost frame clearly.  */
+	xorl	%ebp, %ebp
+
+	/* Set up the argument for the function call.  */
+	movq	%rdx, %rdi	/* RDI = arg.  */
+	call	*%r8		/* Call func (saved in R8).  */
+	/* Call exit with the function's return value as the status.  */
+	movq	%rax, %rdi
+	movl	$SYS_ify(exit), %eax
+	syscall
+	cfi_endproc
+
+	cfi_startproc		/* Presumably balances PSEUDO_END -- confirm.  */
+PSEUDO_END (__clone3)
+
+libc_hidden_def (__clone3)
+weak_alias (__clone3, clone3)
diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
index dbad2c788a..f26ffc68ae 100644
--- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
@@ -377,6 +377,8 @@ 
 # define HAVE_GETCPU_VSYSCALL		"__vdso_getcpu"
 # define HAVE_CLOCK_GETRES64_VSYSCALL   "__vdso_clock_getres"
 
+# define HAVE_CLONE3_WAPPER			1
+
 # define SINGLE_THREAD_BY_GLOBAL		1
 
 #endif	/* __ASSEMBLER__ */