i386: Add the clone3 wrapper
Checks
Commit Message
extern int clone3 (struct clone_args *__cl_args, size_t __size,
int (*__func) (void *__arg), void *__arg);
---
sysdeps/unix/sysv/linux/i386/clone3.S | 123 ++++++++++++++++++++++++++
sysdeps/unix/sysv/linux/i386/sysdep.h | 2 +
2 files changed, 125 insertions(+)
create mode 100644 sysdeps/unix/sysv/linux/i386/clone3.S
Comments
On Wed, Jul 14, 2021 at 9:33 AM H.J. Lu via Libc-alpha <
libc-alpha@sourceware.org> wrote:
> extern int clone3 (struct clone_args *__cl_args, size_t __size,
> int (*__func) (void *__arg), void *__arg);
> ---
> sysdeps/unix/sysv/linux/i386/clone3.S | 123 ++++++++++++++++++++++++++
> sysdeps/unix/sysv/linux/i386/sysdep.h | 2 +
> 2 files changed, 125 insertions(+)
> create mode 100644 sysdeps/unix/sysv/linux/i386/clone3.S
>
> diff --git a/sysdeps/unix/sysv/linux/i386/clone3.S
> b/sysdeps/unix/sysv/linux/i386/clone3.S
> new file mode 100644
> index 0000000000..bef3ce0455
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/i386/clone3.S
> @@ -0,0 +1,123 @@
> +/* The clone3 syscall wrapper. Linux/i386 version.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +/* clone3() is even more special than fork() as it mucks with stacks
> + and invokes a function in the right context after its all over. */
> +
> +#include <sysdep.h>
> +
> +/* The userland implementation is:
> + int clone3 (struct clone_args *cl_args, size_t size,
> + int (*func)(void *arg), void *arg);
> + the kernel entry is:
> + int clone3 (struct clone_args *cl_args, size_t size);
> +
> + The parameters are passed on stack from userland:
> + 16(%esp) arg
> + 12(%esp) func
> + 8(%esp) size
> + 4(%esp) cl_args
> + (%esp) Return address
> +
> + The kernel expects:
> + eax: system call number
> + ebx: cl_args
> + ecx: size
> + */
> +
> +#define CL_ARGS 4
> +#define SIZE 8
> +#define FUNC 12
> +#define ARG 16
> +
> + .text
> +ENTRY (__clone3)
> + /* Sanity check arguments. */
> + movl $-EINVAL, %eax
> + movl CL_ARGS(%esp), %ecx /* No NULL cl_args pointer. */
> + testl %ecx, %ecx
> + jz SYSCALL_ERROR_LABEL
> + /* Save the function pointer in EDX which is preserved by the
> + system call. */
> + movl FUNC(%esp), %edx /* No NULL function pointer. */
> + testl %edx, %edx
> + jz SYSCALL_ERROR_LABEL
> +
> + /* Save EBX and ESI. */
> + pushl %ebx
> + cfi_adjust_cfa_offset (4)
> + pushl %esi
> + cfi_adjust_cfa_offset (4)
> +
> + /* Save the function argument in ESI which is preserved by the
> + system call. */
> + movl (ARG + 8)(%esp), %esi
> +
> + /* Put cl_args in EBX. */
> + movl %ecx, %ebx
> +
> + /* Put size in ECX. */
> + movl (SIZE + 8)(%esp), %ecx
> +
> + /* Do the system call. */
> + movl $SYS_ify(clone3), %eax
> +
> + /* End FDE now, because in the child the unwind info will be
> + wrong. */
> + cfi_endproc
> +
> + int $0x80
> + test %eax, %eax
> + /* No need to restore EBX and ESI in child. */
>
does esp need to be adjusted?
> + jz L(thread_start)
> +
> + /* Restore EBX and ESI in parent. */
> + pop %esi
> + pop %ebx
> + jl SYSCALL_ERROR_LABEL
> +
> + ret
> +
> +L(thread_start):
> + cfi_startproc
> + /* Clearing frame pointer is insufficient, use CFI. */
> + cfi_undefined (eip)
> + xorl %ebp, %ebp /* Terminate the stack frame. */
> +
> + /* Align stack to 16 bytes per the i386 psABI. */
> + andl $-16, %esp
> +
> + /* The PUSH below will decrement stack pointer by 4 bytes. */
> + subl $12, %esp
> +
> + /* Set up the argument for the function call. */
> + pushl %esi /* Argument. */
>
Can you pushl then align and drop the subl? Or does esp need to be aligned
before the pushl?
> + cfi_adjust_cfa_offset (4)
> + call *%edx /* Call function. */
> +
> + /* Call exit with return value from function call. */
> + movl %eax, %ebx
> + movl $SYS_ify(exit), %eax
> + ENTER_KERNEL
> + cfi_endproc
> +
> + cfi_startproc
> +PSEUDO_END (__clone3)
> +
> +libc_hidden_def (__clone3)
> +weak_alias (__clone3, clone3)
> diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h
> b/sysdeps/unix/sysv/linux/i386/sysdep.h
> index 8680b49bf7..3927a1a6e0 100644
> --- a/sysdeps/unix/sysv/linux/i386/sysdep.h
> +++ b/sysdeps/unix/sysv/linux/i386/sysdep.h
> @@ -291,6 +291,8 @@ struct libc_do_syscall_args
> # define HAVE_TIME_VSYSCALL "__vdso_time"
> # define HAVE_CLOCK_GETRES_VSYSCALL "__vdso_clock_getres"
>
> +# define HAVE_CLONE3_WAPPER 1
> +
> # undef HAVE_INTERNAL_BRK_ADDR_SYMBOL
> # define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1
>
> --
> 2.31.1
>
>
On Wed, Jul 14, 2021 at 11:19 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
>
>
> On Wed, Jul 14, 2021 at 9:33 AM H.J. Lu via Libc-alpha <libc-alpha@sourceware.org> wrote:
>>
>> extern int clone3 (struct clone_args *__cl_args, size_t __size,
>> int (*__func) (void *__arg), void *__arg);
>> ---
>> sysdeps/unix/sysv/linux/i386/clone3.S | 123 ++++++++++++++++++++++++++
>> sysdeps/unix/sysv/linux/i386/sysdep.h | 2 +
>> 2 files changed, 125 insertions(+)
>> create mode 100644 sysdeps/unix/sysv/linux/i386/clone3.S
>>
>> diff --git a/sysdeps/unix/sysv/linux/i386/clone3.S b/sysdeps/unix/sysv/linux/i386/clone3.S
>> new file mode 100644
>> index 0000000000..bef3ce0455
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/i386/clone3.S
>> @@ -0,0 +1,123 @@
>> +/* The clone3 syscall wrapper. Linux/i386 version.
>> + Copyright (C) 2021 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +/* clone3() is even more special than fork() as it mucks with stacks
>> + and invokes a function in the right context after its all over. */
>> +
>> +#include <sysdep.h>
>> +
>> +/* The userland implementation is:
>> + int clone3 (struct clone_args *cl_args, size_t size,
>> + int (*func)(void *arg), void *arg);
>> + the kernel entry is:
>> + int clone3 (struct clone_args *cl_args, size_t size);
>> +
>> + The parameters are passed on stack from userland:
>> + 16(%esp) arg
>> + 12(%esp) func
>> + 8(%esp) size
>> + 4(%esp) cl_args
>> + (%esp) Return address
>> +
>> + The kernel expects:
>> + eax: system call number
>> + ebx: cl_args
>> + ecx: size
>> + */
>> +
>> +#define CL_ARGS 4
>> +#define SIZE 8
>> +#define FUNC 12
>> +#define ARG 16
>> +
>> + .text
>> +ENTRY (__clone3)
>> + /* Sanity check arguments. */
>> + movl $-EINVAL, %eax
>> + movl CL_ARGS(%esp), %ecx /* No NULL cl_args pointer. */
>> + testl %ecx, %ecx
>> + jz SYSCALL_ERROR_LABEL
>> + /* Save the function pointer in EDX which is preserved by the
>> + system call. */
>> + movl FUNC(%esp), %edx /* No NULL function pointer. */
>> + testl %edx, %edx
>> + jz SYSCALL_ERROR_LABEL
>> +
>> + /* Save EBX and ESI. */
>> + pushl %ebx
>> + cfi_adjust_cfa_offset (4)
>> + pushl %esi
>> + cfi_adjust_cfa_offset (4)
>> +
>> + /* Save the function argument in ESI which is preserved by the
>> + system call. */
>> + movl (ARG + 8)(%esp), %esi
>> +
>> + /* Put cl_args in EBX. */
>> + movl %ecx, %ebx
>> +
>> + /* Put size in ECX. */
>> + movl (SIZE + 8)(%esp), %ecx
>> +
>> + /* Do the system call. */
>> + movl $SYS_ify(clone3), %eax
>> +
>> + /* End FDE now, because in the child the unwind info will be
>> + wrong. */
>> + cfi_endproc
>> +
>> + int $0x80
>> + test %eax, %eax
>> + /* No need to restore EBX and ESI in child. */
>
> does esp need to be adjusted?
We don't need to adjust ESP in the parent, and the child stack is set up
a few lines below.
>> + jz L(thread_start)
>> +
>> + /* Restore EBX and ESI in parent. */
>> + pop %esi
>> + pop %ebx
>> + jl SYSCALL_ERROR_LABEL
>> +
>> + ret
>> +
>> +L(thread_start):
>> + cfi_startproc
>> + /* Clearing frame pointer is insufficient, use CFI. */
>> + cfi_undefined (eip)
>> + xorl %ebp, %ebp /* Terminate the stack frame. */
>> +
>> + /* Align stack to 16 bytes per the i386 psABI. */
>> + andl $-16, %esp
>>
>> +
>> + /* The PUSH below will decrement stack pointer by 4 bytes. */
>> + subl $12, %esp
>> +
>> + /* Set up the argument for the function call. */
>> + pushl %esi /* Argument. */
>
> Can you pushl then align and drop the subl? Or does esp need to be aligned before
> the pushl?
We need to align the child stack to 16 bytes first and then push the argument
onto stack for the child function.
>>
>> + cfi_adjust_cfa_offset (4)
>> + call *%edx /* Call function. */
>> +
>> + /* Call exit with return value from function call. */
>> + movl %eax, %ebx
>> + movl $SYS_ify(exit), %eax
>> + ENTER_KERNEL
>> + cfi_endproc
>> +
>> + cfi_startproc
>> +PSEUDO_END (__clone3)
>> +
>> +libc_hidden_def (__clone3)
>> +weak_alias (__clone3, clone3)
>> diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h
>> index 8680b49bf7..3927a1a6e0 100644
>> --- a/sysdeps/unix/sysv/linux/i386/sysdep.h
>> +++ b/sysdeps/unix/sysv/linux/i386/sysdep.h
>> @@ -291,6 +291,8 @@ struct libc_do_syscall_args
>> # define HAVE_TIME_VSYSCALL "__vdso_time"
>> # define HAVE_CLOCK_GETRES_VSYSCALL "__vdso_clock_getres"
>>
>> +# define HAVE_CLONE3_WAPPER 1
>> +
>> # undef HAVE_INTERNAL_BRK_ADDR_SYMBOL
>> # define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1
>>
>> --
>> 2.31.1
>>
On 7/14/21 9:33 AM, H.J. Lu via Libc-alpha wrote:
> extern int clone3 (struct clone_args *__cl_args, size_t __size,
> int (*__func) (void *__arg), void *__arg);
OK for glibc 2.34.
I think this is important for x86 overall. I'd like to see this
in the release.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
> ---
> sysdeps/unix/sysv/linux/i386/clone3.S | 123 ++++++++++++++++++++++++++
> sysdeps/unix/sysv/linux/i386/sysdep.h | 2 +
> 2 files changed, 125 insertions(+)
> create mode 100644 sysdeps/unix/sysv/linux/i386/clone3.S
>
> diff --git a/sysdeps/unix/sysv/linux/i386/clone3.S b/sysdeps/unix/sysv/linux/i386/clone3.S
> new file mode 100644
> index 0000000000..bef3ce0455
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/i386/clone3.S
> @@ -0,0 +1,123 @@
> +/* The clone3 syscall wrapper. Linux/i386 version.
OK.
> + Copyright (C) 2021 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +/* clone3() is even more special than fork() as it mucks with stacks
> + and invokes a function in the right context after its all over. */
OK.
> +
> +#include <sysdep.h>
> +
> +/* The userland implementation is:
> + int clone3 (struct clone_args *cl_args, size_t size,
> + int (*func)(void *arg), void *arg);
> + the kernel entry is:
> + int clone3 (struct clone_args *cl_args, size_t size);
> +
> + The parameters are passed on stack from userland:
> + 16(%esp) arg
> + 12(%esp) func
> + 8(%esp) size
> + 4(%esp) cl_args
> + (%esp) Return address
> +
> + The kernel expects:
> + eax: system call number
> + ebx: cl_args
> + ecx: size
> + */
OK. Nice comment.
> +
> +#define CL_ARGS 4
> +#define SIZE 8
> +#define FUNC 12
> +#define ARG 16
> +
> + .text
> +ENTRY (__clone3)
> + /* Sanity check arguments. */
> + movl $-EINVAL, %eax
> + movl CL_ARGS(%esp), %ecx /* No NULL cl_args pointer. */
> + testl %ecx, %ecx
> + jz SYSCALL_ERROR_LABEL
> + /* Save the function pointer in EDX which is preserved by the
> + system call. */
> + movl FUNC(%esp), %edx /* No NULL function pointer. */
> + testl %edx, %edx
> + jz SYSCALL_ERROR_LABEL
> +
> + /* Save EBX and ESI. */
> + pushl %ebx
> + cfi_adjust_cfa_offset (4)
> + pushl %esi
> + cfi_adjust_cfa_offset (4)
> +
> + /* Save the function argument in ESI which is preserved by the
> + system call. */
> + movl (ARG + 8)(%esp), %esi
> +
> + /* Put cl_args in EBX. */
> + movl %ecx, %ebx
> +
> + /* Put size in ECX. */
> + movl (SIZE + 8)(%esp), %ecx
> +
> + /* Do the system call. */
> + movl $SYS_ify(clone3), %eax
OK.
> +
> + /* End FDE now, because in the child the unwind info will be
> + wrong. */
> + cfi_endproc
> +
> + int $0x80
> + test %eax, %eax
> + /* No need to restore EBX and ESI in child. */
> + jz L(thread_start)
> +
> + /* Restore EBX and ESI in parent. */
> + pop %esi
> + pop %ebx
> + jl SYSCALL_ERROR_LABEL
> +
> + ret
> +
> +L(thread_start):
> + cfi_startproc
> + /* Clearing frame pointer is insufficient, use CFI. */
> + cfi_undefined (eip)
> + xorl %ebp, %ebp /* Terminate the stack frame. */
> +
> + /* Align stack to 16 bytes per the i386 psABI. */
> + andl $-16, %esp
> +
> + /* The PUSH below will decrement stack pointer by 4 bytes. */
> + subl $12, %esp
> +
> + /* Set up the argument for the function call. */
> + pushl %esi /* Argument. */
> + cfi_adjust_cfa_offset (4)
> + call *%edx /* Call function. */
> +
> + /* Call exit with return value from function call. */
> + movl %eax, %ebx
> + movl $SYS_ify(exit), %eax
> + ENTER_KERNEL
> + cfi_endproc
> +
> + cfi_startproc
> +PSEUDO_END (__clone3)
> +
> +libc_hidden_def (__clone3)
> +weak_alias (__clone3, clone3)
> diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h
> index 8680b49bf7..3927a1a6e0 100644
> --- a/sysdeps/unix/sysv/linux/i386/sysdep.h
> +++ b/sysdeps/unix/sysv/linux/i386/sysdep.h
> @@ -291,6 +291,8 @@ struct libc_do_syscall_args
> # define HAVE_TIME_VSYSCALL "__vdso_time"
> # define HAVE_CLOCK_GETRES_VSYSCALL "__vdso_clock_getres"
>
> +# define HAVE_CLONE3_WAPPER 1
OK.
> +
> # undef HAVE_INTERNAL_BRK_ADDR_SYMBOL
> # define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1
>
>
new file mode 100644
@@ -0,0 +1,123 @@
+/* The clone3 syscall wrapper. Linux/i386 version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* clone3() is even more special than fork() as it mucks with stacks
+ and invokes a function in the right context after it's all over. */
+
+#include <sysdep.h>
+
+/* The userland implementation is:
+ int clone3 (struct clone_args *cl_args, size_t size,
+ int (*func)(void *arg), void *arg);
+ the kernel entry is:
+ int clone3 (struct clone_args *cl_args, size_t size);
+
+ The parameters are passed on stack from userland:
+ 16(%esp) arg
+ 12(%esp) func
+ 8(%esp) size
+ 4(%esp) cl_args
+ (%esp) Return address
+
+ The kernel expects:
+ eax: system call number
+ ebx: cl_args
+ ecx: size
+ */
+
+#define CL_ARGS 4
+#define SIZE 8
+#define FUNC 12
+#define ARG 16
+
+ .text
+ENTRY (__clone3)
+ /* Sanity check arguments. */
+ movl $-EINVAL, %eax
+ movl CL_ARGS(%esp), %ecx /* No NULL cl_args pointer. */
+ testl %ecx, %ecx
+ jz SYSCALL_ERROR_LABEL
+ /* Save the function pointer in EDX which is preserved by the
+ system call. */
+ movl FUNC(%esp), %edx /* No NULL function pointer. */
+ testl %edx, %edx
+ jz SYSCALL_ERROR_LABEL
+
+ /* Save EBX and ESI. */
+ pushl %ebx
+ cfi_adjust_cfa_offset (4)
+ pushl %esi
+ cfi_adjust_cfa_offset (4)
+
+ /* Save the function argument in ESI which is preserved by the
+ system call. */
+ movl (ARG + 8)(%esp), %esi
+
+ /* Put cl_args in EBX. */
+ movl %ecx, %ebx
+
+ /* Put size in ECX. */
+ movl (SIZE + 8)(%esp), %ecx
+
+ /* Do the system call. */
+ movl $SYS_ify(clone3), %eax
+
+ /* End FDE now, because in the child the unwind info will be
+ wrong. */
+ cfi_endproc
+
+ int $0x80
+ test %eax, %eax
+ /* No need to restore EBX and ESI in child. */
+ jz L(thread_start)
+
+ /* Restore EBX and ESI in parent. */
+ pop %esi
+ pop %ebx
+ jl SYSCALL_ERROR_LABEL
+
+ ret
+
+L(thread_start):
+ cfi_startproc
+ /* Clearing frame pointer is insufficient, use CFI. */
+ cfi_undefined (eip)
+ xorl %ebp, %ebp /* Terminate the stack frame. */
+
+ /* Align stack to 16 bytes per the i386 psABI. */
+ andl $-16, %esp
+
+ /* The PUSH below will decrement stack pointer by 4 bytes. */
+ subl $12, %esp
+
+ /* Set up the argument for the function call. */
+ pushl %esi /* Argument. */
+ cfi_adjust_cfa_offset (4)
+ call *%edx /* Call function. */
+
+ /* Call exit with return value from function call. */
+ movl %eax, %ebx
+ movl $SYS_ify(exit), %eax
+ ENTER_KERNEL
+ cfi_endproc
+
+ cfi_startproc
+PSEUDO_END (__clone3)
+
+libc_hidden_def (__clone3)
+weak_alias (__clone3, clone3)
@@ -291,6 +291,8 @@ struct libc_do_syscall_args
# define HAVE_TIME_VSYSCALL "__vdso_time"
# define HAVE_CLOCK_GETRES_VSYSCALL "__vdso_clock_getres"
+# define HAVE_CLONE3_WAPPER 1
+
# undef HAVE_INTERNAL_BRK_ADDR_SYMBOL
# define HAVE_INTERNAL_BRK_ADDR_SYMBOL 1