[1/3] Linux: Add close_range
Commit Message
It was added on Linux 5.9 (278a5fbaed89). Although FreeBSD has added
the same syscall, this only adds the symbol on Linux ports. This
syscall is required to provided a fail-safe way to implement the
closefrom symbol (BZ #10353).
Similar to close, this is marked as a cancellation entrypoint.
The tst-close_range-consts.py requires a toolchain with an updated
kernel headers that provides the close_range.h UAPI kernel header.
Checked on x86_64-linux-gnu on kernel v5.9 and v5.4.
---
NEWS | 2 +
include/bits/unistd_ext.h | 6 +
sysdeps/unix/sysv/linux/Makefile | 13 +-
sysdeps/unix/sysv/linux/Versions | 3 +
sysdeps/unix/sysv/linux/aarch64/libc.abilist | 1 +
sysdeps/unix/sysv/linux/alpha/libc.abilist | 1 +
sysdeps/unix/sysv/linux/arc/libc.abilist | 1 +
sysdeps/unix/sysv/linux/arm/le/libc.abilist | 1 +
sysdeps/unix/sysv/linux/bits/unistd_ext.h | 11 ++
sysdeps/unix/sysv/linux/close_range.c | 28 +++
sysdeps/unix/sysv/linux/csky/libc.abilist | 1 +
sysdeps/unix/sysv/linux/hppa/libc.abilist | 1 +
sysdeps/unix/sysv/linux/i386/libc.abilist | 1 +
sysdeps/unix/sysv/linux/ia64/libc.abilist | 1 +
.../unix/sysv/linux/m68k/m680x0/libc.abilist | 1 +
.../sysv/linux/microblaze/be/libc.abilist | 1 +
.../sysv/linux/mips/mips32/fpu/libc.abilist | 1 +
.../sysv/linux/mips/mips64/n32/libc.abilist | 1 +
.../sysv/linux/mips/mips64/n64/libc.abilist | 1 +
sysdeps/unix/sysv/linux/nios2/libc.abilist | 1 +
.../linux/powerpc/powerpc32/fpu/libc.abilist | 1 +
.../linux/powerpc/powerpc64/be/libc.abilist | 1 +
.../linux/powerpc/powerpc64/le/libc.abilist | 1 +
.../unix/sysv/linux/riscv/rv32/libc.abilist | 1 +
.../unix/sysv/linux/riscv/rv64/libc.abilist | 1 +
.../unix/sysv/linux/s390/s390-32/libc.abilist | 1 +
.../unix/sysv/linux/s390/s390-64/libc.abilist | 1 +
sysdeps/unix/sysv/linux/sh/le/libc.abilist | 1 +
.../sysv/linux/sparc/sparc32/libc.abilist | 1 +
.../sysv/linux/sparc/sparc64/libc.abilist | 1 +
sysdeps/unix/sysv/linux/tst-clone.h | 48 +++++
.../unix/sysv/linux/tst-close_range-consts.py | 49 +++++
sysdeps/unix/sysv/linux/tst-close_range.c | 175 ++++++++++++++++++
.../unix/sysv/linux/x86_64/64/libc.abilist | 1 +
.../unix/sysv/linux/x86_64/x32/libc.abilist | 1 +
35 files changed, 359 insertions(+), 2 deletions(-)
create mode 100644 include/bits/unistd_ext.h
create mode 100644 sysdeps/unix/sysv/linux/close_range.c
create mode 100644 sysdeps/unix/sysv/linux/tst-clone.h
create mode 100644 sysdeps/unix/sysv/linux/tst-close_range-consts.py
create mode 100644 sysdeps/unix/sysv/linux/tst-close_range.c
Comments
* Adhemerval Zanella via Libc-alpha:
> diff --git a/include/bits/unistd_ext.h b/include/bits/unistd_ext.h
> new file mode 100644
> index 0000000000..277be05746
> --- /dev/null
> +++ b/include/bits/unistd_ext.h
> @@ -0,0 +1,6 @@
> +#include_next <bits/unistd_ext.h>
> +
> +#ifndef _ISOMAC
> +extern int __close_range (unsigned int lowfd, unsigned int highfd, int flags);
> +libc_hidden_proto (__close_range);
> +#endif
> diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
Missing __THROW? (But see below.)
> diff --git a/sysdeps/unix/sysv/linux/bits/unistd_ext.h b/sysdeps/unix/sysv/linux/bits/unistd_ext.h
> index c315cc5cb8..799c59512c 100644
> --- a/sysdeps/unix/sysv/linux/bits/unistd_ext.h
> +++ b/sysdeps/unix/sysv/linux/bits/unistd_ext.h
> @@ -33,4 +33,15 @@
> not detached and has not been joined. */
> extern __pid_t gettid (void) __THROW;
>
> +/* Unshare the file descriptor table before closing file descriptors. */
> +#define CLOSE_RANGE_UNSHARE (1U << 1)
> +
> +/* Close all file descriptors in the range FD up to MAX_FD. The flag FLAGS
> + are define by the CLOSE_RANGE prefix. This function behaves like close
> + on the range, but in a fail-safe where it will either fail and not close
> + any file descriptor or close all of them. Returns 0 on successor or -1
> + for failure (and sets errno accordingly). */
> +extern int close_range (unsigned int __fd, unsigned int __max_fd,
> + int __flags) __THROW;
> +
> #endif
I think we generally use int for file descriptors, following POSIX.
CLOSE_RANGE_CLOEXEC is coming (but it's not in the released 5.10). I
think it makes sense to adjust the comment to reflect that.
__THROW is incompatible with close_range as a cancellation point. I'm
not sure if it is useful for this function to be a cancellation point,
given that it's mostly used for cleanup. It's true that close can block
for an indefinite time for certain descriptors, but I don't know if
close_range inherits that behavior. It certainly does not apply to
CLOSE_RANGE_CLOEXEC.
> diff --git a/sysdeps/unix/sysv/linux/close_range.c b/sysdeps/unix/sysv/linux/close_range.c
> new file mode 100644
> index 0000000000..a664e158ee
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/close_range.c
> +int
> +__close_range (unsigned int lowfd, unsigned int highfd, int flags)
> +{
> + return SYSCALL_CANCEL (close_range, lowfd, highfd, flags);
> +}
> +libc_hidden_def (__close_range)
> +weak_alias (__close_range, close_range)
See above, this should probably be INLINE_SYSCALL_CALL. And you could
use the syscall lists file to define the system call either way.
> diff --git a/sysdeps/unix/sysv/linux/tst-clone.h b/sysdeps/unix/sysv/linux/tst-clone.h
> new file mode 100644
> index 0000000000..2344ac5101
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/tst-clone.h
> @@ -0,0 +1,48 @@
> +/* Auxiliary wrapper to issue the clone symbol.
It's not clear to me what this means.
> + Copyright (C) 2020 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#ifndef _TST_CLONE_H
> +#define _TST_CLONE_H
> +
> +#include <sched.h>
> +#include <stackinfo.h> /* For _STACK_GROWS_{UP,DOWN}. */
> +
> +#define DEFINE_STACK(name, size) \
> + char name[size] __attribute__ ((aligned))
> +
> +static inline pid_t
> +clone_call (int (*fn) (void *arg), void *arg, void *stack, size_t stack_size,
> + int flags)
> +{
> +#ifdef __ia64__
> + extern int __clone2 (int (*fn) (void *arg), void *stack, size_t stack_size,
> + int flags, void *arg, ...);
> + return __clone2 (f, stack, stack_size, flags, arg, /* ptid */ NULL,
> + /* tls */ NULL, /* ctid */ ctid);
> +#else
> +# if _STACK_GROWS_DOWN
> + return clone (fn, stack + stack_size, flags, arg, /* ptid */ NULL,
> + /* tls */ NULL, /* ctid */ NULL);
> +# elif _STACK_GROWS_UP
> + return clone (fn, stack, flags, arg, /* ptid */ NULL, /* tls */ NULL,
> + &ctid);
> +# endif
> +#endif
> +}
> +
> +#endif
Should this go into support/?
> diff --git a/sysdeps/unix/sysv/linux/tst-close_range.c b/sysdeps/unix/sysv/linux/tst-close_range.c
> new file mode 100644
> index 0000000000..a685e7d359
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/tst-close_range.c
> +static unsigned int
> +first_invalid_flag (void)
> +{
> + static const unsigned int flags[] = {
> + CLOSE_RANGE_UNSHARE,
> + };
> +
> + unsigned int r = 1;
> + for (int i = 0; i < array_length (flags); i++)
> + if ((1U << r) & flags[i])
> + r++;
> + return 1U << r;
> +}
> +
> +enum
> +{
> + maximum_fd = 100,
> + half_fd = maximum_fd / 2,
> + gap_1 = maximum_fd - 8,
> + gap_0 = (maximum_fd * 3) / 4
> +};
> +
> +static void
> +close_range_test_common (int *fds, unsigned int flags)
> +{
> + /* Basic test for invalid flags, bail out if unsupported. */
> + TEST_COMPARE (close_range (fds[0], fds[half_fd], first_invalid_flag()), -1);
This test is not future-proof at all. I don't think it's valuable.
> + if (errno == ENOSYS)
> + FAIL_UNSUPPORTED ("close_range not supported");
> + TEST_COMPARE (errno, EINVAL);
> +
> + /* Close half of the descriptors and check result. */
> + TEST_COMPARE (close_range (fds[0], fds[half_fd], flags), 0);
> + for (int i = 0; i <= half_fd; i++)
> + {
> + TEST_COMPARE (fcntl (fds[i], F_GETFL), -1);
> + TEST_COMPARE (errno, EBADF);
> + }
I have some difficulty figuring out whether you expect the descriptors
to be in a continuous range or not.
I think it would make sense to create one specific layout of file
descriptors and test with that.
> +static int
> +close_range_unshare_test_fn (void *arg)
> +{
> + close_range_test_common (arg, CLOSE_RANGE_UNSHARE);
> + exit (EXIT_SUCCESS);
> +}
> +
> +/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess
> + created with CLONE_FILES does not close the parent file descriptor list. */
> +static void
> +close_range_unshare_test (void)
> +{
I think there could also be a test without CLOSE_RANGE_UNSHARE that
verifies that the subprocess operators on the shared file descriptor
table.
Thanks,
Florian
On 22/12/2020 06:05, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
>
>> diff --git a/include/bits/unistd_ext.h b/include/bits/unistd_ext.h
>> new file mode 100644
>> index 0000000000..277be05746
>> --- /dev/null
>> +++ b/include/bits/unistd_ext.h
>> @@ -0,0 +1,6 @@
>> +#include_next <bits/unistd_ext.h>
>> +
>> +#ifndef _ISOMAC
>> +extern int __close_range (unsigned int lowfd, unsigned int highfd, int flags);
>> +libc_hidden_proto (__close_range);
>> +#endif
>> diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
>
> Missing __THROW? (But see below.)
>
>> diff --git a/sysdeps/unix/sysv/linux/bits/unistd_ext.h b/sysdeps/unix/sysv/linux/bits/unistd_ext.h
>> index c315cc5cb8..799c59512c 100644
>> --- a/sysdeps/unix/sysv/linux/bits/unistd_ext.h
>> +++ b/sysdeps/unix/sysv/linux/bits/unistd_ext.h
>> @@ -33,4 +33,15 @@
>> not detached and has not been joined. */
>> extern __pid_t gettid (void) __THROW;
>>
>> +/* Unshare the file descriptor table before closing file descriptors. */
>> +#define CLOSE_RANGE_UNSHARE (1U << 1)
>> +
>> +/* Close all file descriptors in the range FD up to MAX_FD. The flag FLAGS
>> + are define by the CLOSE_RANGE prefix. This function behaves like close
>> + on the range, but in a fail-safe where it will either fail and not close
>> + any file descriptor or close all of them. Returns 0 on successor or -1
>> + for failure (and sets errno accordingly). */
>> +extern int close_range (unsigned int __fd, unsigned int __max_fd,
>> + int __flags) __THROW;
>> +
>> #endif
>
> I think we generally use int for file descriptors, following POSIX.
The Linux interface uses unsigned integer, meaning that negative values
won't really trigger an invalid usage (kernel will return EINVAL if
second argument is larger than first one though).
I would prefer for syscall wrapper to be close as possible of the kernel
interface, otherwise it would require to add additional semantic to
handle potential pitfall cases.
On this case, if we go for 'int' as argument should we return EBADF
for invalid handles?
>
> CLOSE_RANGE_CLOEXEC is coming (but it's not in the released 5.10). I
> think it makes sense to adjust the comment to reflect that.
I have a patch to add CLOSE_RANGE_CLOEXEC as soon it is on a released
kernel, it includes changes in the enum and some tests.
>
> __THROW is incompatible with close_range as a cancellation point. I'm
> not sure if it is useful for this function to be a cancellation point,
> given that it's mostly used for cleanup. It's true that close can block
> for an indefinite time for certain descriptors, but I don't know if
> close_range inherits that behavior. It certainly does not apply to
> CLOSE_RANGE_CLOEXEC.
I wasn't sure about making a cancellation entrypoint, so I followed
current close semantic. The cancellation entrypoing would act also
in an early mode, meaning that asynchronous cancellation is enabled
and thread signaled the cancellation will act prior the syscall
execution.
In the other hand, reading the close_range code it does seen it
returns with side-effects due signal interrupt. So maybe not making
a cancellation entrypoint is indeed a better option.
The FreeBSD implementation does not seem to make close_range a
cancellation entrypoint.
>
>> diff --git a/sysdeps/unix/sysv/linux/close_range.c b/sysdeps/unix/sysv/linux/close_range.c
>> new file mode 100644
>> index 0000000000..a664e158ee
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/close_range.c
>
>> +int
>> +__close_range (unsigned int lowfd, unsigned int highfd, int flags)
>> +{
>> + return SYSCALL_CANCEL (close_range, lowfd, highfd, flags);
>> +}
>> +libc_hidden_def (__close_range)
>> +weak_alias (__close_range, close_range)
>
> See above, this should probably be INLINE_SYSCALL_CALL. And you could
> use the syscall lists file to define the system call either way.
>
>> diff --git a/sysdeps/unix/sysv/linux/tst-clone.h b/sysdeps/unix/sysv/linux/tst-clone.h
>> new file mode 100644
>> index 0000000000..2344ac5101
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/tst-clone.h
>> @@ -0,0 +1,48 @@
>> +/* Auxiliary wrapper to issue the clone symbol.
>
> It's not clear to me what this means.
Maybe:
Auxiliary functions to issue the clone syscall. ?
>
>> + Copyright (C) 2020 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +#ifndef _TST_CLONE_H
>> +#define _TST_CLONE_H
>> +
>> +#include <sched.h>
>> +#include <stackinfo.h> /* For _STACK_GROWS_{UP,DOWN}. */
>> +
>> +#define DEFINE_STACK(name, size) \
>> + char name[size] __attribute__ ((aligned))
>> +
>> +static inline pid_t
>> +clone_call (int (*fn) (void *arg), void *arg, void *stack, size_t stack_size,
>> + int flags)
>> +{
>> +#ifdef __ia64__
>> + extern int __clone2 (int (*fn) (void *arg), void *stack, size_t stack_size,
>> + int flags, void *arg, ...);
>> + return __clone2 (f, stack, stack_size, flags, arg, /* ptid */ NULL,
>> + /* tls */ NULL, /* ctid */ ctid);
>> +#else
>> +# if _STACK_GROWS_DOWN
>> + return clone (fn, stack + stack_size, flags, arg, /* ptid */ NULL,
>> + /* tls */ NULL, /* ctid */ NULL);
>> +# elif _STACK_GROWS_UP
>> + return clone (fn, stack, flags, arg, /* ptid */ NULL, /* tls */ NULL,
>> + &ctid);
>> +# endif
>> +#endif
>> +}
>> +
>> +#endif
>
> Should this go into support/?
Ok, although it would be a Linux only interface.
>
>> diff --git a/sysdeps/unix/sysv/linux/tst-close_range.c b/sysdeps/unix/sysv/linux/tst-close_range.c
>> new file mode 100644
>> index 0000000000..a685e7d359
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/tst-close_range.c
>
>> +static unsigned int
>> +first_invalid_flag (void)
>> +{
>> + static const unsigned int flags[] = {
>> + CLOSE_RANGE_UNSHARE,
>> + };
>> +
>> + unsigned int r = 1;
>> + for (int i = 0; i < array_length (flags); i++)
>> + if ((1U << r) & flags[i])
>> + r++;
>> + return 1U << r;
>> +}
>> +
>> +enum
>> +{
>> + maximum_fd = 100,
>> + half_fd = maximum_fd / 2,
>> + gap_1 = maximum_fd - 8,
>> + gap_0 = (maximum_fd * 3) / 4
>> +};
>> +
>> +static void
>> +close_range_test_common (int *fds, unsigned int flags)
>> +{
>> + /* Basic test for invalid flags, bail out if unsupported. */
>> + TEST_COMPARE (close_range (fds[0], fds[half_fd], first_invalid_flag()), -1);
>
> This test is not future-proof at all. I don't think it's valuable.
But the idea is exactly to raise a warning if the underlying kernel
supports more flags than the one support by glibc.
>
>> + if (errno == ENOSYS)
>> + FAIL_UNSUPPORTED ("close_range not supported");
>> + TEST_COMPARE (errno, EINVAL);
>> +
>> + /* Close half of the descriptors and check result. */
>> + TEST_COMPARE (close_range (fds[0], fds[half_fd], flags), 0);
>> + for (int i = 0; i <= half_fd; i++)
>> + {
>> + TEST_COMPARE (fcntl (fds[i], F_GETFL), -1);
>> + TEST_COMPARE (errno, EBADF);
>> + }
>
> I have some difficulty figuring out whether you expect the descriptors
> to be in a continuous range or not.
>
> I think it would make sense to create one specific layout of file
> descriptors and test with that.
I think it is a fair assumption that descriptors are a continuous range.
I modeled this tests using the kernel one as base, which make the same
assumptions [1]
[1] tools/testing/selftests/core/close_range_test.c
>
>> +static int
>> +close_range_unshare_test_fn (void *arg)
>> +{
>> + close_range_test_common (arg, CLOSE_RANGE_UNSHARE);
>> + exit (EXIT_SUCCESS);
>> +}
>> +
>> +/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess
>> + created with CLONE_FILES does not close the parent file descriptor list. */
>> +static void
>> +close_range_unshare_test (void)
>> +{
>
> I think there could also be a test without CLOSE_RANGE_UNSHARE that
> verifies that the subprocess operators on the shared file descriptor
> table.
Do you mean issue a ? I may add it, although it would tests
more CLONE_FILES support than close_range.
* Adhemerval Zanella:
>> I think we generally use int for file descriptors, following POSIX.
>
> The Linux interface uses unsigned integer, meaning that negative values
> won't really trigger an invalid usage (kernel will return EINVAL if
> second argument is larger than first one though).
>
> I would prefer for syscall wrapper to be close as possible of the kernel
> interface, otherwise it would require to add additional semantic to
> handle potential pitfall cases.
>
> On this case, if we go for 'int' as argument should we return EBADF
> for invalid handles?
Hmm. I think unsigned int is needed for ~0U to work, which is what you
used in the tests. If that's what applications use today when issuing
the syscall directly, I think we need to stick to unsigned int.
>> __THROW is incompatible with close_range as a cancellation point. I'm
>> not sure if it is useful for this function to be a cancellation point,
>> given that it's mostly used for cleanup. It's true that close can block
>> for an indefinite time for certain descriptors, but I don't know if
>> close_range inherits that behavior. It certainly does not apply to
>> CLOSE_RANGE_CLOEXEC.
>
> I wasn't sure about making a cancellation entrypoint, so I followed
> current close semantic. The cancellation entrypoing would act also
> in an early mode, meaning that asynchronous cancellation is enabled
> and thread signaled the cancellation will act prior the syscall
> execution.
>
> In the other hand, reading the close_range code it does seen it
> returns with side-effects due signal interrupt. So maybe not making
> a cancellation entrypoint is indeed a better option.
>
> The FreeBSD implementation does not seem to make close_range a
> cancellation entrypoint.
Yes, let's do not act upon cancellation.
>>> diff --git a/sysdeps/unix/sysv/linux/tst-clone.h b/sysdeps/unix/sysv/linux/tst-clone.h
>>> new file mode 100644
>>> index 0000000000..2344ac5101
>>> --- /dev/null
>>> +++ b/sysdeps/unix/sysv/linux/tst-clone.h
>>> @@ -0,0 +1,48 @@
>>> +/* Auxiliary wrapper to issue the clone symbol.
>>
>> It's not clear to me what this means.
>
> Maybe:
>
> Auxiliary functions to issue the clone syscall. ?
That's better, thanks.
>>> diff --git a/sysdeps/unix/sysv/linux/tst-close_range.c b/sysdeps/unix/sysv/linux/tst-close_range.c
>>> new file mode 100644
>>> index 0000000000..a685e7d359
>>> --- /dev/null
>>> +++ b/sysdeps/unix/sysv/linux/tst-close_range.c
>>
>>> +static unsigned int
>>> +first_invalid_flag (void)
>>> +{
>>> + static const unsigned int flags[] = {
>>> + CLOSE_RANGE_UNSHARE,
>>> + };
>>> +
>>> + unsigned int r = 1;
>>> + for (int i = 0; i < array_length (flags); i++)
>>> + if ((1U << r) & flags[i])
>>> + r++;
>>> + return 1U << r;
>>> +}
>>> +
>>> +enum
>>> +{
>>> + maximum_fd = 100,
>>> + half_fd = maximum_fd / 2,
>>> + gap_1 = maximum_fd - 8,
>>> + gap_0 = (maximum_fd * 3) / 4
>>> +};
>>> +
>>> +static void
>>> +close_range_test_common (int *fds, unsigned int flags)
>>> +{
>>> + /* Basic test for invalid flags, bail out if unsupported. */
>>> + TEST_COMPARE (close_range (fds[0], fds[half_fd], first_invalid_flag()), -1);
>>
>> This test is not future-proof at all. I don't think it's valuable.
>
> But the idea is exactly to raise a warning if the underlying kernel
> supports more flags than the one support by glibc.
But it causes a test failure, not a warning. I don't think it's valid
to poke for unknown system call flags like this, so even a proper
warning would be hard.
>>> + if (errno == ENOSYS)
>>> + FAIL_UNSUPPORTED ("close_range not supported");
>>> + TEST_COMPARE (errno, EINVAL);
>>> +
>>> + /* Close half of the descriptors and check result. */
>>> + TEST_COMPARE (close_range (fds[0], fds[half_fd], flags), 0);
>>> + for (int i = 0; i <= half_fd; i++)
>>> + {
>>> + TEST_COMPARE (fcntl (fds[i], F_GETFL), -1);
>>> + TEST_COMPARE (errno, EBADF);
>>> + }
>>
>> I have some difficulty figuring out whether you expect the descriptors
>> to be in a continuous range or not.
>>
>> I think it would make sense to create one specific layout of file
>> descriptors and test with that.
>
> I think it is a fair assumption that descriptors are a continuous range.
> I modeled this tests using the kernel one as base, which make the same
> assumptions [1]
>
> [1] tools/testing/selftests/core/close_range_test.c
The you don't need the fds array. It's what makes this confusing.
>>> +static int
>>> +close_range_unshare_test_fn (void *arg)
>>> +{
>>> + close_range_test_common (arg, CLOSE_RANGE_UNSHARE);
>>> + exit (EXIT_SUCCESS);
>>> +}
>>> +
>>> +/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess
>>> + created with CLONE_FILES does not close the parent file descriptor list. */
>>> +static void
>>> +close_range_unshare_test (void)
>>> +{
>>
>> I think there could also be a test without CLOSE_RANGE_UNSHARE that
>> verifies that the subprocess operators on the shared file descriptor
>> table.
>
> Do you mean issue a ? I may add it, although it would tests
> more CLONE_FILES support than close_range.
Sorry? I meant a test that shows that flags == 0 and flags ==
CLOSE_RANGE_UNSHARE behave differently.
Thanks,
Florian
On 22/12/2020 08:41, Florian Weimer wrote:
> * Adhemerval Zanella:
>
>>> I think we generally use int for file descriptors, following POSIX.
>>
>> The Linux interface uses unsigned integer, meaning that negative values
>> won't really trigger an invalid usage (kernel will return EINVAL if
>> second argument is larger than first one though).
>>
>> I would prefer for syscall wrapper to be close as possible of the kernel
>> interface, otherwise it would require to add additional semantic to
>> handle potential pitfall cases.
>>
>> On this case, if we go for 'int' as argument should we return EBADF
>> for invalid handles?
>
> Hmm. I think unsigned int is needed for ~0U to work, which is what you
> used in the tests. If that's what applications use today when issuing
> the syscall directly, I think we need to stick to unsigned int.
Ack.
>
>>> __THROW is incompatible with close_range as a cancellation point. I'm
>>> not sure if it is useful for this function to be a cancellation point,
>>> given that it's mostly used for cleanup. It's true that close can block
>>> for an indefinite time for certain descriptors, but I don't know if
>>> close_range inherits that behavior. It certainly does not apply to
>>> CLOSE_RANGE_CLOEXEC.
>>
>> I wasn't sure about making a cancellation entrypoint, so I followed
>> current close semantic. The cancellation entrypoing would act also
>> in an early mode, meaning that asynchronous cancellation is enabled
>> and thread signaled the cancellation will act prior the syscall
>> execution.
>>
>> In the other hand, reading the close_range code it does seen it
>> returns with side-effects due signal interrupt. So maybe not making
>> a cancellation entrypoint is indeed a better option.
>>
>> The FreeBSD implementation does not seem to make close_range a
>> cancellation entrypoint.
>
> Yes, let's do not act upon cancellation.
Ack.
>
>>>> diff --git a/sysdeps/unix/sysv/linux/tst-clone.h b/sysdeps/unix/sysv/linux/tst-clone.h
>>>> new file mode 100644
>>>> index 0000000000..2344ac5101
>>>> --- /dev/null
>>>> +++ b/sysdeps/unix/sysv/linux/tst-clone.h
>>>> @@ -0,0 +1,48 @@
>>>> +/* Auxiliary wrapper to issue the clone symbol.
>>>
>>> It's not clear to me what this means.
>>
>> Maybe:
>>
>> Auxiliary functions to issue the clone syscall. ?
>
> That's better, thanks.
Ack.
>
>>>> diff --git a/sysdeps/unix/sysv/linux/tst-close_range.c b/sysdeps/unix/sysv/linux/tst-close_range.c
>>>> new file mode 100644
>>>> index 0000000000..a685e7d359
>>>> --- /dev/null
>>>> +++ b/sysdeps/unix/sysv/linux/tst-close_range.c
>>>
>>>> +static unsigned int
>>>> +first_invalid_flag (void)
>>>> +{
>>>> + static const unsigned int flags[] = {
>>>> + CLOSE_RANGE_UNSHARE,
>>>> + };
>>>> +
>>>> + unsigned int r = 1;
>>>> + for (int i = 0; i < array_length (flags); i++)
>>>> + if ((1U << r) & flags[i])
>>>> + r++;
>>>> + return 1U << r;
>>>> +}
>>>> +
>>>> +enum
>>>> +{
>>>> + maximum_fd = 100,
>>>> + half_fd = maximum_fd / 2,
>>>> + gap_1 = maximum_fd - 8,
>>>> + gap_0 = (maximum_fd * 3) / 4
>>>> +};
>>>> +
>>>> +static void
>>>> +close_range_test_common (int *fds, unsigned int flags)
>>>> +{
>>>> + /* Basic test for invalid flags, bail out if unsupported. */
>>>> + TEST_COMPARE (close_range (fds[0], fds[half_fd], first_invalid_flag()), -1);
>>>
>>> This test is not future-proof at all. I don't think it's valuable.
>>
>> But the idea is exactly to raise a warning if the underlying kernel
>> supports more flags than the one support by glibc.
>
> But it causes a test failure, not a warning. I don't think it's valid
> to poke for unknown system call flags like this, so even a proper
> warning would be hard.
Alright, I will remove the invalid flag warning.
>
>>>> + if (errno == ENOSYS)
>>>> + FAIL_UNSUPPORTED ("close_range not supported");
>>>> + TEST_COMPARE (errno, EINVAL);
>>>> +
>>>> + /* Close half of the descriptors and check result. */
>>>> + TEST_COMPARE (close_range (fds[0], fds[half_fd], flags), 0);
>>>> + for (int i = 0; i <= half_fd; i++)
>>>> + {
>>>> + TEST_COMPARE (fcntl (fds[i], F_GETFL), -1);
>>>> + TEST_COMPARE (errno, EBADF);
>>>> + }
>>>
>>> I have some difficulty figuring out whether you expect the descriptors
>>> to be in a continuous range or not.
>>>
>>> I think it would make sense to create one specific layout of file
>>> descriptors and test with that.
>>
>> I think it is a fair assumption that descriptors are a continuous range.
>> I modeled this tests using the kernel one as base, which make the same
>> assumptions [1]
>>
>> [1] tools/testing/selftests/core/close_range_test.c
>
> The you don't need the fds array. It's what makes this confusing.
Ack, I will remove it.
>
>>>> +static int
>>>> +close_range_unshare_test_fn (void *arg)
>>>> +{
>>>> + close_range_test_common (arg, CLOSE_RANGE_UNSHARE);
>>>> + exit (EXIT_SUCCESS);
>>>> +}
>>>> +
>>>> +/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess
>>>> + created with CLONE_FILES does not close the parent file descriptor list. */
>>>> +static void
>>>> +close_range_unshare_test (void)
>>>> +{
>>>
>>> I think there could also be a test without CLOSE_RANGE_UNSHARE that
>>> verifies that the subprocess operators on the shared file descriptor
>>> table.
>>
>> Do you mean issue a ? I may add it, although it would tests
>> more CLONE_FILES support than close_range.
>
> Sorry? I meant a test that shows that flags == 0 and flags ==
> CLOSE_RANGE_UNSHARE behave differently.
My questioning was mangled (the '?' should have been CLOSE_RANGE_UNSHARE).
Alright I will add a clone tests with both flags.
On Tue, Dec 22, 2020 at 12:41:50PM +0100, Florian Weimer via Libc-alpha wrote:
> * Adhemerval Zanella:
>
> >> I think we generally use int for file descriptors, following POSIX.
> >
> > The Linux interface uses unsigned integer, meaning that negative values
> > won't really trigger an invalid usage (kernel will return EINVAL if
> > second argument is larger than first one though).
> >
> > I would prefer for syscall wrapper to be close as possible of the kernel
> > interface, otherwise it would require to add additional semantic to
> > handle potential pitfall cases.
> >
> > On this case, if we go for 'int' as argument should we return EBADF
> > for invalid handles?
>
> Hmm. I think unsigned int is needed for ~0U to work, which is what you
> used in the tests. If that's what applications use today when issuing
> the syscall directly, I think we need to stick to unsigned int.
For the record, the initial reason we chose unsigned int for
close_range() was that the close() syscall uses unsigned int too:
SYSCALL_DEFINE1(close, unsigned int, fd)
SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd,
unsigned int, flags)
Furthermore, the kernel doesn't care whether the fd is negative. It
simply checks whether the passed in fd number falls within the possible
range of the fdtable, i.e. fd < fdt->max_fds. If the passed-in fd falls
within the possible range that the fdtable can handle it will check
whether there's a file open at that position in the fdtable, i.e. from
the kernel's perspective "negative" fd values aren't special in any way.
But from what I can tell a lot of (non-libc) userspace isn't aware of
the fact that the kernel uses unsigned int which can lead to some
confusion. A little while ago I had to talk to Lennart about this when
they added support for close_range() which they had been waiting for.
Their syscall wrapper is now documented with:
/* Kernel-side the syscall expects fds as unsigned integers (just like close() actually), while
* userspace exclusively uses signed integers for fds. We don't know just yet how glibc is going to
* wrap this syscall, but let's assume it's going to be similar to what they do for close(),
* i.e. make the same unsigned → signed type change from the raw kernel syscall compared to the
* userspace wrapper. There's only one caveat for this: unlike for close() there's the special
* UINT_MAX fd value for the 'end_fd' argument. Let's safely map that to -1 here. And let's refuse
* any other negative values. */
https://github.com/systemd/systemd/commit/441e0fdb900b49888fb6d7901a2b5aa92c0a2017
Christian
@@ -28,6 +28,8 @@ Major new features:
The 32-bit RISC-V port requires at least Linux 5.4, GCC 7.1 and binutils
2.28.
+* On Linux, the close_range function has been added.
+
Deprecated and removed features, and other changes affecting compatibility:
* The mallinfo function is marked deprecated. Callers should call
new file mode 100644
@@ -0,0 +1,6 @@
+#include_next <bits/unistd_ext.h>
+
+#ifndef _ISOMAC
+extern int __close_range (unsigned int lowfd, unsigned int highfd, int flags);
+libc_hidden_proto (__close_range);
+#endif
@@ -61,7 +61,7 @@ sysdep_routines += adjtimex clone umount umount2 readahead sysctl \
open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get \
timerfd_gettime timerfd_settime prctl \
process_vm_readv process_vm_writev clock_adjtime \
- time64-support pselect32
+ time64-support pselect32 close_range
CFLAGS-gethostid.c = -fexceptions
CFLAGS-tee.c = -fexceptions -fasynchronous-unwind-tables
@@ -103,7 +103,8 @@ tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \
tst-quota tst-sync_file_range tst-sysconf-iov_max tst-ttyname \
test-errno-linux tst-memfd_create tst-mlock2 tst-pkey \
tst-rlimit-infinity tst-ofdlocks tst-gettid tst-gettid-kill \
- tst-tgkill tst-sysvsem-linux tst-sysvmsg-linux tst-sysvshm-linux
+ tst-tgkill tst-sysvsem-linux tst-sysvmsg-linux tst-sysvshm-linux \
+ tst-close_range
tests-internal += tst-ofdlocks-compat tst-sigcontext-get_pc
CFLAGS-tst-sigcontext-get_pc.c = -fasynchronous-unwind-tables
@@ -177,6 +178,14 @@ $(objpfx)tst-mman-consts.out: ../sysdeps/unix/sysv/linux/tst-mman-consts.py
< /dev/null > $@ 2>&1; $(evaluate-test)
$(objpfx)tst-mman-consts.out: $(sysdeps-linux-python-deps)
+tests-special += $(objpfx)tst-close_range-consts.out
+$(objpfx)tst-close_range-consts.out: ../sysdeps/unix/sysv/linux/tst-close_range-consts.py
+ $(sysdeps-linux-python) \
+ ../sysdeps/unix/sysv/linux/tst-close_range-consts.py \
+ $(sysdeps-linux-python-cc) \
+ < /dev/null > $@ 2>&1; $(evaluate-test)
+$(objpfx)tst-close_range-consts.out: $(sysdeps-linux-python-deps)
+
$(objpfx)tst-gettid: $(shared-thread-library)
$(objpfx)tst-gettid-kill: $(shared-thread-library)
$(objpfx)tst-tgkill: $(shared-thread-library)
@@ -169,6 +169,9 @@ libc {
}
GLIBC_2.32 {
}
+ GLIBC_2.33 {
+ close_range;
+ }
GLIBC_PRIVATE {
# functions used in other libraries
__syscall_rt_sigqueueinfo;
@@ -2160,6 +2160,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2242,6 +2242,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -1920,6 +1920,7 @@ GLIBC_2.32 wprintf F
GLIBC_2.32 write F
GLIBC_2.32 writev F
GLIBC_2.32 wscanf F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -141,6 +141,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -33,4 +33,15 @@
not detached and has not been joined. */
extern __pid_t gettid (void) __THROW;
+/* Unshare the file descriptor table before closing file descriptors. */
+#define CLOSE_RANGE_UNSHARE (1U << 1)
+
+/* Close all file descriptors in the range FD up to MAX_FD. The flag FLAGS
+ are define by the CLOSE_RANGE prefix. This function behaves like close
+ on the range, but in a fail-safe where it will either fail and not close
+ any file descriptor or close all of them. Returns 0 on successor or -1
+ for failure (and sets errno accordingly). */
+extern int close_range (unsigned int __fd, unsigned int __max_fd,
+ int __flags) __THROW;
+
#endif
new file mode 100644
@@ -0,0 +1,28 @@
+/* Close a range of file descriptors.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <unistd.h>
+#include <sysdep.h>
+
+int
+__close_range (unsigned int lowfd, unsigned int highfd, int flags)
+{
+ return SYSCALL_CANCEL (close_range, lowfd, highfd, flags);
+}
+libc_hidden_def (__close_range)
+weak_alias (__close_range, close_range)
@@ -2104,6 +2104,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2063,6 +2063,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2229,6 +2229,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2095,6 +2095,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2175,6 +2175,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2155,6 +2155,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2146,6 +2146,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2152,6 +2152,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2146,6 +2146,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2193,6 +2193,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2202,6 +2202,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2065,6 +2065,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2355,6 +2355,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -595,6 +595,7 @@ GLIBC_2.33 clock_nanosleep F
GLIBC_2.33 clock_settime F
GLIBC_2.33 clone F
GLIBC_2.33 close F
+GLIBC_2.33 close_range F
GLIBC_2.33 closedir F
GLIBC_2.33 closelog F
GLIBC_2.33 confstr F
@@ -2122,6 +2122,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2200,6 +2200,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2101,6 +2101,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2067,6 +2067,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2191,6 +2191,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2118,6 +2118,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
new file mode 100644
@@ -0,0 +1,48 @@
+/* Auxiliary wrapper to issue the clone symbol.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _TST_CLONE_H
+#define _TST_CLONE_H
+
+#include <sched.h>
+#include <stackinfo.h> /* For _STACK_GROWS_{UP,DOWN}. */
+
+#define DEFINE_STACK(name, size) \
+ char name[size] __attribute__ ((aligned))
+
+static inline pid_t
+clone_call (int (*fn) (void *arg), void *arg, void *stack, size_t stack_size,
+ int flags)
+{
+#ifdef __ia64__
+ extern int __clone2 (int (*fn) (void *arg), void *stack, size_t stack_size,
+ int flags, void *arg, ...);
+ return __clone2 (f, stack, stack_size, flags, arg, /* ptid */ NULL,
+ /* tls */ NULL, /* ctid */ ctid);
+#else
+# if _STACK_GROWS_DOWN
+ return clone (fn, stack + stack_size, flags, arg, /* ptid */ NULL,
+ /* tls */ NULL, /* ctid */ NULL);
+# elif _STACK_GROWS_UP
+ return clone (fn, stack, flags, arg, /* ptid */ NULL, /* tls */ NULL,
+ &ctid);
+# endif
+#endif
+}
+
+#endif
new file mode 100644
@@ -0,0 +1,49 @@
+#!/usr/bin/python3
+# Test that glibc's unistd_ext.h constants match the kernel's.
+# Copyright (C) 2020 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+import argparse
+import sys
+
+import glibcextract
+import glibcsyscalls
+
+
+def main():
+ """The main entry point."""
+ parser = argparse.ArgumentParser(
+ description="Test that glibc's unistd_ext.h constants match "
+ "the kernel's.")
+ parser.add_argument('--cc', metavar='CC',
+ help='C compiler (including options) to use')
+ args = parser.parse_args()
+ linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc)
+ linux_version_glibc = (5, 9)
+ sys.exit(glibcextract.compare_macro_consts(
+ '#define _GNU_SOURCE 1\n'
+ '#include <unistd.h>\n',
+ '#define _GNU_SOURCE 1\n'
+ '#include <linux/close_range.h>\n',
+ args.cc,
+ 'CLOSE_RANGE_.*',
+ None,
+ linux_version_glibc > linux_version_headers,
+ linux_version_headers > linux_version_glibc))
+
+if __name__ == '__main__':
+ main()
new file mode 100644
@@ -0,0 +1,175 @@
+/* Test for the close_range system call.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include <support/capture_subprocess.h>
+#include <support/check.h>
+#include <support/descriptors.h>
+#include <support/support.h>
+#include <support/xunistd.h>
+
+#include <array_length.h>
+#include <tst-clone.h>
+
+static unsigned int
+first_invalid_flag (void)
+{
+ static const unsigned int flags[] = {
+ CLOSE_RANGE_UNSHARE,
+ };
+
+ unsigned int r = 1;
+ for (int i = 0; i < array_length (flags); i++)
+ if ((1U << r) & flags[i])
+ r++;
+ return 1U << r;
+}
+
+enum
+{
+ maximum_fd = 100,
+ half_fd = maximum_fd / 2,
+ gap_1 = maximum_fd - 8,
+ gap_0 = (maximum_fd * 3) / 4
+};
+
+static void
+close_range_test_common (int *fds, unsigned int flags)
+{
+ /* Basic test for invalid flags, bail out if unsupported. */
+ TEST_COMPARE (close_range (fds[0], fds[half_fd], first_invalid_flag()), -1);
+ if (errno == ENOSYS)
+ FAIL_UNSUPPORTED ("close_range not supported");
+ TEST_COMPARE (errno, EINVAL);
+
+ /* Close half of the descriptors and check result. */
+ TEST_COMPARE (close_range (fds[0], fds[half_fd], flags), 0);
+ for (int i = 0; i <= half_fd; i++)
+ {
+ TEST_COMPARE (fcntl (fds[i], F_GETFL), -1);
+ TEST_COMPARE (errno, EBADF);
+ }
+ for (int i = half_fd + 1; i < maximum_fd; i++)
+ TEST_VERIFY (fcntl (fds[i], F_GETFL) > -1);
+
+ /* Create some gaps, close up to a threshold, and check result. */
+ xclose (57);
+ xclose (78);
+ xclose (81);
+ xclose (82);
+ xclose (84);
+ xclose (90);
+
+ TEST_COMPARE (close_range (fds[half_fd + 1], fds[gap_1], flags), 0);
+ for (int i = half_fd + 1; i < gap_1; i++)
+ {
+ TEST_COMPARE (fcntl (fds[i], F_GETFL), -1);
+ TEST_COMPARE (errno, EBADF);
+ }
+ for (int i = gap_1 + 1; i < maximum_fd; i++)
+ TEST_VERIFY (fcntl (fds[i], F_GETFL) > -1);
+
+ /* Close the remmaining but the last one. */
+ TEST_COMPARE (close_range (fds[gap_1 + 1], fds[maximum_fd - 1], flags), 0);
+ for (int i = gap_1 + 1; i < maximum_fd - 1; i++)
+ {
+ TEST_COMPARE (fcntl (fds[i], F_GETFL), -1);
+ TEST_COMPARE (errno, EBADF);
+ }
+ TEST_VERIFY (fcntl (fds[maximum_fd], F_GETFL) > -1);
+
+ /* Close the last one. */
+ TEST_COMPARE (close_range (fds[maximum_fd], fds[maximum_fd], flags), 0);
+ TEST_COMPARE (fcntl (fds[maximum_fd], F_GETFL), -1);
+ TEST_COMPARE (errno, EBADF);
+}
+
+/* Basic tests: check if the syscall close ranges with and without gaps. */
+static void
+close_range_test (void)
+{
+ int fds[maximum_fd + 1];
+
+ struct support_descriptors *descrs = support_descriptors_list ();
+
+ for (int i = 0; i < array_length (fds); i++)
+ fds[i] = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
+
+ close_range_test_common (fds, 0);
+
+ /* Double check by check the /proc. */
+ support_descriptors_check (descrs);
+ support_descriptors_free (descrs);
+}
+
+static int
+close_range_unshare_test_fn (void *arg)
+{
+ close_range_test_common (arg, CLOSE_RANGE_UNSHARE);
+ exit (EXIT_SUCCESS);
+}
+
+/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess
+ created with CLONE_FILES does not close the parent file descriptor list. */
+static void
+close_range_unshare_test (void)
+{
+ int fds[maximum_fd + 1];
+
+ for (int i = 0; i < array_length (fds); i++)
+ fds[i] = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
+
+ struct support_descriptors *descrs = support_descriptors_list ();
+
+ enum { stack_size = 4096 };
+ DEFINE_STACK (stack, stack_size);
+ pid_t pid = clone_call (close_range_unshare_test_fn, fds, stack, stack_size,
+ CLONE_FILES | SIGCHLD);
+ TEST_VERIFY_EXIT (pid > 0);
+ int status;
+ xwaitpid (pid, &status, 0);
+ TEST_VERIFY (WIFEXITED (status));
+ TEST_COMPARE (WEXITSTATUS(status), 0);
+
+ for (int i = 0; i < maximum_fd; i++)
+ TEST_VERIFY (fcntl (fds[i], F_GETFL) > -1);
+
+ support_descriptors_check (descrs);
+ support_descriptors_free (descrs);
+
+ TEST_COMPARE (close_range (fds[0], fds[maximum_fd], 0), 0);
+}
+
+static int
+do_test (void)
+{
+ close_range_test ();
+ close_range_unshare_test ();
+
+ return 0;
+}
+
+#include <support/test-driver.c>
@@ -2076,6 +2076,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2173,6 +2173,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F