[v2,2/4] Linux: Add close_range
Commit Message
Changes from previous version [1]:
- Make the syscall not a cancellation entrypoint.
- Move tst-clone.h to libsupport
- Fixed typos.
- Add clone (CLONE_FILES) and check if the close_range closes
the parent table as well.
- Remove file descriptors array
- Add manual entry
[1] https://patchwork.sourceware.org/project/glibc/patch/20201221215027.2176966-1-adhemerval.zanella@linaro.org/
--
It was added on Linux 5.9 (278a5fbaed89). Although FreeBSD has added
the same syscall, this only adds the symbol on Linux ports. This
syscall is required to provide a fail-safe way to implement the
closefrom symbol (BZ #10353).
The tst-close_range-consts.py test requires a toolchain with updated
kernel headers that provide the close_range.h UAPI kernel header.
Checked on x86_64-linux-gnu on kernel v5.9 and v5.4.
---
NEWS | 2 +
include/bits/unistd_ext.h | 6 +
manual/llio.texi | 31 +++
sysdeps/unix/sysv/linux/Makefile | 13 +-
sysdeps/unix/sysv/linux/Versions | 3 +
sysdeps/unix/sysv/linux/aarch64/libc.abilist | 1 +
sysdeps/unix/sysv/linux/alpha/libc.abilist | 1 +
sysdeps/unix/sysv/linux/arc/libc.abilist | 1 +
sysdeps/unix/sysv/linux/arm/le/libc.abilist | 1 +
sysdeps/unix/sysv/linux/bits/unistd_ext.h | 11 +
sysdeps/unix/sysv/linux/csky/libc.abilist | 1 +
sysdeps/unix/sysv/linux/hppa/libc.abilist | 1 +
sysdeps/unix/sysv/linux/i386/libc.abilist | 1 +
sysdeps/unix/sysv/linux/ia64/libc.abilist | 1 +
.../unix/sysv/linux/m68k/m680x0/libc.abilist | 1 +
.../sysv/linux/microblaze/be/libc.abilist | 1 +
.../sysv/linux/mips/mips32/fpu/libc.abilist | 1 +
.../sysv/linux/mips/mips64/n32/libc.abilist | 1 +
.../sysv/linux/mips/mips64/n64/libc.abilist | 1 +
sysdeps/unix/sysv/linux/nios2/libc.abilist | 1 +
.../linux/powerpc/powerpc32/fpu/libc.abilist | 1 +
.../linux/powerpc/powerpc64/be/libc.abilist | 1 +
.../linux/powerpc/powerpc64/le/libc.abilist | 1 +
.../unix/sysv/linux/riscv/rv32/libc.abilist | 1 +
.../unix/sysv/linux/riscv/rv64/libc.abilist | 1 +
.../unix/sysv/linux/s390/s390-32/libc.abilist | 1 +
.../unix/sysv/linux/s390/s390-64/libc.abilist | 1 +
sysdeps/unix/sysv/linux/sh/le/libc.abilist | 1 +
.../sysv/linux/sparc/sparc32/libc.abilist | 1 +
.../sysv/linux/sparc/sparc64/libc.abilist | 1 +
sysdeps/unix/sysv/linux/syscalls.list | 1 +
.../unix/sysv/linux/tst-close_range-consts.py | 49 +++++
sysdeps/unix/sysv/linux/tst-close_range.c | 198 ++++++++++++++++++
.../unix/sysv/linux/x86_64/64/libc.abilist | 1 +
.../unix/sysv/linux/x86_64/x32/libc.abilist | 1 +
35 files changed, 338 insertions(+), 2 deletions(-)
create mode 100644 include/bits/unistd_ext.h
create mode 100644 sysdeps/unix/sysv/linux/tst-close_range-consts.py
create mode 100644 sysdeps/unix/sysv/linux/tst-close_range.c
Comments
* Adhemerval Zanella:
> diff --git a/manual/llio.texi b/manual/llio.texi
> index c0a53e1a6e..018e7933de 100644
> --- a/manual/llio.texi
> +++ b/manual/llio.texi
> @@ -284,6 +284,37 @@ of trying to close its underlying file descriptor with @code{close}.
> This flushes any buffered output and updates the stream object to
> indicate that it is closed.
>
> +@deftypefun int close_range (unsigned int @var{lowfd}, unsigned int @var{maxfd}, int @var{flags})
> +@standards{Linux, unistd.h}
> +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
> +@c This is a syscall for Linux v5.9. There is no fallback emultation for
> +@c older kernels.
Typo: “emultation”
> +The function @code{clode_range} closes the file descriptor from @var{lowfd}
> +to @var{maxfd} (included). This function is similar to call @code{close} in
“(inclusive)”?
> +specified file descriptor range.
Maybe add, “depending on the flags”? With the new CLOEXEC variant, the
system call does not close anything, after all.
> +The @var{flags} add options on how the files are closes. Linux currently
> +supports.
“supports:” (with colon)
> +@vtable @code
> +@item CLOSE_RANGE_UNSHARE
> +It unshare the range of file descriptors from any other processes, instead
> +of closing them.
> +@end vtable
“The function call unshares the range”.
Is this really how CLOSE_RANGE_UNSHARE operates? I think the
implementation falls through to the closing operation after unsharing
the descriptor table, so it still closes the descriptors.
> +
> +The normal return value from @code{close} is @math{0}; a value of @math{-1}
“close” should be ”close_range”.
> +is returned in case of failure. The following @code{errno} error
> +conditions are defined for this function:
> +
> +@table @code
> +@item EINVAL
> +The @var{lowfd} value is larger than @var{maxfd} or an unsupported @var{flag}
> +is used.
> +@end table
> +@end deftypefun
Typo: “@var{flag}” (missing s)
Other error codes with CLOSE_RANGE_UNSHARE: ENOMEM, EMFILE (see dup_fd).
Maybe a few more.
> diff --git a/sysdeps/unix/sysv/linux/syscalls.list b/sysdeps/unix/sysv/linux/syscalls.list
> index 01ec2bfa95..53575669c7 100644
> --- a/sysdeps/unix/sysv/linux/syscalls.list
> +++ b/sysdeps/unix/sysv/linux/syscalls.list
> @@ -101,3 +101,4 @@ pkey_alloc EXTRA pkey_alloc i:ii pkey_alloc
> pkey_free EXTRA pkey_free i:i pkey_free
> gettid EXTRA gettid Ei: __gettid gettid
> tgkill EXTRA tgkill i:iii __tgkill tgkill
> +close_range EXTRA close_range i:iii __close_range close_range
Hmm, I'm trying to wrap my head around whether this is correct for x32. But since
the kernel uses unsigned int, I think it is.
> diff --git a/sysdeps/unix/sysv/linux/tst-close_range.c b/sysdeps/unix/sysv/linux/tst-close_range.c
> new file mode 100644
> index 0000000000..1a127bb9cb
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/tst-close_range.c
> @@ -0,0 +1,198 @@
> +#include <dirent.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <limits.h>
> +#include <getopt.h>
> +#include <signal.h>
> +#include <stdbool.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +
> +#include <array_length.h>
> +#include <support/capture_subprocess.h>
> +#include <support/check.h>
> +#include <support/descriptors.h>
> +#include <support/support.h>
> +#include <support/xsched.h>
> +#include <support/xunistd.h>
> +
> +#define NFDS 100
> +
> +static void
> +close_range_test_common (int lowfd, unsigned int flags)
> +{
> + const int maximum_fd = lowfd + NFDS;
> + const int half_fd = maximum_fd / 2;
> + const int gap_1 = maximum_fd - 8;
> +
> + /* Close half of the descriptors and check result. */
> + {
> + int r = close_range (lowfd, half_fd, flags);
> + if (r == -1 && errno == ENOSYS)
> + FAIL_UNSUPPORTED ("close_range not supported");
> + TEST_COMPARE (r, 0);
> + }
> + for (int i = lowfd; i <= half_fd; i++)
> + {
> + TEST_COMPARE (fcntl (i, F_GETFL), -1);
> + TEST_COMPARE (errno, EBADF);
> + }
> + for (int i = half_fd + 1; i < maximum_fd; i++)
> + TEST_VERIFY (fcntl (i, F_GETFL) > -1);
> +
> + /* Create some gaps, close up to a threshold, and check result. */
> + xclose (57);
> + xclose (78);
> + xclose (81);
> + xclose (82);
> + xclose (84);
> + xclose (90);
Should be lowfd + 57 etc.
> + TEST_COMPARE (close_range (half_fd + 1, gap_1, flags), 0);
> + for (int i = half_fd + 1; i < gap_1; i++)
> + {
> + TEST_COMPARE (fcntl (i, F_GETFL), -1);
> + TEST_COMPARE (errno, EBADF);
> + }
> + for (int i = gap_1 + 1; i < maximum_fd; i++)
> + TEST_VERIFY (fcntl (i, F_GETFL) > -1);
> +
> + /* Close the remmaining but the last one. */
Typo: “remmaining”
> + TEST_COMPARE (close_range (gap_1 + 1, maximum_fd - 1, flags), 0);
> + for (int i = gap_1 + 1; i < maximum_fd - 1; i++)
> + {
> + TEST_COMPARE (fcntl (i, F_GETFL), -1);
> + TEST_COMPARE (errno, EBADF);
> + }
> + TEST_VERIFY (fcntl (maximum_fd, F_GETFL) > -1);
> +
> + /* Close the last one. */
> + TEST_COMPARE (close_range (maximum_fd, maximum_fd, flags), 0);
> + TEST_COMPARE (fcntl (maximum_fd, F_GETFL), -1);
> + TEST_COMPARE (errno, EBADF);
> +}
> +
> +/* Basic tests: check if the syscall close ranges with and without gaps. */
> +static void
> +__attribute__((used))
Why __attribute__ ((used))? (Also elsewhere in the file.)
> +close_range_test (void)
> +{
> + struct support_descriptors *descrs = support_descriptors_list ();
> +
> + int lowfd = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
> + for (int i = 0; i < NFDS; i++)
> + xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
Please add a check that there are no gaps. And this opens NFDS + 1
descriptors, so maybe add a comment to NFDS.
So perhaps this:
int lowfd = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
for (int i = 1; i <= NFDS; i++)
TEST_COMPARE (xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600),
lowfd + i);
> +/* Check if a clone_range on a subprocess created with CLONE_FILES close
> + the shared file descriptor table entries in the parent. */
> +static void
> +__attribute__((used))
> +close_range_test_subprocess (void)
> +{
> + struct support_descriptors *descrs = support_descriptors_list ();
> +
> + int lowfd = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
> + for (int i = 0; i < NFDS; i++)
> + xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
See above, maybe factor this out in a separate function?
> +_Noreturn static int
> +close_range_unshare_test_fn (void *arg)
> +{
> + int lowfd = (int) ((uintptr_t) arg);
> + close_range_test_common (lowfd, CLOSE_RANGE_UNSHARE);
> + exit (EXIT_SUCCESS);
> +}
I think you could check here against the original descriptor set, to
ensure that CLOSE_RANGE_UNSHARE still closes everything.
> +
> +/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess
> + created with CLONE_FILES does not close the parent file descriptor list. */
> +static void
> +__attribute__((used))
> +close_range_unshare_test (void)
> +{
(Create another descriptor list here.)
> + int lowfd = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
> + for (int i = 0; i < NFDS; i++)
> + xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
Shared function?
Thanks,
Florian
On 23/12/2020 09:22, Florian Weimer wrote:
> * Adhemerval Zanella:
>
>> diff --git a/manual/llio.texi b/manual/llio.texi
>> index c0a53e1a6e..018e7933de 100644
>> --- a/manual/llio.texi
>> +++ b/manual/llio.texi
>> @@ -284,6 +284,37 @@ of trying to close its underlying file descriptor with @code{close}.
>> This flushes any buffered output and updates the stream object to
>> indicate that it is closed.
>>
>> +@deftypefun int close_range (unsigned int @var{lowfd}, unsigned int @var{maxfd}, int @var{flags})
>> +@standards{Linux, unistd.h}
>> +@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
>> +@c This is a syscall for Linux v5.9. There is no fallback emultation for
>> +@c older kernels.
>
> Typo: “emultation”
Ack.
>
>> +The function @code{clode_range} closes the file descriptor from @var{lowfd}
>> +to @var{maxfd} (included). This function is similar to call @code{close} in
>
> “(inclusive)”?
>
>> +specified file descriptor range.
>
> Maybe add, “depending on the flags”? With the new CLOEXEC variant, the
> system call does not close anything, after all.
Ack.
>
>> +The @var{flags} add options on how the files are closes. Linux currently
>> +supports.
>
> “supports:” (with colon)
Ack.
>
>> +@vtable @code
>> +@item CLOSE_RANGE_UNSHARE
>> +It unshare the range of file descriptors from any other processes, instead
>> +of closing them.
>> +@end vtable
>
> “The function call unshares the range”.
>
> Is this really how CLOSE_RANGE_UNSHARE operates? I think the
> implementation falls through to the closing operation after unsharing
> the descriptor table, so it still closes the descriptors.
It is not; I think I used a sentence from an in-review patch for man-pages.
I think it would be better to just use the comment in the UAPI kernel header:
Unshare the file descriptor table before closing file descriptors.
>
>> +
>> +The normal return value from @code{close} is @math{0}; a value of @math{-1}
>
> “close” should be ”close_range”.
Ack.
>
>> +is returned in case of failure. The following @code{errno} error
>> +conditions are defined for this function:
>> +
>> +@table @code
>> +@item EINVAL
>> +The @var{lowfd} value is larger than @var{maxfd} or an unsupported @var{flag}
>> +is used.
>> +@end table
>> +@end deftypefun
>
> Typo: “@var{flag}” (missing s)
Ack.
>
> Other error codes with CLOSE_RANGE_UNSHARE: ENOMEM, EMFILE (see dup_fd).
> Maybe a few more.
Right, although I am not very fond of just duplicating what man-pages will
also document (probably in more detail).
>
>> diff --git a/sysdeps/unix/sysv/linux/syscalls.list b/sysdeps/unix/sysv/linux/syscalls.list
>> index 01ec2bfa95..53575669c7 100644
>> --- a/sysdeps/unix/sysv/linux/syscalls.list
>> +++ b/sysdeps/unix/sysv/linux/syscalls.list
>> @@ -101,3 +101,4 @@ pkey_alloc EXTRA pkey_alloc i:ii pkey_alloc
>> pkey_free EXTRA pkey_free i:i pkey_free
>> gettid EXTRA gettid Ei: __gettid gettid
>> tgkill EXTRA tgkill i:iii __tgkill tgkill
>> +close_range EXTRA close_range i:iii __close_range close_range
>
> Hmm, I'm trying to wrap around if this is correct for x32. But since
> the kernel uses unsigned int, I think it is.
I was in doubt as well whether x32 would require using 'U', but the tests
do pass cleanly on x32. I will add a test that uses ~0U as the
second argument as well.
>
>> diff --git a/sysdeps/unix/sysv/linux/tst-close_range.c b/sysdeps/unix/sysv/linux/tst-close_range.c
>> new file mode 100644
>> index 0000000000..1a127bb9cb
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/tst-close_range.c
>> @@ -0,0 +1,198 @@
>
>> +#include <dirent.h>
>> +#include <errno.h>
>> +#include <fcntl.h>
>> +#include <limits.h>
>> +#include <getopt.h>
>> +#include <signal.h>
>> +#include <stdbool.h>
>> +#include <stdlib.h>
>> +#include <stdint.h>
>> +
>> +#include <array_length.h>
>> +#include <support/capture_subprocess.h>
>> +#include <support/check.h>
>> +#include <support/descriptors.h>
>> +#include <support/support.h>
>> +#include <support/xsched.h>
>> +#include <support/xunistd.h>
>> +
>> +#define NFDS 100
>> +
>> +static void
>> +close_range_test_common (int lowfd, unsigned int flags)
>> +{
>> + const int maximum_fd = lowfd + NFDS;
>> + const int half_fd = maximum_fd / 2;
>> + const int gap_1 = maximum_fd - 8;
>> +
>> + /* Close half of the descriptors and check result. */
>> + {
>> + int r = close_range (lowfd, half_fd, flags);
>> + if (r == -1 && errno == ENOSYS)
>> + FAIL_UNSUPPORTED ("close_range not supported");
>> + TEST_COMPARE (r, 0);
>> + }
>> + for (int i = lowfd; i <= half_fd; i++)
>> + {
>> + TEST_COMPARE (fcntl (i, F_GETFL), -1);
>> + TEST_COMPARE (errno, EBADF);
>> + }
>> + for (int i = half_fd + 1; i < maximum_fd; i++)
>> + TEST_VERIFY (fcntl (i, F_GETFL) > -1);
>> +
>> + /* Create some gaps, close up to a threshold, and check result. */
>> + xclose (57);
>> + xclose (78);
>> + xclose (81);
>> + xclose (82);
>> + xclose (84);
>> + xclose (90);
>
> Should be lowfd + 57 etc.
It would make more sense indeed.
>
>> + TEST_COMPARE (close_range (half_fd + 1, gap_1, flags), 0);
>> + for (int i = half_fd + 1; i < gap_1; i++)
>> + {
>> + TEST_COMPARE (fcntl (i, F_GETFL), -1);
>> + TEST_COMPARE (errno, EBADF);
>> + }
>> + for (int i = gap_1 + 1; i < maximum_fd; i++)
>> + TEST_VERIFY (fcntl (i, F_GETFL) > -1);
>> +
>> + /* Close the remmaining but the last one. */
>
> Typo: “remmaining”
Ack.
>
>> + TEST_COMPARE (close_range (gap_1 + 1, maximum_fd - 1, flags), 0);
>> + for (int i = gap_1 + 1; i < maximum_fd - 1; i++)
>> + {
>> + TEST_COMPARE (fcntl (i, F_GETFL), -1);
>> + TEST_COMPARE (errno, EBADF);
>> + }
>> + TEST_VERIFY (fcntl (maximum_fd, F_GETFL) > -1);
>> +
>> + /* Close the last one. */
>> + TEST_COMPARE (close_range (maximum_fd, maximum_fd, flags), 0);
>> + TEST_COMPARE (fcntl (maximum_fd, F_GETFL), -1);
>> + TEST_COMPARE (errno, EBADF);
>> +}
>> +
>> +/* Basic tests: check if the syscall close ranges with and without gaps. */
>> +static void
>> +__attribute__((used))
>
> Why __attribute__ ((used))? (Also elsewhere in the file.)
This is a left over from debugging.
>
>> +close_range_test (void)
>> +{
>> + struct support_descriptors *descrs = support_descriptors_list ();
>> +
>> + int lowfd = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
>> + for (int i = 0; i < NFDS; i++)
>> + xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
>
> Please add a check that there are no gaps. And this opens NFDS + 1
> descriptors, so maybe add a comment to NFDS.
>
> So perhaps this:
>
> int lowfd = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
> for (int i = 1; i <= NFDS; i++)
> TEST_COMPARE (xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600),
> lowfd + i);
Ack.
>
>> +/* Check if a clone_range on a subprocess created with CLONE_FILES close
>> + the shared file descriptor table entries in the parent. */
>> +static void
>> +__attribute__((used))
>> +close_range_test_subprocess (void)
>> +{
>> + struct support_descriptors *descrs = support_descriptors_list ();
>> +
>> + int lowfd = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
>> + for (int i = 0; i < NFDS; i++)
>> + xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
>
> See above, maybe factor this out in a separate function?
Ack.
>
>> +_Noreturn static int
>> +close_range_unshare_test_fn (void *arg)
>> +{
>> + int lowfd = (int) ((uintptr_t) arg);
>> + close_range_test_common (lowfd, CLOSE_RANGE_UNSHARE);
>> + exit (EXIT_SUCCESS);
>> +}
>
> I think you could check here against the original descriptor set, to
> ensure that CLOSE_RANGE_UNSHARE still closes everything.
But close_range_test_common already does it.
>
>> +
>> +/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess
>> + created with CLONE_FILES does not close the parent file descriptor list. */
>> +static void
>> +__attribute__((used))
>> +close_range_unshare_test (void)
>> +{
>
> (Create another descriptor list here.)
Ack.
>
>> + int lowfd = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
>> + for (int i = 0; i < NFDS; i++)
>> + xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
>
> Shared function?
I will add it.
* Adhemerval Zanella:
>>> +_Noreturn static int
>>> +close_range_unshare_test_fn (void *arg)
>>> +{
>>> + int lowfd = (int) ((uintptr_t) arg);
>>> + close_range_test_common (lowfd, CLOSE_RANGE_UNSHARE);
>>> + exit (EXIT_SUCCESS);
>>> +}
>>
>> I think you could check here against the original descriptor set, to
>> ensure that CLOSE_RANGE_UNSHARE still closes everything.
>
> But close_range_test_common already does it.
Okay, that's probably enough.
Thanks,
Florian
@@ -28,6 +28,8 @@ Major new features:
The 32-bit RISC-V port requires at least Linux 5.4, GCC 7.1 and binutils
2.28.
+* On Linux, the close_range function has been added.
+
Deprecated and removed features, and other changes affecting compatibility:
* The mallinfo function is marked deprecated. Callers should call
new file mode 100644
@@ -0,0 +1,6 @@
+#include_next <bits/unistd_ext.h>
+
+#ifndef _ISOMAC
+extern int __close_range (unsigned int lowfd, unsigned int highfd, int flags);
+libc_hidden_proto (__close_range);
+#endif
@@ -284,6 +284,37 @@ of trying to close its underlying file descriptor with @code{close}.
This flushes any buffered output and updates the stream object to
indicate that it is closed.
+@deftypefun int close_range (unsigned int @var{lowfd}, unsigned int @var{maxfd}, int @var{flags})
+@standards{Linux, unistd.h}
+@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{@acsfd{}}}
+@c This is a syscall for Linux v5.9. There is no fallback emulation for
+@c older kernels.
+
+The function @code{close_range} closes the file descriptors from @var{lowfd}
+to @var{maxfd} (inclusive). This function is similar to calling @code{close} on
+each descriptor in the specified range, depending on the @var{flags}.
+
+The @var{flags} add options on how the files are closed. Linux currently
+supports:
+
+@vtable @code
+@item CLOSE_RANGE_UNSHARE
+Unshare the file descriptor table before closing the file descriptors in
+the range.
+@end vtable
+
+The normal return value from @code{close_range} is @math{0}; a value of @math{-1}
+is returned in case of failure. The following @code{errno} error
+conditions are defined for this function:
+
+@table @code
+@item EINVAL
+The @var{lowfd} value is larger than @var{maxfd} or an unsupported @var{flags}
+value is used.
+@end table
+@end deftypefun
+
+
@node I/O Primitives
@section Input and Output Primitives
@@ -61,7 +61,7 @@ sysdep_routines += adjtimex clone umount umount2 readahead sysctl \
open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get \
timerfd_gettime timerfd_settime prctl \
process_vm_readv process_vm_writev clock_adjtime \
- time64-support pselect32
+ time64-support pselect32 close_range
CFLAGS-gethostid.c = -fexceptions
CFLAGS-tee.c = -fexceptions -fasynchronous-unwind-tables
@@ -103,7 +103,8 @@ tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \
tst-quota tst-sync_file_range tst-sysconf-iov_max tst-ttyname \
test-errno-linux tst-memfd_create tst-mlock2 tst-pkey \
tst-rlimit-infinity tst-ofdlocks tst-gettid tst-gettid-kill \
- tst-tgkill tst-sysvsem-linux tst-sysvmsg-linux tst-sysvshm-linux
+ tst-tgkill tst-sysvsem-linux tst-sysvmsg-linux tst-sysvshm-linux \
+ tst-close_range
tests-internal += tst-ofdlocks-compat tst-sigcontext-get_pc
CFLAGS-tst-sigcontext-get_pc.c = -fasynchronous-unwind-tables
@@ -177,6 +178,14 @@ $(objpfx)tst-mman-consts.out: ../sysdeps/unix/sysv/linux/tst-mman-consts.py
< /dev/null > $@ 2>&1; $(evaluate-test)
$(objpfx)tst-mman-consts.out: $(sysdeps-linux-python-deps)
+tests-special += $(objpfx)tst-close_range-consts.out
+$(objpfx)tst-close_range-consts.out: ../sysdeps/unix/sysv/linux/tst-close_range-consts.py
+ $(sysdeps-linux-python) \
+ ../sysdeps/unix/sysv/linux/tst-close_range-consts.py \
+ $(sysdeps-linux-python-cc) \
+ < /dev/null > $@ 2>&1; $(evaluate-test)
+$(objpfx)tst-close_range-consts.out: $(sysdeps-linux-python-deps)
+
$(objpfx)tst-gettid: $(shared-thread-library)
$(objpfx)tst-gettid-kill: $(shared-thread-library)
$(objpfx)tst-tgkill: $(shared-thread-library)
@@ -169,6 +169,9 @@ libc {
}
GLIBC_2.32 {
}
+ GLIBC_2.33 {
+ close_range;
+ }
GLIBC_PRIVATE {
# functions used in other libraries
__syscall_rt_sigqueueinfo;
@@ -2160,6 +2160,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2242,6 +2242,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -1920,6 +1920,7 @@ GLIBC_2.32 wprintf F
GLIBC_2.32 write F
GLIBC_2.32 writev F
GLIBC_2.32 wscanf F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -141,6 +141,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -33,4 +33,15 @@
not detached and has not been joined. */
extern __pid_t gettid (void) __THROW;
+/* Unshare the file descriptor table before closing file descriptors. */
+#define CLOSE_RANGE_UNSHARE (1U << 1)
+
+/* Close all file descriptors in the range FD up to MAX_FD. The flags in FLAGS
+ are defined by the CLOSE_RANGE prefix. This function behaves like close
+ on the range, but in a fail-safe way, where it will either fail and not close
+ any file descriptor or close all of them. Returns 0 on success or -1
+ on failure (and sets errno accordingly). */
+extern int close_range (unsigned int __fd, unsigned int __max_fd,
+ int __flags) __THROW;
+
#endif
@@ -2104,6 +2104,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2063,6 +2063,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2229,6 +2229,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2095,6 +2095,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2175,6 +2175,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2155,6 +2155,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2146,6 +2146,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2152,6 +2152,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2146,6 +2146,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2193,6 +2193,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2202,6 +2202,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2065,6 +2065,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2355,6 +2355,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -595,6 +595,7 @@ GLIBC_2.33 clock_nanosleep F
GLIBC_2.33 clock_settime F
GLIBC_2.33 clone F
GLIBC_2.33 close F
+GLIBC_2.33 close_range F
GLIBC_2.33 closedir F
GLIBC_2.33 closelog F
GLIBC_2.33 confstr F
@@ -2122,6 +2122,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2200,6 +2200,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2101,6 +2101,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2067,6 +2067,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2191,6 +2191,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2118,6 +2118,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -101,3 +101,4 @@ pkey_alloc EXTRA pkey_alloc i:ii pkey_alloc
pkey_free EXTRA pkey_free i:i pkey_free
gettid EXTRA gettid Ei: __gettid gettid
tgkill EXTRA tgkill i:iii __tgkill tgkill
+close_range EXTRA close_range i:iii __close_range close_range
new file mode 100644
@@ -0,0 +1,49 @@
+#!/usr/bin/python3
+# Test that glibc's unistd_ext.h constants match the kernel's.
+# Copyright (C) 2020 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+import argparse
+import sys
+
+import glibcextract
+import glibcsyscalls
+
+
+def main():
+    """Compare glibc's CLOSE_RANGE_* macros with the kernel header's.
+
+    Parses the --cc option, extracts the macros matching CLOSE_RANGE_.*
+    from <unistd.h> and from <linux/close_range.h> with that compiler,
+    and exits with the status returned by the comparison.
+    """
+    arg_parser = argparse.ArgumentParser(
+        description="Test that glibc's unistd_ext.h constants match "
+        "the kernel's.")
+    arg_parser.add_argument('--cc', metavar='CC',
+                            help='C compiler (including options) to use')
+    opts = arg_parser.parse_args()
+
+    headers_version = glibcsyscalls.linux_kernel_version(opts.cc)
+    glibc_version = (5, 9)
+
+    glibc_source = ('#define _GNU_SOURCE 1\n'
+                    '#include <unistd.h>\n')
+    kernel_source = ('#define _GNU_SOURCE 1\n'
+                     '#include <linux/close_range.h>\n')
+
+    # The two trailing booleans record which side is based on the newer
+    # kernel version.
+    glibc_is_newer = glibc_version > headers_version
+    headers_are_newer = headers_version > glibc_version
+    status = glibcextract.compare_macro_consts(
+        glibc_source,
+        kernel_source,
+        opts.cc,
+        'CLOSE_RANGE_.*',
+        None,
+        glibc_is_newer,
+        headers_are_newer)
+    sys.exit(status)
+
+if __name__ == '__main__':
+ main()
new file mode 100644
@@ -0,0 +1,198 @@
+/* Test for the close_range system call.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#include <array_length.h>
+#include <support/capture_subprocess.h>
+#include <support/check.h>
+#include <support/descriptors.h>
+#include <support/support.h>
+#include <support/xsched.h>
+#include <support/xunistd.h>
+
+#define NFDS 100
+
+/* Exercise close_range on the NFDS + 1 consecutive descriptors
+   LOWFD .. LOWFD + NFDS, passing FLAGS to every call.  The range is
+   closed in four steps (lower half, a part with gaps, the rest except
+   the last descriptor, the last descriptor) and each step is verified
+   with fcntl.  The test is reported as unsupported if the first call
+   fails with ENOSYS.  */
+static void
+close_range_test_common (int lowfd, unsigned int flags)
+{
+  const int maximum_fd = lowfd + NFDS;
+  /* Computed relative to LOWFD so that the split does not depend on the
+     descriptor numbers the caller happened to get.  */
+  const int half_fd = lowfd + NFDS / 2;
+  const int gap_1 = maximum_fd - 8;
+
+  /* Close half of the descriptors and check result.  */
+  {
+    int r = close_range (lowfd, half_fd, flags);
+    if (r == -1 && errno == ENOSYS)
+      FAIL_UNSUPPORTED ("close_range not supported");
+    TEST_COMPARE (r, 0);
+  }
+  for (int i = lowfd; i <= half_fd; i++)
+    {
+      TEST_COMPARE (fcntl (i, F_GETFL), -1);
+      TEST_COMPARE (errno, EBADF);
+    }
+  for (int i = half_fd + 1; i <= maximum_fd; i++)
+    TEST_VERIFY (fcntl (i, F_GETFL) > -1);
+
+  /* Create some gaps, close up to a threshold, and check result.  The
+     offsets all lie strictly between HALF_FD and GAP_1.  */
+  xclose (lowfd + 57);
+  xclose (lowfd + 78);
+  xclose (lowfd + 81);
+  xclose (lowfd + 82);
+  xclose (lowfd + 84);
+  xclose (lowfd + 90);
+
+  TEST_COMPARE (close_range (half_fd + 1, gap_1, flags), 0);
+  /* The upper bound passed to close_range is inclusive, so GAP_1 itself
+     must be closed too.  */
+  for (int i = half_fd + 1; i <= gap_1; i++)
+    {
+      TEST_COMPARE (fcntl (i, F_GETFL), -1);
+      TEST_COMPARE (errno, EBADF);
+    }
+  for (int i = gap_1 + 1; i <= maximum_fd; i++)
+    TEST_VERIFY (fcntl (i, F_GETFL) > -1);
+
+  /* Close the remaining but the last one.  */
+  TEST_COMPARE (close_range (gap_1 + 1, maximum_fd - 1, flags), 0);
+  for (int i = gap_1 + 1; i <= maximum_fd - 1; i++)
+    {
+      TEST_COMPARE (fcntl (i, F_GETFL), -1);
+      TEST_COMPARE (errno, EBADF);
+    }
+  TEST_VERIFY (fcntl (maximum_fd, F_GETFL) > -1);
+
+  /* Close the last one.  */
+  TEST_COMPARE (close_range (maximum_fd, maximum_fd, flags), 0);
+  TEST_COMPARE (fcntl (maximum_fd, F_GETFL), -1);
+  TEST_COMPARE (errno, EBADF);
+}
+
+/* Basic tests: check if the syscall closes ranges with and without gaps.  */
+static void
+close_range_test (void)
+{
+  struct support_descriptors *descrs = support_descriptors_list ();
+
+  /* Open NFDS + 1 descriptors, LOWFD .. LOWFD + NFDS, and require them to
+     be consecutive: close_range_test_common expects the whole range
+     starting at LOWFD to be populated.  */
+  int lowfd = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
+  for (int i = 1; i <= NFDS; i++)
+    TEST_COMPARE (xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600),
+                  lowfd + i);
+
+  close_range_test_common (lowfd, 0);
+
+  /* Double check by checking /proc.  */
+  support_descriptors_check (descrs);
+  support_descriptors_free (descrs);
+}
+
+/* Entry point handed to xclone.  ARG carries the lowest descriptor number
+   encoded as a pointer; run the common checks on it without any flag and
+   terminate with a successful exit status.  */
+_Noreturn static int
+close_range_test_fn (void *arg)
+{
+  const uintptr_t encoded_fd = (uintptr_t) arg;
+
+  close_range_test_common ((int) encoded_fd, 0);
+  exit (EXIT_SUCCESS);
+}
+
+/* Check if a close_range on a subprocess created with CLONE_FILES closes
+   the shared file descriptor table entries in the parent.  */
+static void
+close_range_test_subprocess (void)
+{
+  struct support_descriptors *descrs = support_descriptors_list ();
+
+  /* Open NFDS + 1 descriptors, LOWFD .. LOWFD + NFDS, and require them to
+     be consecutive.  */
+  int lowfd = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
+  for (int i = 1; i <= NFDS; i++)
+    TEST_COMPARE (xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600),
+                  lowfd + i);
+
+  enum { stack_size = 4096 };
+  DEFINE_STACK (stack, stack_size);
+  pid_t pid = xclone (close_range_test_fn, (void*) (uintptr_t) lowfd, stack,
+                      stack_size, CLONE_FILES | SIGCHLD);
+  TEST_VERIFY_EXIT (pid > 0);
+  int status;
+  xwaitpid (pid, &status, 0);
+  TEST_VERIFY (WIFEXITED (status));
+  TEST_COMPARE (WEXITSTATUS (status), 0);
+
+  /* The child was created with CLONE_FILES, so every descriptor opened
+     above must now be closed here as well.  The bound is relative to
+     LOWFD and inclusive, matching the range that was opened.  */
+  for (int i = lowfd; i <= lowfd + NFDS; i++)
+    TEST_VERIFY (fcntl (i, F_GETFL) < 0);
+
+  support_descriptors_check (descrs);
+  support_descriptors_free (descrs);
+}
+
+
+/* Entry point handed to xclone.  ARG carries the lowest descriptor number
+   encoded as a pointer; run the common checks on it with
+   CLOSE_RANGE_UNSHARE and terminate with a successful exit status.  */
+_Noreturn static int
+close_range_unshare_test_fn (void *arg)
+{
+  const uintptr_t encoded_fd = (uintptr_t) arg;
+
+  close_range_test_common ((int) encoded_fd, CLOSE_RANGE_UNSHARE);
+  exit (EXIT_SUCCESS);
+}
+
+/* Check if a close_range with CLOSE_RANGE_UNSHARE issued from a subprocess
+   created with CLONE_FILES does not close the parent file descriptor list.  */
+static void
+close_range_unshare_test (void)
+{
+  /* Snapshot taken before anything is opened; compared at the very end to
+     verify that this test does not leave descriptors behind.  */
+  struct support_descriptors *descrs_before = support_descriptors_list ();
+
+  /* Open NFDS + 1 descriptors, LOWFD .. LOWFD + NFDS, and require them to
+     be consecutive.  */
+  int lowfd = xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600);
+  for (int i = 1; i <= NFDS; i++)
+    TEST_COMPARE (xopen ("/dev/null", O_RDONLY | O_CLOEXEC, 0600),
+                  lowfd + i);
+
+  /* Snapshot including the descriptors opened above; the child must not
+     change it.  */
+  struct support_descriptors *descrs = support_descriptors_list ();
+
+  enum { stack_size = 4096 };
+  DEFINE_STACK (stack, stack_size);
+  pid_t pid = xclone (close_range_unshare_test_fn, (void*) (uintptr_t) lowfd,
+                      stack, stack_size, CLONE_FILES | SIGCHLD);
+  TEST_VERIFY_EXIT (pid > 0);
+  int status;
+  xwaitpid (pid, &status, 0);
+  TEST_VERIFY (WIFEXITED (status));
+  TEST_COMPARE (WEXITSTATUS (status), 0);
+
+  /* Every descriptor opened above must still be open in the parent.  */
+  for (int i = lowfd; i <= lowfd + NFDS; i++)
+    TEST_VERIFY (fcntl (i, F_GETFL) > -1);
+
+  support_descriptors_check (descrs);
+  support_descriptors_free (descrs);
+
+  TEST_COMPARE (close_range (lowfd, lowfd + NFDS, 0), 0);
+
+  support_descriptors_check (descrs_before);
+  support_descriptors_free (descrs_before);
+}
+
+/* Test driver entry point: run the three close_range scenarios in order
+   (same process, CLONE_FILES subprocess, CLONE_FILES subprocess with
+   CLOSE_RANGE_UNSHARE).  */
+static int
+do_test (void)
+{
+  close_range_test ();
+  close_range_test_subprocess ();
+  close_range_unshare_test ();
+
+  return EXIT_SUCCESS;
+}
+
+#include <support/test-driver.c>
@@ -2076,6 +2076,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F
@@ -2173,6 +2173,7 @@ GLIBC_2.32 sigabbrev_np F
GLIBC_2.32 sigdescr_np F
GLIBC_2.32 strerrordesc_np F
GLIBC_2.32 strerrorname_np F
+GLIBC_2.33 close_range F
GLIBC_2.33 fstat F
GLIBC_2.33 fstat64 F
GLIBC_2.33 fstatat F