@@ -167,7 +167,9 @@ static inline _Bool __closefrom_fallback (int __lowfd, _Bool dirfd_fallback)
return false;
}
# else
-extern _Bool __closefrom_fallback (int __lowfd, _Bool) attribute_hidden;
+extern _Bool __closefrom_fallback (unsigned int __from, unsigned int __to,
+ _Bool)
+ attribute_hidden;
# endif
extern ssize_t __read (int __fd, void *__buf, size_t __nbytes);
libc_hidden_proto (__read)
@@ -30,7 +30,7 @@ __closefrom (int lowfd)
if (r == 0)
return ;
- if (!__closefrom_fallback (l, true))
+ if (!__closefrom_fallback (l, ~0U, true))
__fortify_fail ("closefrom failed to close a file descriptor");
}
weak_alias (__closefrom, closefrom)
@@ -109,7 +109,9 @@ tests := test-errno tstgetopt testfnm runtests runptests \
tst-glob-tilde test-ssize-max tst-spawn4 bug-regex37 \
bug-regex38 tst-regcomp-truncated tst-spawn-chdir \
tst-wordexp-nocmd tst-execveat tst-spawn5 \
- tst-sched_getaffinity tst-spawn6
+ tst-sched_getaffinity tst-spawn6 \
+ tst-spawn-chdir-timens tst-spawn4-timens tst-spawn5-timens \
+ tst-spawn6-timens
# Test for the glob symbol version that was replaced in glibc 2.27.
ifeq ($(have-GLIBC_2.26)$(build-shared),yesyes)
@@ -130,7 +132,15 @@ xtests := tst-getaddrinfo4 tst-getaddrinfo5 tst-sched_rr_get_interval
xtests-time64 := tst-sched_rr_get_interval-time64
ifeq (yes,$(build-shared))
test-srcs := globtest
-tests += wordexp-test tst-exec tst-spawn tst-spawn2 tst-spawn3
+tests += \
+ tst-exec \
+ tst-spawn \
+ tst-spawn-timens \
+ tst-spawn2 \
+ tst-spawn2-timens \
+ tst-spawn3 \
+ wordexp-test \
+ # tests
endif
ifeq (yesyes,$(build-shared)$(have-thread-library))
tests += tst-getopt-cancel tst-_Fork
@@ -290,7 +300,9 @@ tst-execvpe5-ARGS = -- $(host-test-program-cmd)
tst-spawn-ARGS = -- $(host-test-program-cmd)
tst-spawn-static-ARGS = $(tst-spawn-ARGS)
tst-spawn5-ARGS = -- $(host-test-program-cmd)
+tst-spawn5-timens-ARGS = -- $(host-test-program-cmd)
tst-spawn6-ARGS = -- $(host-test-program-cmd)
+tst-spawn6-timens-ARGS = -- $(host-test-program-cmd)
tst-dir-ARGS = `pwd` `cd $(common-objdir)/$(subdir); pwd` `cd $(common-objdir); pwd` $(objpfx)tst-dir
tst-chmod-ARGS = $(objdir)
tst-vfork3-ARGS = --test-dir=$(objpfx)
new file mode 100644
@@ -0,0 +1,2 @@
+#define CHECK_TIMENAMESPACE
+#include "tst-spawn-chdir.c"
@@ -29,6 +29,7 @@
#include <support/test-driver.h>
#include <support/xstdio.h>
#include <support/xunistd.h>
+#include <support/namespace.h>
#include <unistd.h>
/* Reads the file at PATH, which must consist of exactly one line.
@@ -66,6 +67,7 @@ get_pwd_program (void)
FAIL_EXIT1 ("cannot find pwd program");
}
+
/* Adds chdir operations to ACTIONS, using PATH. If DO_FCHDIR, use
the open function and TMPFD to emulate chdir using fchdir. */
static void
@@ -87,6 +89,12 @@ add_chdir (posix_spawn_file_actions_t *actions, const char *path,
static int
do_test (void)
{
+#ifdef CHECK_TIMENAMESPACE
+ support_become_root();
+ if (!support_enter_time_namespace ())
+ return EXIT_UNSUPPORTED;
+#endif
+
/* Directory for temporary file data. Each subtest uses a numeric
subdirectory. */
char *directory = support_create_temp_directory ("tst-spawn-chdir-");
new file mode 100644
@@ -0,0 +1,2 @@
+/* Build the included test with CHECK_TIMENAMESPACE defined, so that its
+   do_test first tries to enter a time namespace.
+   NOTE(review): this new file is followed by the tst-spawn.c hunk, yet it
+   includes the same source as the tst-spawn-chdir time-namespace variant;
+   presumably "tst-spawn.c" was intended -- confirm (that test would also
+   need a matching -ARGS entry in the Makefile).  */
+#define CHECK_TIMENAMESPACE
+#include "tst-spawn-chdir.c"
@@ -178,6 +178,12 @@ do_test (int argc, char *argv[])
return handle_restart (argv[1], argv[2], argv[3], argv[4], argv[5],
argv[6]);
+#ifdef CHECK_TIMENAMESPACE
+ support_become_root();
+ if (!support_enter_time_namespace ())
+ return EXIT_UNSUPPORTED;
+#endif
+
/* Prepare the test. We are creating four files: two which file descriptor
will be marked with FD_CLOEXEC, another which is not. */
new file mode 100644
@@ -0,0 +1,2 @@
+#define CHECK_TIMENAMESPACE
+#include "tst-spawn2.c"
@@ -26,10 +26,18 @@
#include <stdio.h>
#include <support/check.h>
+#include <support/test-driver.h>
+#include <support/namespace.h>
int
do_test (void)
{
+#ifdef CHECK_TIMENAMESPACE
+ support_become_root();
+ if (!support_enter_time_namespace ())
+ return EXIT_UNSUPPORTED;
+#endif
+
/* Check if posix_spawn correctly returns an error and an invalid pid
by trying to spawn an invalid binary. */
new file mode 100644
@@ -0,0 +1,2 @@
+#define CHECK_TIMENAMESPACE
+#include "tst-spawn4.c"
@@ -21,13 +21,21 @@
#include <unistd.h>
#include <sys/stat.h>
-#include <support/xunistd.h>
#include <support/check.h>
+#include <support/namespace.h>
#include <support/temp_file.h>
+#include <support/test-driver.h>
+#include <support/xunistd.h>
static int
do_test (void)
{
+#ifdef CHECK_TIMENAMESPACE
+ support_become_root();
+ if (!support_enter_time_namespace ())
+ return EXIT_UNSUPPORTED;
+#endif
+
char *scriptname;
int fd = create_temp_file ("tst-spawn4.", &scriptname);
TEST_VERIFY_EXIT (fd >= 0);
new file mode 100644
@@ -0,0 +1,2 @@
+#define CHECK_TIMENAMESPACE
+#include "tst-spawn5.c"
@@ -28,8 +28,10 @@
#include <limits.h>
#include <support/check.h>
-#include <support/xunistd.h>
+#include <support/namespace.h>
#include <support/support.h>
+#include <support/test-driver.h>
+#include <support/xunistd.h>
#include <arch-fd_to_filename.h>
#include <array_length.h>
@@ -278,6 +280,12 @@ do_test (int argc, char *argv[])
/* Ignore the application name. */
handle_restart (argc - 1, &argv[1]);
+#ifdef CHECK_TIMENAMESPACE
+ support_become_root();
+ if (!support_enter_time_namespace ())
+ return EXIT_UNSUPPORTED;
+#endif
+
TEST_VERIFY_EXIT (argc == 2 || argc == 5);
int i;
new file mode 100644
@@ -0,0 +1,2 @@
+#define CHECK_TIMENAMESPACE
+#include "tst-spawn6.c"
@@ -25,12 +25,14 @@
#include <spawn.h>
#include <stdbool.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include <support/check.h>
+#include <support/namespace.h>
+#include <support/test-driver.h>
#include <support/xunistd.h>
-#include <sys/wait.h>
#include <sys/ioctl.h>
-#include <stdlib.h>
+#include <sys/wait.h>
#include <termios.h>
#ifndef PATH_MAX
@@ -124,6 +126,12 @@ run_subprogram (int argc, char *argv[], const posix_spawnattr_t *attr,
static int
run_test (int argc, char *argv[])
{
+#ifdef CHECK_TIMENAMESPACE
+ support_become_root();
+ if (!support_enter_time_namespace ())
+ return EXIT_UNSUPPORTED;
+#endif
+
/* We must have either:
- four parameters left if called initially:
+ path to ld.so optional
@@ -28,7 +28,7 @@
/proc/self/fd open will trigger a fallback that tries to close a file
descriptor before proceed. */
_Bool
-__closefrom_fallback (int from, _Bool dirfd_fallback)
+__closefrom_fallback (unsigned int from, unsigned int to, _Bool dirfd_fallback)
{
int dirfd = __open_nocancel (FD_TO_FILENAME_PREFIX, O_RDONLY | O_DIRECTORY,
0);
@@ -40,7 +40,7 @@ __closefrom_fallback (int from, _Bool dirfd_fallback)
return false;
/* The closefrom should work even when process can't open new files. */
- for (int i = from; i < INT_MAX; i++)
+ for (int i = from; i <= to; i++)
{
int r = __close_nocancel (i);
if (r == 0 || (r == -1 && errno != EBADF))
@@ -84,6 +84,8 @@ __closefrom_fallback (int from, _Bool dirfd_fallback)
if (fd == dirfd || fd < from)
continue;
+ if (fd > to)
+ break;
/* We ignore close errors because EBADF, EINTR, and EIO means the
descriptor has been released. */
@@ -44,7 +44,11 @@
third issue is done by a stack allocation in parent, and by using a
field in struct spawn_args where the child can write an error
code. CLONE_VFORK ensures that the parent does not run until the
- child has either exec'ed successfully or exited. */
+ child has either exec'ed successfully or exited.
+
+   If the clone with CLONE_VM and CLONE_VFORK fails (due to a kernel
+   limitation such as a time namespace), only CLONE_VFORK is used instead
+   and the preparation and execve failures are communicated with a pipe.  */
/* The Unix standard contains a long explanation of the way to signal
@@ -67,6 +71,7 @@ struct posix_spawn_args
char *const *envp;
int xflags;
int err;
+ int pipe[2];
};
/* Older version requires that shell script without shebang definition
@@ -94,15 +99,59 @@ maybe_script_execute (struct posix_spawn_args *args)
}
}
+/* If the file action FA targets the descriptor currently used as the write
+   end of the pipe that reports errors to the parent (P[1]), move the pipe
+   onto an unoccupied file descriptor first and update P[1] accordingly.
+   Return false only if the descriptor could not be duplicated; actions
+   that carry no target descriptor are accepted unchanged.  */
+static inline bool
+spawni_fa_handle_pipe (const struct __spawn_action *fa, int p[])
+{
+  int fd;
+
+  switch (fa->tag)
+    {
+    case spawn_do_close:
+      fd = fa->action.close_action.fd;
+      break;
+    case spawn_do_open:
+      fd = fa->action.open_action.fd;
+      break;
+    case spawn_do_dup2:
+      fd = fa->action.dup2_action.newfd;
+      break;
+    case spawn_do_fchdir:
+      fd = fa->action.fchdir_action.fd;
+      /* Without this break the fchdir descriptor was silently ignored by
+         falling through to the default case.  */
+      break;
+    default:
+      return true;
+    }
+
+  if (fd == p[1])
+    {
+      /* F_DUPFD_CLOEXEC takes the lowest acceptable descriptor number as
+         its argument; 0 accepts any free descriptor.  */
+      int r = __fcntl (p[1], F_DUPFD_CLOEXEC, 0);
+      if (r < 0)
+        return false;
+      __close_nocancel (p[1]);
+      p[1] = r;
+    }
+
+  return true;
+}
+
+/* Close the descriptors in the range FROM..TO.  The close_range syscall is
+   tried first; if it fails, __closefrom_fallback is used instead.  The
+   bounds are unsigned to match both callees and the ~0U upper bound passed
+   by the callers.  Return true if either method succeeded.  */
+static inline bool
+spawni_fa_closerange (unsigned int from, unsigned int to)
+{
+  int r = INLINE_SYSCALL_CALL (close_range, from, to, 0);
+  return r == 0 || __closefrom_fallback (from, to, false);
+}
+
/* Function used in the clone call to setup the signals mask, posix_spawn
attributes, and file actions. It run on its own stack (provided by the
posix_spawn call). */
-static int
-__spawni_child (void *arguments)
+static _Noreturn int
+spawni_child (void *arguments)
{
struct posix_spawn_args *args = arguments;
const posix_spawnattr_t *restrict attr = args->attr;
const posix_spawn_file_actions_t *file_actions = args->fa;
+ bool use_pipe = args->pipe[0] != -1;
/* The child must ensure that no signal handler are enabled because it shared
memory with parent, so the signal disposition must be either SIG_DFL or
@@ -113,6 +162,9 @@ __spawni_child (void *arguments)
struct sigaction sa;
memset (&sa, '\0', sizeof (sa));
+ if (use_pipe)
+ __close (args->pipe[0]);
+
sigset_t hset;
__sigprocmask (SIG_BLOCK, 0, &hset);
for (int sig = 1; sig < _NSIG; ++sig)
@@ -181,6 +233,9 @@ __spawni_child (void *arguments)
{
struct __spawn_action *action = &file_actions->__actions[cnt];
+ if (use_pipe && !spawni_fa_handle_pipe (action, args->pipe))
+ goto fail;
+
switch (action->tag)
{
case spawn_do_close:
@@ -233,6 +288,11 @@ __spawni_child (void *arguments)
break;
case spawn_do_dup2:
+ if (use_pipe && action->action.dup2_action.fd == args->pipe[1])
+ {
+ errno = EBADF;
+ goto fail;
+ }
/* Austin Group issue #411 requires adddup2 action with source
and destination being equal to remove close-on-exec flag. */
if (action->action.dup2_action.fd
@@ -264,8 +324,20 @@ __spawni_child (void *arguments)
case spawn_do_closefrom:
{
int lowfd = action->action.closefrom_action.from;
- int r = INLINE_SYSCALL_CALL (close_range, lowfd, ~0U, 0);
- if (r != 0 && !__closefrom_fallback (lowfd, false))
+ /* Skip the pipe descriptor if it is used. No need to handle
+ it since it is created with O_CLOEXEC. */
+ if (use_pipe && args->pipe[1] == lowfd)
+ {
+ if (!spawni_fa_closerange (lowfd + 1u, ~0U))
+ goto fail;
+ }
+ else if (use_pipe && args->pipe[1] > lowfd)
+ {
+ if (!spawni_fa_closerange (lowfd, args->pipe[1] - 1)
+ || !spawni_fa_closerange (args->pipe[1] + 1u, ~0U))
+ goto fail;
+ }
+ else if (!spawni_fa_closerange (lowfd, ~0U))
goto fail;
} break;
@@ -300,10 +372,112 @@ fail:
(EINTERNALBUG) describing that, use ECHILD. Another option would
be to set args->err to some negative sentinel and have the parent
abort(), but that seems needlessly harsh. */
- args->err = errno ? : ECHILD;
+ int ret = errno ? : ECHILD;
+ if (use_pipe)
+ {
+ while (__write_nocancel (args->pipe[1], &ret, sizeof (ret)) < 0)
+ if (errno == EPIPE || errno == EBADF)
+ break;
+ }
+ else
+ args->err = ret;
+
_exit (SPAWN_ERROR);
}
+/* Create the helper process: clone with FLAGS and SIGCHLD as the exit
+   signal, running spawni_child on ARGS with STACK (STACK_SIZE bytes) as
+   the child stack.  Return the result of __clone_internal.  */
+static pid_t
+clone_call (struct posix_spawn_args *args, int flags, void *stack,
+            size_t stack_size)
+{
+  /* Every field not set below stays zero.  */
+  struct clone_args cl_args = { 0 };
+
+  cl_args.flags = flags;
+  cl_args.exit_signal = SIGCHLD;
+  cl_args.stack = (uintptr_t) stack;
+  cl_args.stack_size = stack_size;
+
+  return __clone_internal (&cl_args, spawni_child, args);
+}
+
+
+/* Spawn a new process using clone with CLONE_VM | CLONE_VFORK (to optimize
+   memory and overcommit).  *NEW_PID receives the clone result; *EC receives
+   the error reported by the helper through ARGS->err, or errno if clone
+   itself failed.  Return true if no retry should be attempted: either the
+   helper process was created, or clone failed due to resource exhaustion
+   (ENOMEM or EAGAIN).  Return false for any other clone failure, so the
+   caller can retry without CLONE_VM.  */
+static bool
+spawni_clone (struct posix_spawn_args *args, pid_t *new_pid, int *ec,
+              void *stack, size_t stack_size)
+{
+  /* The clone flags used will create a new child that will run in the same
+     memory space (CLONE_VM) and the execution of the calling thread will be
+     suspended until the child calls execve or _exit.
+
+     Also, since the calling thread execution will be suspended, there is no
+     need for CLONE_SETTLS.  Although parent and child share the same TLS
+     namespace, there will be no concurrent access for TLS variables (errno
+     for instance).  */
+  *new_pid = clone_call (args, CLONE_VM | CLONE_VFORK, stack, stack_size);
+
+  /* It needs to collect the case where the auxiliary process was created
+     but failed to execute the file (due to either a preparation step or
+     execve itself).  */
+  if (*new_pid > 0)
+    {
+      /* Also, it handles the unlikely case where the auxiliary process was
+         terminated before calling execve as if it was successful.  The
+         args.err is set to 0 as default and changed to a positive value
+         only in case of failure, so in case of premature termination
+         due to a signal args.err will remain zeroed and it will be up to
+         the caller to actually collect it.  */
+      *ec = args->err;
+      if (*ec > 0)
+        /* There is still an unlikely case where the child is cancelled after
+           setting args.err, due to a positive error value.  Also there is a
+           possible pid reuse race (where the kernel allocated the same pid
+           to an unrelated process).  Unfortunately, due to synchronization
+           issues where the kernel might not have the process collected,
+           the waitpid below can not use WNOHANG.  */
+        __waitpid (*new_pid, NULL, 0);
+    }
+  else
+    *ec = errno;
+
+  /* There is not much point in retrying with fork and exec if the kernel
+     returns a failure due to resource exhaustion.  The saved *EC is tested
+     rather than errno so the decision does not depend on errno still
+     holding the clone failure.  */
+  return *new_pid > 0 || *ec == ENOMEM || *ec == EAGAIN;
+}
+
+
+/* Fallback spawn case which does not use CLONE_VM.  Any preparation step or
+   execve failure is reported by the helper process through a pipe, which
+   requires additional care in the helper since it holds an extra file
+   descriptor.  *NEW_PID receives the clone result (it is left untouched if
+   the pipe cannot be created) and *EC the error code: the pipe2 or clone
+   errno, the value sent by the helper, or 0 if nothing was received.  */
+static void
+spawni_fork (struct posix_spawn_args *args, pid_t *new_pid, int *ec,
+             char *stack, size_t stack_size)
+{
+  if (__pipe2 (args->pipe, O_CLOEXEC) != 0)
+    {
+      *ec = errno;
+      return;
+    }
+
+  /* Do not trigger atfork handler nor any internal state reset since the
+     helper process will call execve.  */
+  *new_pid = clone_call (args, CLONE_VFORK, stack, stack_size);
+  /* Save the clone error now, before the close below has a chance to
+     modify errno.  */
+  int clone_errno = errno;
+
+  /* The parent only reads from the pipe.  */
+  __close (args->pipe[1]);
+
+  if (*new_pid > 0)
+    {
+      if (__read (args->pipe[0], ec, sizeof *ec) != sizeof *ec)
+        /* A successful execve will close the helper process pipe end.  */
+        *ec = 0;
+      else
+        /* The helper reported a failure; collect it.  */
+        __waitpid (*new_pid, NULL, 0);
+    }
+  else
+    *ec = clone_errno;
+
+  __close (args->pipe[0]);
+}
+
+
/* Spawn a new process executing PATH with the attributes describes in *ATTRP.
Before running the process perform the actions described in FILE-ACTIONS. */
static int
@@ -367,49 +541,16 @@ __spawnix (pid_t * pid, const char *file,
args.argc = argc;
args.envp = envp;
args.xflags = xflags;
+ args.pipe[0] = args.pipe[1] = -1;
__libc_signal_block_all (&args.oldmask);
- /* The clone flags used will create a new child that will run in the same
- memory space (CLONE_VM) and the execution of calling thread will be
- suspend until the child calls execve or _exit.
-
- Also since the calling thread execution will be suspend, there is not
- need for CLONE_SETTLS. Although parent and child share the same TLS
- namespace, there will be no concurrent access for TLS variables (errno
- for instance). */
- struct clone_args clone_args =
- {
- .flags = CLONE_VM | CLONE_VFORK,
- .exit_signal = SIGCHLD,
- .stack = (uintptr_t) stack,
- .stack_size = stack_size,
- };
- new_pid = __clone_internal (&clone_args, __spawni_child, &args);
-
- /* It needs to collect the case where the auxiliary process was created
- but failed to execute the file (due either any preparation step or
- for execve itself). */
- if (new_pid > 0)
- {
- /* Also, it handles the unlikely case where the auxiliary process was
- terminated before calling execve as if it was successfully. The
- args.err is set to 0 as default and changed to a positive value
- only in case of failure, so in case of premature termination
- due a signal args.err will remain zeroed and it will be up to
- caller to actually collect it. */
- ec = args.err;
- if (ec > 0)
- /* There still an unlikely case where the child is cancelled after
- setting args.err, due to a positive error value. Also there is
- possible pid reuse race (where the kernel allocated the same pid
- to an unrelated process). Unfortunately due synchronization
- issues where the kernel might not have the process collected
- the waitpid below can not use WNOHANG. */
- __waitpid (new_pid, NULL, 0);
- }
- else
- ec = errno;
+ /* clone with CLONE_VM | CLONE_VFORK may fail for some namespace restriction
+ (for instance Linux does not allow processes in different time namespaces
+ to share address space) and in this case clone fails with EINVAL. Retry
+ with fork and exec. */
+ if (!spawni_clone (&args, &new_pid, &ec, stack, stack_size))
+ spawni_fork (&args, &new_pid, &ec, stack, stack_size);
__munmap (stack, stack_size);