[committed] libphobos: Default to libc closefrom in spawnProcessPosix [PR119112]

Message ID 20250310221341.67801-1-ibuclaw@gdcproject.org
State New
Headers
Series [committed] libphobos: Default to libc closefrom in spawnProcessPosix [PR119112] |

Commit Message

Iain Buclaw March 10, 2025, 10:13 p.m. UTC
  Hi,

This is a backport of two changes in upstream Phobos.

- The current implementation of spawnProcessPosix is broken on systems
with a large `ulimit -n' because it always OOMs making it impossible to
spawn processes. Using the libc implementation, when available, for
doing file descriptor operations en-mass solves this problem.

- The fallback code now reads the list of file descriptors in use from
/dev/fd or /proc/this/fd, avoiding the need to scroll through the entire
list of possible file descriptors. This fixes the fork process being
very slow and memory intensive when RLIMIT_NOFILE is too high.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
to the releases/gcc-14 branch.

Regards,
Iain.

---
	PR d/119112

libphobos/ChangeLog:

	* libdruntime/core/sys/freebsd/unistd.d (closefrom): Add binding.
	* libdruntime/core/sys/linux/unistd.d (closefrom): Likewise.
	* libdruntime/core/sys/openbsd/unistd.d (closefrom): Likewise.
	* src/std/process.d (enum InternalError): Add closefds_dup2.
	(spawnProcessPosix): Use closefrom when available.

Reviewed-on: https://github.com/dlang/phobos/pull/9048
Reviewed-on: https://github.com/dlang/phobos/pull/9077
---
 .../libdruntime/core/sys/freebsd/unistd.d     |   2 +
 libphobos/libdruntime/core/sys/linux/unistd.d |   4 +
 .../libdruntime/core/sys/openbsd/unistd.d     |   2 +
 libphobos/src/std/process.d                   | 194 ++++++++++++++----
 4 files changed, 158 insertions(+), 44 deletions(-)
  

Patch

diff --git a/libphobos/libdruntime/core/sys/freebsd/unistd.d b/libphobos/libdruntime/core/sys/freebsd/unistd.d
index 493cda1c8c9..ebb7afa2726 100644
--- a/libphobos/libdruntime/core/sys/freebsd/unistd.d
+++ b/libphobos/libdruntime/core/sys/freebsd/unistd.d
@@ -17,3 +17,5 @@  extern(C):
 nothrow:
 
 int getosreldate() pure @trusted;
+
+void closefrom(int);
diff --git a/libphobos/libdruntime/core/sys/linux/unistd.d b/libphobos/libdruntime/core/sys/linux/unistd.d
index faa226cf407..548870268db 100644
--- a/libphobos/libdruntime/core/sys/linux/unistd.d
+++ b/libphobos/libdruntime/core/sys/linux/unistd.d
@@ -5,6 +5,7 @@  public import core.sys.posix.unistd;
 version (linux):
 extern(C):
 nothrow:
+@nogc:
 
 // Additional seek constants for sparse file handling
 // from Linux's unistd.h, stdio.h, and linux/fs.h
@@ -21,3 +22,6 @@  char* getpass(const(char)* prompt);
 
 // Exit all threads in a process
 void exit_group(int status);
+
+/// Close all open file descriptors greater or equal to `lowfd`
+void closefrom(int lowfd);
diff --git a/libphobos/libdruntime/core/sys/openbsd/unistd.d b/libphobos/libdruntime/core/sys/openbsd/unistd.d
index 4232c036049..9618543d8da 100644
--- a/libphobos/libdruntime/core/sys/openbsd/unistd.d
+++ b/libphobos/libdruntime/core/sys/openbsd/unistd.d
@@ -19,3 +19,5 @@  int getthrname(pid_t, char*, size_t);
 int pledge(const scope char*, const scope char*);
 int setthrname(pid_t, const scope char*);
 int unveil(const scope char*, const scope char*);
+
+int closefrom(int);
diff --git a/libphobos/src/std/process.d b/libphobos/src/std/process.d
index 494910f3535..3b892217917 100644
--- a/libphobos/src/std/process.d
+++ b/libphobos/src/std/process.d
@@ -872,6 +872,7 @@  version (Posix) private enum InternalError : ubyte
     doubleFork,
     malloc,
     preExec,
+    closefds_dup2,
 }
 
 /*
@@ -1000,7 +1001,7 @@  private Pid spawnProcessPosix(scope const(char[])[] args,
         if (config.flags & Config.Flags.detached)
             close(pidPipe[0]);
         close(forkPipe[0]);
-        immutable forkPipeOut = forkPipe[1];
+        auto forkPipeOut = forkPipe[1];
         immutable pidPipeOut = pidPipe[1];
 
         // Set the working directory.
@@ -1034,56 +1035,157 @@  private Pid spawnProcessPosix(scope const(char[])[] args,
 
             if (!(config.flags & Config.Flags.inheritFDs))
             {
-                // NOTE: malloc() and getrlimit() are not on the POSIX async
-                // signal safe functions list, but practically this should
-                // not be a problem. Java VM and CPython also use malloc()
-                // in its own implementation via opendir().
-                import core.stdc.stdlib : malloc;
-                import core.sys.posix.poll : pollfd, poll, POLLNVAL;
-                import core.sys.posix.sys.resource : rlimit, getrlimit, RLIMIT_NOFILE;
-
-                // Get the maximum number of file descriptors that could be open.
-                rlimit r;
-                if (getrlimit(RLIMIT_NOFILE, &r) != 0)
-                {
-                    abortOnError(forkPipeOut, InternalError.getrlimit, .errno);
-                }
-                immutable maxDescriptors = cast(int) r.rlim_cur;
+                version (FreeBSD)
+                    import core.sys.freebsd.unistd : closefrom;
+                else version (OpenBSD)
+                    import core.sys.openbsd.unistd : closefrom;
 
-                // The above, less stdin, stdout, and stderr
-                immutable maxToClose = maxDescriptors - 3;
-
-                // Call poll() to see which ones are actually open:
-                auto pfds = cast(pollfd*) malloc(pollfd.sizeof * maxToClose);
-                if (pfds is null)
-                {
-                    abortOnError(forkPipeOut, InternalError.malloc, .errno);
-                }
-                foreach (i; 0 .. maxToClose)
-                {
-                    pfds[i].fd = i + 3;
-                    pfds[i].events = 0;
-                    pfds[i].revents = 0;
-                }
-                if (poll(pfds, maxToClose, 0) >= 0)
+                static if (!__traits(compiles, closefrom))
                 {
-                    foreach (i; 0 .. maxToClose)
+                    void fallback (int lowfd)
                     {
-                        // don't close pipe write end
-                        if (pfds[i].fd == forkPipeOut) continue;
-                        // POLLNVAL will be set if the file descriptor is invalid.
-                        if (!(pfds[i].revents & POLLNVAL)) close(pfds[i].fd);
+                        import core.sys.posix.dirent : dirent, opendir, readdir, closedir, DIR;
+                        import core.sys.posix.unistd : close;
+                        import core.sys.posix.stdlib : atoi, malloc, free;
+                        import core.sys.posix.sys.resource : rlimit, getrlimit, RLIMIT_NOFILE;
+
+                        // Get the maximum number of file descriptors that could be open.
+                        rlimit r;
+                        if (getrlimit(RLIMIT_NOFILE, &r) != 0)
+                            abortOnError(forkPipeOut, InternalError.getrlimit, .errno);
+
+                        immutable maxDescriptors = cast(int) r.rlim_cur;
+
+                        // Missing druntime declaration
+                        pragma(mangle, "dirfd")
+                        extern(C) nothrow @nogc int dirfd(DIR* dir);
+
+                        DIR* dir = null;
+
+                        // We read from /dev/fd or /proc/self/fd only if the limit is high enough
+                        if (maxDescriptors > 128*1024)
+                        {
+                            // Try to open the directory /dev/fd or /proc/self/fd
+                            dir = opendir("/dev/fd");
+                            if (dir is null) dir = opendir("/proc/self/fd");
+                        }
+
+                        // If we have a directory, close all file descriptors except stdin, stdout, and stderr
+                        if (dir)
+                        {
+                            scope(exit) closedir(dir);
+
+                            int opendirfd = dirfd(dir);
+
+                            // Iterate over all file descriptors
+                            while (true)
+                            {
+                                dirent* entry = readdir(dir);
+
+                                if (entry is null) break;
+                                if (entry.d_name[0] == '.') continue;
+
+                                int fd = atoi(cast(char*) entry.d_name);
+
+                                // Don't close stdin, stdout, stderr, or the directory file descriptor
+                                if (fd < lowfd || fd == opendirfd) continue;
+
+                                close(fd);
+                            }
+                        }
+                        else
+                        {
+                            // This is going to allocate 8 bytes for each possible file descriptor from lowfd to r.rlim_cur
+                            if (maxDescriptors <= 128*1024)
+                            {
+                                // NOTE: malloc() and getrlimit() are not on the POSIX async
+                                // signal safe functions list, but practically this should
+                                // not be a problem. Java VM and CPython also use malloc()
+                                // in its own implementation via opendir().
+                                import core.stdc.stdlib : malloc;
+                                import core.sys.posix.poll : pollfd, poll, POLLNVAL;
+
+                                immutable maxToClose = maxDescriptors - lowfd;
+
+                                // Call poll() to see which ones are actually open:
+                                auto pfds = cast(pollfd*) malloc(pollfd.sizeof * maxToClose);
+                                if (pfds is null)
+                                {
+                                    abortOnError(forkPipeOut, InternalError.malloc, .errno);
+                                }
+
+                                foreach (i; 0 .. maxToClose)
+                                {
+                                    pfds[i].fd = i + lowfd;
+                                    pfds[i].events = 0;
+                                    pfds[i].revents = 0;
+                                }
+
+                                if (poll(pfds, maxToClose, 0) < 0)
+                                    // couldn't use poll, use the slow path.
+                                    goto LslowClose;
+
+                                foreach (i; 0 .. maxToClose)
+                                {
+                                    // POLLNVAL will be set if the file descriptor is invalid.
+                                    if (!(pfds[i].revents & POLLNVAL)) close(pfds[i].fd);
+                                }
+                            }
+                            else
+                            {
+                            LslowClose:
+                                // Fall back to closing everything.
+                                foreach (i; lowfd .. maxDescriptors)
+                                {
+                                    close(i);
+                                }
+                            }
+                        }
                     }
-                }
-                else
-                {
-                    // Fall back to closing everything.
-                    foreach (i; 3 .. maxDescriptors)
+
+                    // closefrom may not be available on the version of glibc we build against.
+                    // Until we find a way to perform this check we will try to use dlsym to
+                    // check for the function. See: https://github.com/dlang/phobos/pull/9048
+                    version (CRuntime_Glibc)
                     {
-                        if (i == forkPipeOut) continue;
-                        close(i);
+                        void closefrom (int lowfd) {
+                            static bool tryGlibcClosefrom (int lowfd) {
+                                import core.sys.posix.dlfcn : dlopen, dlclose, dlsym, dlerror, RTLD_LAZY;
+
+                                void *handle = dlopen("libc.so.6", RTLD_LAZY);
+                                if (!handle)
+                                    return false;
+                                scope(exit) dlclose(handle);
+
+                                // Clear errors
+                                dlerror();
+                                alias closefromT = extern(C) void function(int) @nogc @system nothrow;
+                                auto closefrom = cast(closefromT) dlsym(handle, "closefrom");
+                                if (dlerror())
+                                    return false;
+
+                                closefrom(lowfd);
+                                return true;
+                            }
+
+                            if (!tryGlibcClosefrom(lowfd))
+                                fallback(lowfd);
+                        }
                     }
+                    else
+                        alias closefrom = fallback;
                 }
+
+                // We need to close all open file descriptors excluding std{in,out,err}
+                // and forkPipeOut because we still need it.
+                // Since the various libc's provide us with `closefrom` move forkPipeOut
+                // to position 3, right after STDERR_FILENO, and close all FDs following that.
+                if (dup2(forkPipeOut, 3) == -1)
+                    abortOnError(forkPipeOut, InternalError.closefds_dup2, .errno);
+                forkPipeOut = 3;
+                // forkPipeOut needs to be closed after we call `exec`.
+                setCLOEXEC(forkPipeOut, true);
+                closefrom(forkPipeOut + 1);
             }
             else // This is already done if we don't inherit descriptors.
             {
@@ -1189,6 +1291,10 @@  private Pid spawnProcessPosix(scope const(char[])[] args,
                 case InternalError.preExec:
                     errorMsg = "Failed to execute preExecFunction";
                     break;
+                case InternalError.closefds_dup2:
+                    assert(!(config.flags & Config.Flags.inheritFDs));
+                    errorMsg = "Failed to close inherited file descriptors";
+                    break;
                 case InternalError.noerror:
                     assert(false);
             }