[v2,3/3] linux: Revert the use of sched_getaffinity on get_nproc (BZ #28310)

Message ID 20210907122259.79800-4-adhemerval.zanella@linaro.org
State Committed
Commit 342298278eabc75baabcaced110a11a02c3d3580
Headers
Series Revert the use of sched_getaffinity on get_nproc |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Adhemerval Zanella Sept. 7, 2021, 12:22 p.m. UTC
  The use of sched_getaffinity on get_nproc and
sysconf (_SC_NPROCESSORS_ONLN) done in 903bc7dcc2acafc40 (BZ #27645)
breaks the top command in common hypervisor configurations and also
other monitoring tools.

The main issue using sched_getaffinity changed the symbols semantic
from system-wide scope of online CPUs to per-process one (which can
be changed with kernel cpusets or book parameters in VM).

This patch reverts mostly of the 903bc7dcc2acafc40, with the
exceptions:

  * No more cached values and atomic updates, since they are inherent
    racy.

  * No /proc/cpuinfo fallback, since /proc/stat is already used and
    it would require to revert more arch-specific code.

  * The alloca is replace with a static buffer of 1024 bytes.

So the implementation first consult the sysfs, and fallbacks to procfs.

Checked on x86_64-linux-gnu.
---
 sysdeps/unix/sysv/linux/getsysstats.c | 139 +++++++++++++++++++++++++-
 1 file changed, 134 insertions(+), 5 deletions(-)
  

Comments

Florian Weimer Sept. 13, 2021, 11:05 a.m. UTC | #1
* Adhemerval Zanella:

> The use of sched_getaffinity on get_nproc and
> sysconf (_SC_NPROCESSORS_ONLN) done in 903bc7dcc2acafc40 (BZ #27645)
> breaks the top command in common hypervisor configurations and also
> other monitoring tools.
>
> The main issue using sched_getaffinity changed the symbols semantic
> from system-wide scope of online CPUs to per-process one (which can
> be changed with kernel cpusets or book parameters in VM).
>
> This patch reverts mostly of the 903bc7dcc2acafc40, with the
> exceptions:
>
>   * No more cached values and atomic updates, since they are inherent
>     racy.
>
>   * No /proc/cpuinfo fallback, since /proc/stat is already used and
>     it would require to revert more arch-specific code.
>
>   * The alloca is replace with a static buffer of 1024 bytes.
>
> So the implementation first consult the sysfs, and fallbacks to procfs.

Looks okay.

Reviewed-by: Florian Weimer <fweimer@redhat.com>

Thanks,
Florian
  

Patch

diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
index 1e3d88676d..15ad91cf2f 100644
--- a/sysdeps/unix/sysv/linux/getsysstats.c
+++ b/sysdeps/unix/sysv/linux/getsysstats.c
@@ -17,6 +17,8 @@ 
    <https://www.gnu.org/licenses/>.  */
 
 #include <array_length.h>
+#include <assert.h>
+#include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
 #include <ldsodefs.h>
@@ -29,7 +31,7 @@ 
 #include <sysdep.h>
 
 int
-__get_nprocs (void)
+__get_nprocs_sched (void)
 {
   enum
     {
@@ -52,14 +54,141 @@  __get_nprocs (void)
      atomics are needed). */
   return 2;
 }
-libc_hidden_def (__get_nprocs)
-weak_alias (__get_nprocs, get_nprocs)
+
+static char *
+next_line (int fd, char *const buffer, char **cp, char **re,
+           char *const buffer_end)
+{
+  char *res = *cp;
+  char *nl = memchr (*cp, '\n', *re - *cp);
+  if (nl == NULL)
+    {
+      if (*cp != buffer)
+        {
+          if (*re == buffer_end)
+            {
+              memmove (buffer, *cp, *re - *cp);
+              *re = buffer + (*re - *cp);
+              *cp = buffer;
+
+              ssize_t n = __read_nocancel (fd, *re, buffer_end - *re);
+              if (n < 0)
+                return NULL;
+
+              *re += n;
+
+              nl = memchr (*cp, '\n', *re - *cp);
+              while (nl == NULL && *re == buffer_end)
+                {
+                  /* Truncate too long lines.  */
+                  *re = buffer + 3 * (buffer_end - buffer) / 4;
+                  n = __read_nocancel (fd, *re, buffer_end - *re);
+                  if (n < 0)
+                    return NULL;
+
+                  nl = memchr (*re, '\n', n);
+                  **re = '\n';
+                  *re += n;
+                }
+            }
+          else
+            nl = memchr (*cp, '\n', *re - *cp);
+
+          res = *cp;
+        }
+
+      if (nl == NULL)
+        nl = *re - 1;
+    }
+
+  *cp = nl + 1;
+  assert (*cp <= *re);
+
+  return res == *re ? NULL : res;
+}
+
 
 int
-__get_nprocs_sched (void)
+__get_nprocs (void)
 {
-  return __get_nprocs ();
+  enum { buffer_size = 1024 };
+  char buffer[buffer_size];
+  char *buffer_end = buffer + buffer_size;
+  char *cp = buffer_end;
+  char *re = buffer_end;
+
+  const int flags = O_RDONLY | O_CLOEXEC;
+  /* This file contains comma-separated ranges.  */
+  int fd = __open_nocancel ("/sys/devices/system/cpu/online", flags);
+  char *l;
+  int result = 0;
+  if (fd != -1)
+    {
+      l = next_line (fd, buffer, &cp, &re, buffer_end);
+      if (l != NULL)
+	do
+	  {
+	    char *endp;
+	    unsigned long int n = strtoul (l, &endp, 10);
+	    if (l == endp)
+	      {
+		result = 0;
+		break;
+	      }
+
+	    unsigned long int m = n;
+	    if (*endp == '-')
+	      {
+		l = endp + 1;
+		m = strtoul (l, &endp, 10);
+		if (l == endp)
+		  {
+		    result = 0;
+		    break;
+		  }
+	      }
+
+	    result += m - n + 1;
+
+	    l = endp;
+	    if (l < re && *l == ',')
+	      ++l;
+	  }
+	while (l < re && *l != '\n');
+
+      __close_nocancel_nostatus (fd);
+
+      if (result > 0)
+	return result;
+    }
+
+  cp = buffer_end;
+  re = buffer_end;
+
+  /* Default to an SMP system in case we cannot obtain an accurate
+     number.  */
+  result = 2;
+
+  fd = __open_nocancel ("/proc/stat", flags);
+  if (fd != -1)
+    {
+      result = 0;
+
+      while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL)
+	/* The current format of /proc/stat has all the cpu* entries
+	   at the front.  We assume here that stays this way.  */
+	if (strncmp (l, "cpu", 3) != 0)
+	  break;
+	else if (isdigit (l[3]))
+	  ++result;
+
+      __close_nocancel_nostatus (fd);
+    }
+
+  return result;
 }
+libc_hidden_def (__get_nprocs)
+weak_alias (__get_nprocs, get_nprocs)
 
 
 /* On some architectures it is possible to distinguish between configured