[3/4] linux: Use /sys/devices/system/cpu/possible on __get_nprocs_conf

Message ID 20210329182520.323665-3-adhemerval.zanella@linaro.org
State Dropped
Headers
Series [1/4] Remove architecture specific sched_cpucount optimizations |

Commit Message

Adhemerval Zanella March 29, 2021, 6:25 p.m. UTC
  Instead of iterate over all the cpu in the sysfs folder.  It consumes
slight less resources on large system (which might require extra
getdents call) and memory (a limited stack buffer instead a large
malloced one for opendir).

Checked on x86_64-linux-gnu, aarch64-linux-gnu, and
powerpc64le-linux-gnu.
---
 sysdeps/unix/sysv/linux/getsysstats.c | 51 +++++++++++++++------------
 1 file changed, 29 insertions(+), 22 deletions(-)
  

Comments

Adhemerval Zanella May 5, 2021, 4:53 p.m. UTC | #1
Ping.

On 29/03/2021 15:25, Adhemerval Zanella wrote:
> Instead of iterate over all the cpu in the sysfs folder.  It consumes
> slight less resources on large system (which might require extra
> getdents call) and memory (a limited stack buffer instead a large
> malloced one for opendir).
> 
> Checked on x86_64-linux-gnu, aarch64-linux-gnu, and
> powerpc64le-linux-gnu.
> ---
>  sysdeps/unix/sysv/linux/getsysstats.c | 51 +++++++++++++++------------
>  1 file changed, 29 insertions(+), 22 deletions(-)
> 
> diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
> index f8a4a31d1b..5069951246 100644
> --- a/sysdeps/unix/sysv/linux/getsysstats.c
> +++ b/sysdeps/unix/sysv/linux/getsysstats.c
> @@ -17,7 +17,8 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>  
> -#include <dirent.h>
> +#include <ctype.h>
> +#include <intprops.h>
>  #include <not-cancel.h>
>  #include <scratch_buffer.h>
>  #include <stdio.h>
> @@ -63,33 +64,39 @@ weak_alias (__get_nprocs, get_nprocs)
>  int
>  __get_nprocs_conf (void)
>  {
> -  /* XXX Here will come a test for the new system call.  */
> +  int result = 1;
>  
>    /* Try to use the sysfs filesystem.  It has actual information about
>       online processors.  */
> -  DIR *dir = __opendir ("/sys/devices/system/cpu");
> -  if (dir != NULL)
> +  int fd = __open64_nocancel ("/sys/devices/system/cpu/possible", O_RDONLY);
> +  if (fd != -1)
>      {
> -      int count = 0;
> -      struct dirent64 *d;
> -
> -      while ((d = __readdir64 (dir)) != NULL)
> -	/* NB: the sysfs has d_type support.  */
> -	if (d->d_type == DT_DIR && strncmp (d->d_name, "cpu", 3) == 0)
> -	  {
> -	    char *endp;
> -	    unsigned long int nr = strtoul (d->d_name + 3, &endp, 10);
> -	    if (nr != ULONG_MAX && endp != d->d_name + 3 && *endp == '\0')
> -	      ++count;
> -	  }
> -
> -      __closedir (dir);
> -
> -      return count;
> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
> +
> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
> +      if (n > 0)
> +	{
> +	  buf[n] = '\0';
> +
> +	  /* Start on the right, to find highest node number.  */
> +	  int m = 1;
> +	  while (--n)
> +	    {
> +	      if ((buf[n] == ',') || (buf[n] == '-'))
> +		break;
> +	      /* Ignore '\n'  */
> +	      if (! isdigit (buf[n]))
> +		continue;
> +	      result += (buf[n] - '0') * m;
> +	      m *= 10;
> +	    }
> +	}
> +
> +      __close_nocancel (fd);
> +      return result + 1;
>      }
>  
> -  int result = 1;
> -
>  #ifdef GET_NPROCS_CONF_PARSER
>    /* If we haven't found an appropriate entry return 1.  */
>    FILE *fp = fopen ("/proc/cpuinfo", "rce");
>
  
Florian Weimer May 5, 2021, 5:54 p.m. UTC | #2
* Adhemerval Zanella via Libc-alpha:

> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
> +
> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
> +      if (n > 0)
> +	{
> +	  buf[n] = '\0';
> +
> +	  /* Start on the right, to find highest node number.  */
> +	  int m = 1;
> +	  while (--n)
> +	    {
> +	      if ((buf[n] == ',') || (buf[n] == '-'))
> +		break;
> +	      /* Ignore '\n'  */
> +	      if (! isdigit (buf[n]))
> +		continue;
> +	      result += (buf[n] - '0') * m;
> +	      m *= 10;
> +	    }
> +	}
> +
> +      __close_nocancel (fd);
> +      return result + 1;
>      }

I think the /online and /possible files have the same layout, so you
could use both.

Is there a way to tell whether the two data sources (/possible and the
count of the cpu%d directory entries) actually agree?  I tried to make
sense of the kernel sources but didn't succeed.

Thanks,
Florian
  
Florian Weimer May 5, 2021, 6:06 p.m. UTC | #3
* Florian Weimer:

> * Adhemerval Zanella via Libc-alpha:
>
>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>> +
>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>> +      if (n > 0)
>> +	{
>> +	  buf[n] = '\0';
>> +
>> +	  /* Start on the right, to find highest node number.  */
>> +	  int m = 1;
>> +	  while (--n)
>> +	    {
>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>> +		break;
>> +	      /* Ignore '\n'  */
>> +	      if (! isdigit (buf[n]))
>> +		continue;
>> +	      result += (buf[n] - '0') * m;
>> +	      m *= 10;
>> +	    }
>> +	}
>> +
>> +      __close_nocancel (fd);
>> +      return result + 1;
>>      }
>
> I think the /online and /possible files have the same layout, so you
> could use both.

Sorry, I meant to write: “so you could use *one parser for* both”

Florian
  
Adhemerval Zanella May 6, 2021, 1:03 p.m. UTC | #4
On 05/05/2021 15:06, Florian Weimer wrote:
> * Florian Weimer:
> 
>> * Adhemerval Zanella via Libc-alpha:
>>
>>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>>> +
>>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>>> +      if (n > 0)
>>> +	{
>>> +	  buf[n] = '\0';
>>> +
>>> +	  /* Start on the right, to find highest node number.  */
>>> +	  int m = 1;
>>> +	  while (--n)
>>> +	    {
>>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>>> +		break;
>>> +	      /* Ignore '\n'  */
>>> +	      if (! isdigit (buf[n]))
>>> +		continue;
>>> +	      result += (buf[n] - '0') * m;
>>> +	      m *= 10;
>>> +	    }
>>> +	}
>>> +
>>> +      __close_nocancel (fd);
>>> +      return result + 1;
>>>      }
>>
>> I think the /online and /possible files have the same layout, so you
>> could use both.
> 
> Sorry, I meant to write: “so you could use *one parser for* both”

I am not following, the second patch in this set *removed* the 
/sys/devices/system/cpu/online parsing in favor of the sched_getaffinity.
So now it only parses /sys/devices/system/cpu/possible and fallbacks
to sched_getaffinity.
  
Adhemerval Zanella May 6, 2021, 1:17 p.m. UTC | #5
On 05/05/2021 14:54, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>> +
>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>> +      if (n > 0)
>> +	{
>> +	  buf[n] = '\0';
>> +
>> +	  /* Start on the right, to find highest node number.  */
>> +	  int m = 1;
>> +	  while (--n)
>> +	    {
>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>> +		break;
>> +	      /* Ignore '\n'  */
>> +	      if (! isdigit (buf[n]))
>> +		continue;
>> +	      result += (buf[n] - '0') * m;
>> +	      m *= 10;
>> +	    }
>> +	}
>> +
>> +      __close_nocancel (fd);
>> +      return result + 1;
>>      }
> 
> I think the /online and /possible files have the same layout, so you
> could use both.
> 
> Is there a way to tell whether the two data sources (/possible and the
> count of the cpu%d directory entries) actually agree?  I tried to make
> sense of the kernel sources but didn't succeed.

I am not sure, but at least the /possible is properly documented as 
'testing' (which should stable as indicated by own kernel documentation [1])
ABI [2]:

What:		/sys/devices/system/cpu/kernel_max
		/sys/devices/system/cpu/offline
		/sys/devices/system/cpu/online
		/sys/devices/system/cpu/possible
		/sys/devices/system/cpu/present
Date:		December 2008
Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description:	CPU topology files that describe kernel limits related to
		hotplug. Briefly:

		kernel_max: the maximum cpu index allowed by the kernel
		configuration.

		offline: cpus that are not online because they have been
		HOTPLUGGED off or exceed the limit of cpus allowed by the
		kernel configuration (kernel_max above).

		online: cpus that are online and being scheduled.

		possible: cpus that have been allocated resources and can be
		brought online if they are present.

So I think we don't need to certify that directories entries do match
/possible values.

[1] https://github.com/torvalds/linux/tree/master/Documentation/ABI
[2] https://github.com/torvalds/linux/blob/master/Documentation/ABI/testing/sysfs-devices-system-cpu
  
Florian Weimer May 6, 2021, 1:51 p.m. UTC | #6
* Adhemerval Zanella via Libc-alpha:

> On 05/05/2021 15:06, Florian Weimer wrote:
>> * Florian Weimer:
>> 
>>> * Adhemerval Zanella via Libc-alpha:
>>>
>>>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>>>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>>>> +
>>>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>>>> +      if (n > 0)
>>>> +	{
>>>> +	  buf[n] = '\0';
>>>> +
>>>> +	  /* Start on the right, to find highest node number.  */
>>>> +	  int m = 1;
>>>> +	  while (--n)
>>>> +	    {
>>>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>>>> +		break;
>>>> +	      /* Ignore '\n'  */
>>>> +	      if (! isdigit (buf[n]))
>>>> +		continue;
>>>> +	      result += (buf[n] - '0') * m;
>>>> +	      m *= 10;
>>>> +	    }
>>>> +	}
>>>> +
>>>> +      __close_nocancel (fd);
>>>> +      return result + 1;
>>>>      }
>>>
>>> I think the /online and /possible files have the same layout, so you
>>> could use both.
>> 
>> Sorry, I meant to write: “so you could use *one parser for* both”
>
> I am not following, the second patch in this set *removed* the 
> /sys/devices/system/cpu/online parsing in favor of the sched_getaffinity.
> So now it only parses /sys/devices/system/cpu/possible and fallbacks
> to sched_getaffinity.

Oh, right.  So you removed the old implementation and bring bug a
slightly different new one.  Got it.

Thanks,
Florian
  
Adhemerval Zanella May 6, 2021, 8:07 p.m. UTC | #7
On 06/05/2021 10:51, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> On 05/05/2021 15:06, Florian Weimer wrote:
>>> * Florian Weimer:
>>>
>>>> * Adhemerval Zanella via Libc-alpha:
>>>>
>>>>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>>>>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>>>>> +
>>>>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>>>>> +      if (n > 0)
>>>>> +	{
>>>>> +	  buf[n] = '\0';
>>>>> +
>>>>> +	  /* Start on the right, to find highest node number.  */
>>>>> +	  int m = 1;
>>>>> +	  while (--n)
>>>>> +	    {
>>>>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>>>>> +		break;
>>>>> +	      /* Ignore '\n'  */
>>>>> +	      if (! isdigit (buf[n]))
>>>>> +		continue;
>>>>> +	      result += (buf[n] - '0') * m;
>>>>> +	      m *= 10;
>>>>> +	    }
>>>>> +	}
>>>>> +
>>>>> +      __close_nocancel (fd);
>>>>> +      return result + 1;
>>>>>      }
>>>>
>>>> I think the /online and /possible files have the same layout, so you
>>>> could use both.
>>>
>>> Sorry, I meant to write: “so you could use *one parser for* both”
>>
>> I am not following, the second patch in this set *removed* the 
>> /sys/devices/system/cpu/online parsing in favor of the sched_getaffinity.
>> So now it only parses /sys/devices/system/cpu/possible and fallbacks
>> to sched_getaffinity.
> 
> Oh, right.  So you removed the old implementation and bring bug a
> slightly different new one.  Got it.

Right, are you ok with the patch then?
  
Florian Weimer May 7, 2021, 11:07 a.m. UTC | #8
* Adhemerval Zanella via Libc-alpha:

> On 06/05/2021 10:51, Florian Weimer wrote:
>> * Adhemerval Zanella via Libc-alpha:
>> 
>>> On 05/05/2021 15:06, Florian Weimer wrote:
>>>> * Florian Weimer:
>>>>
>>>>> * Adhemerval Zanella via Libc-alpha:
>>>>>
>>>>>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>>>>>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>>>>>> +
>>>>>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>>>>>> +      if (n > 0)
>>>>>> +	{
>>>>>> +	  buf[n] = '\0';
>>>>>> +
>>>>>> +	  /* Start on the right, to find highest node number.  */
>>>>>> +	  int m = 1;
>>>>>> +	  while (--n)
>>>>>> +	    {
>>>>>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>>>>>> +		break;
>>>>>> +	      /* Ignore '\n'  */
>>>>>> +	      if (! isdigit (buf[n]))
>>>>>> +		continue;
>>>>>> +	      result += (buf[n] - '0') * m;
>>>>>> +	      m *= 10;
>>>>>> +	    }
>>>>>> +	}
>>>>>> +
>>>>>> +      __close_nocancel (fd);
>>>>>> +      return result + 1;
>>>>>>      }
>>>>>
>>>>> I think the /online and /possible files have the same layout, so you
>>>>> could use both.
>>>>
>>>> Sorry, I meant to write: “so you could use *one parser for* both”
>>>
>>> I am not following, the second patch in this set *removed* the 
>>> /sys/devices/system/cpu/online parsing in favor of the sched_getaffinity.
>>> So now it only parses /sys/devices/system/cpu/possible and fallbacks
>>> to sched_getaffinity.
>> 
>> Oh, right.  So you removed the old implementation and bring bug a
>> slightly different new one.  Got it.
>
> Right, are you ok with the patch then?

The new parser cannot handle gaps or ranges that do not start at 0, I
think.  The old parser could cope with that.  The kernel data structures
support gaps in the possible CPU mask.  I don't know if they occur in
practice, but firmware quirks in this area aren't exactly rare (e.g.,
single-socket systems which report hundreds of hot-pluggable CPU cores).

Thanks,
Florian
  
Adhemerval Zanella May 7, 2021, 12:43 p.m. UTC | #9
On 07/05/2021 08:07, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> On 06/05/2021 10:51, Florian Weimer wrote:
>>> * Adhemerval Zanella via Libc-alpha:
>>>
>>>> On 05/05/2021 15:06, Florian Weimer wrote:
>>>>> * Florian Weimer:
>>>>>
>>>>>> * Adhemerval Zanella via Libc-alpha:
>>>>>>
>>>>>>> +      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
>>>>>>> +      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
>>>>>>> +
>>>>>>> +      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
>>>>>>> +      if (n > 0)
>>>>>>> +	{
>>>>>>> +	  buf[n] = '\0';
>>>>>>> +
>>>>>>> +	  /* Start on the right, to find highest node number.  */
>>>>>>> +	  int m = 1;
>>>>>>> +	  while (--n)
>>>>>>> +	    {
>>>>>>> +	      if ((buf[n] == ',') || (buf[n] == '-'))
>>>>>>> +		break;
>>>>>>> +	      /* Ignore '\n'  */
>>>>>>> +	      if (! isdigit (buf[n]))
>>>>>>> +		continue;
>>>>>>> +	      result += (buf[n] - '0') * m;
>>>>>>> +	      m *= 10;
>>>>>>> +	    }
>>>>>>> +	}
>>>>>>> +
>>>>>>> +      __close_nocancel (fd);
>>>>>>> +      return result + 1;
>>>>>>>      }
>>>>>>
>>>>>> I think the /online and /possible files have the same layout, so you
>>>>>> could use both.
>>>>>
>>>>> Sorry, I meant to write: “so you could use *one parser for* both”
>>>>
>>>> I am not following, the second patch in this set *removed* the 
>>>> /sys/devices/system/cpu/online parsing in favor of the sched_getaffinity.
>>>> So now it only parses /sys/devices/system/cpu/possible and fallbacks
>>>> to sched_getaffinity.
>>>
>>> Oh, right.  So you removed the old implementation and bring bug a
>>> slightly different new one.  Got it.
>>
>> Right, are you ok with the patch then?
> 
> The new parser cannot handle gaps or ranges that do not start at 0, I
> think.  The old parser could cope with that.  The kernel data structures
> support gaps in the possible CPU mask.  I don't know if they occur in
> practice, but firmware quirks in this area aren't exactly rare (e.g.,
> single-socket systems which report hundreds of hot-pluggable CPU cores).

Indeed I did not take this in consideration and it seems that using
/possible is more complicated than just opendir the directory.  And 
we will need to handle the possible unlikely issue of a truncated
value from kernel, since the buffer is limited to PAGE_SIZE.

I will drop this patch.
  

Patch

diff --git a/sysdeps/unix/sysv/linux/getsysstats.c b/sysdeps/unix/sysv/linux/getsysstats.c
index f8a4a31d1b..5069951246 100644
--- a/sysdeps/unix/sysv/linux/getsysstats.c
+++ b/sysdeps/unix/sysv/linux/getsysstats.c
@@ -17,7 +17,8 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <dirent.h>
+#include <ctype.h>
+#include <intprops.h>
 #include <not-cancel.h>
 #include <scratch_buffer.h>
 #include <stdio.h>
@@ -63,33 +64,39 @@  weak_alias (__get_nprocs, get_nprocs)
 int
 __get_nprocs_conf (void)
 {
-  /* XXX Here will come a test for the new system call.  */
+  int result = 1;
 
   /* Try to use the sysfs filesystem.  It has actual information about
      online processors.  */
-  DIR *dir = __opendir ("/sys/devices/system/cpu");
-  if (dir != NULL)
+  int fd = __open64_nocancel ("/sys/devices/system/cpu/possible", O_RDONLY);
+  if (fd != -1)
     {
-      int count = 0;
-      struct dirent64 *d;
-
-      while ((d = __readdir64 (dir)) != NULL)
-	/* NB: the sysfs has d_type support.  */
-	if (d->d_type == DT_DIR && strncmp (d->d_name, "cpu", 3) == 0)
-	  {
-	    char *endp;
-	    unsigned long int nr = strtoul (d->d_name + 3, &endp, 10);
-	    if (nr != ULONG_MAX && endp != d->d_name + 3 && *endp == '\0')
-	      ++count;
-	  }
-
-      __closedir (dir);
-
-      return count;
+      /* The entry is in the form of '[cpuX]-[cpuY]'.  */
+      char buf[2 * INT_STRLEN_BOUND (unsigned int) + 1];
+
+      ssize_t n = __read_nocancel (fd, buf, sizeof (buf));
+      if (n > 0)
+	{
+	  buf[n] = '\0';
+
+	  /* Start on the right, to find highest node number.  */
+	  int m = 1;
+	  while (--n)
+	    {
+	      if ((buf[n] == ',') || (buf[n] == '-'))
+		break;
+	      /* Ignore '\n'  */
+	      if (! isdigit (buf[n]))
+		continue;
+	      result += (buf[n] - '0') * m;
+	      m *= 10;
+	    }
+	}
+
+      __close_nocancel (fd);
+      return result + 1;
     }
 
-  int result = 1;
-
 #ifdef GET_NPROCS_CONF_PARSER
   /* If we haven't found an appropriate entry return 1.  */
   FILE *fp = fopen ("/proc/cpuinfo", "rce");