diff mbox

BZ# 16418: Fix powerpc get_clockfreq raciness

Message ID 5473A6C3.5020806@linux.vnet.ibm.com
State Committed
Delegated to: Adhemerval Zanella Netto
Headers show

Commit Message

Adhemerval Zanella Netto Nov. 24, 2014, 9:44 p.m. UTC
This patch fixes powerpc __get_clockfreq racy and cancel-safe issues by
dropping internal static cache and by using nocancel file operations.
The vDSO failure check is also removed, since kernel code does not
return an error (it cleans cr0.so bit on function return) and the static
code (to read value /proc) now uses non-cancellable calls.

Since currently I don't see this code patch to be performance sensitive
(usually the clock frequency is obtained once to transform timebase
values), I don't see a problem to drop its internal cache.  Also, if
latency came up as being important for this one, correct approach would
be use IFUNC to call vDSO symbols directly (which I do not aim to
implement now).

Tested on powerpc64 and powerpc32.

--

	[BZ# 16418]
	* sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
	(__get_clockfreq): Make code racy and cancel safe.

---

Comments

Adhemerval Zanella Netto Dec. 10, 2014, 5:18 p.m. UTC | #1
Ping.

On 24-11-2014 19:44, Adhemerval Zanella wrote:
> This patch fixes powerpc __get_clockfreq racy and cancel-safe issues by
> dropping internal static cache and by using nocancel file operations.
> The vDSO failure check is also removed, since kernel code does not
> return an error (it cleans cr0.so bit on function return) and the static
> code (to read value /proc) now uses non-cancellable calls.
>
> Since currently I don't see this code patch to be performance sensitive
> (usually the clock frequency is obtained once to transform timebase
> values), I don't see a problem to drop its internal cache.  Also, if
> latency came up as being important for this one, correct approach would
> be use IFUNC to call vDSO symbols directly (which I do not aim to
> implement now).
>
> Tested on powerpc64 and powerpc32.
>
> --
>
> 	[BZ# 16418]
> 	* sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
> 	(__get_clockfreq): Make code racy and cancel safe.
>
> ---
>
> diff --git a/NEWS b/NEWS
> index ad170c4..833a680 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -9,10 +9,10 @@ Version 2.21
>
>  * The following bugs are resolved with this release:
>
> -  6652, 12926, 14132, 14138, 14171, 14498, 15215, 15884, 16469, 17266,
> -  17344, 17363, 17370, 17371, 17411, 17460, 17475, 17485, 17501, 17506,
> -  17508, 17522, 17555, 17570, 17571, 17572, 17573, 17574, 17582, 17583,
> -  17584, 17585, 17589, 17594, 17616, 17625.
> +  6652, 12926, 14132, 14138, 14171, 14498, 15215, 15884, 16418, 16469,
> +  17266, 17344, 17363, 17370, 17371, 17411, 17460, 17475, 17485, 17501,
> +  17506, 17508, 17522, 17555, 17570, 17571, 17572, 17573, 17574, 17582,
> +  17583, 17584, 17585, 17589, 17594, 17616, 17625.
>
>  * CVE-2104-7817 The wordexp function could ignore the WRDE_NOCMD flag
>    under certain input conditions resulting in the execution of a shell for
> diff --git a/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c b/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
> index 62217b1..44f90b4 100644
> --- a/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
> +++ b/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
> @@ -24,95 +24,85 @@
>  #include <libc-internal.h>
>  #include <sysdep.h>
>  #include <bits/libc-vdso.h>
> +#include <not-cancel.h>
>
>  hp_timing_t
>  __get_clockfreq (void)
>  {
> +  hp_timing_t result = 0L;
> +
> +#ifdef SHARED
> +  /* The vDSO does not return an error (it clear cr0.so on returning).  */
> +  INTERNAL_SYSCALL_DECL (err);
> +  result =
> +    INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK (get_tbfreq, err, uint64_t, 0);
> +#else
>    /* We read the information from the /proc filesystem.  /proc/cpuinfo
>       contains at least one line like:
>       timebase        : 33333333
>       We search for this line and convert the number into an integer.  */
> -  static hp_timing_t timebase_freq;
> -  hp_timing_t result = 0L;
> +  int fd = __open_nocancel ("/proc/cpuinfo", O_RDONLY);
> +  if (__glibc_likely (fd != -1))
> +    return result;
>
> -  /* If this function was called before, we know the result.  */
> -  if (timebase_freq != 0)
> -    return timebase_freq;
> +  /* The timebase will be in the 1st 1024 bytes for systems with up
> +     to 8 processors.  If the first read returns less then 1024
> +     bytes read,  we have the whole cpuinfo and can start the scan.
> +     Otherwise we will have to read more to insure we have the
> +     timebase value in the scan.  */
> +  char buf[1024];
> +  ssize_t n;
>
> -  /* If we can use the vDSO to obtain the timebase even better.  */
> -#ifdef SHARED
> -  INTERNAL_SYSCALL_DECL (err);
> -  timebase_freq =
> -    INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK (get_tbfreq, err, uint64_t, 0);
> -  if (INTERNAL_SYSCALL_ERROR_P (timebase_freq, err)
> -      && INTERNAL_SYSCALL_ERRNO (timebase_freq, err) == ENOSYS)
> -#endif
> +  n = __read_nocancel (fd, buf, sizeof (buf));
> +  if (n == sizeof (buf))
>      {
> -      int fd = __open ("/proc/cpuinfo", O_RDONLY);
> +      /* We are here because the 1st read returned exactly sizeof
> +         (buf) bytes.  This implies that we are not at EOF and may
> +         not have read the timebase value yet.  So we need to read
> +         more bytes until we know we have EOF.  We copy the lower
> +         half of buf to the upper half and read sizeof (buf)/2
> +         bytes into the lower half of buf and repeat until we
> +         reach EOF.  We can assume that the timebase will be in
> +         the last 512 bytes of cpuinfo, so two 512 byte half_bufs
> +         will be sufficient to contain the timebase and will
> +         handle the case where the timebase spans the half_buf
> +         boundry.  */
> +      const ssize_t half_buf = sizeof (buf) / 2;
> +      while (n >= half_buf)
> +	{
> +	  memcpy (buf, buf + half_buf, half_buf);
> +	  n = __read_nocancel (fd, buf + half_buf, half_buf);
> +	}
> +      if (n >= 0)
> +	n += half_buf;
> +    }
> +  __close_nocancel (fd);
>
> -      if (__glibc_likely (fd != -1))
> +  if (__glibc_likely (n > 0))
> +    {
> +      char *mhz = memmem (buf, n, "timebase", 7);
> +
> +      if (__glibc_likely (mhz != NULL))
>  	{
> -	  /* The timebase will be in the 1st 1024 bytes for systems with up
> -	     to 8 processors.  If the first read returns less then 1024
> -	     bytes read,  we have the whole cpuinfo and can start the scan.
> -	     Otherwise we will have to read more to insure we have the
> -	     timebase value in the scan.  */
> -	  char buf[1024];
> -	  ssize_t n;
> +	  char *endp = buf + n;
>
> -	  n = __read (fd, buf, sizeof (buf));
> -	  if (n == sizeof (buf))
> -	    {
> -	      /* We are here because the 1st read returned exactly sizeof
> -	         (buf) bytes.  This implies that we are not at EOF and may
> -	         not have read the timebase value yet.  So we need to read
> -	         more bytes until we know we have EOF.  We copy the lower
> -	         half of buf to the upper half and read sizeof (buf)/2
> -	         bytes into the lower half of buf and repeat until we
> -	         reach EOF.  We can assume that the timebase will be in
> -	         the last 512 bytes of cpuinfo, so two 512 byte half_bufs
> -	         will be sufficient to contain the timebase and will
> -	         handle the case where the timebase spans the half_buf
> -	         boundry.  */
> -	      const ssize_t half_buf = sizeof (buf) / 2;
> -	      while (n >= half_buf)
> -		{
> -		  memcpy (buf, buf + half_buf, half_buf);
> -		  n = __read (fd, buf + half_buf, half_buf);
> -		}
> -	      if (n >= 0)
> -		n += half_buf;
> -	    }
> +	  /* Search for the beginning of the string.  */
> +	  while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
> +	    ++mhz;
>
> -	  if (__builtin_expect (n, 1) > 0)
> +	  while (mhz < endp && *mhz != '\n')
>  	    {
> -	      char *mhz = memmem (buf, n, "timebase", 7);
> -
> -	      if (__glibc_likely (mhz != NULL))
> +	      if (*mhz >= '0' && *mhz <= '9')
>  		{
> -		  char *endp = buf + n;
> -
> -		  /* Search for the beginning of the string.  */
> -		  while (mhz < endp && (*mhz < '0' || *mhz > '9')
> -			 && *mhz != '\n')
> -		    ++mhz;
> -
> -		  while (mhz < endp && *mhz != '\n')
> -		    {
> -		      if (*mhz >= '0' && *mhz <= '9')
> -			{
> -			  result *= 10;
> -			  result += *mhz - '0';
> -			}
> -
> -		      ++mhz;
> -		    }
> +		  result *= 10;
> +		  result += *mhz - '0';
>  		}
> -	      timebase_freq = result;
> +
> +	      ++mhz;
>  	    }
> -	  __close (fd);
>  	}
>      }
> +#endif
>
> -  return timebase_freq;
> +  return result;
>  }
>
Adhemerval Zanella Netto Jan. 6, 2015, 1:46 p.m. UTC | #2
Ping.


On 10-12-2014 15:18, Adhemerval Zanella wrote:
> Ping.
>
> On 24-11-2014 19:44, Adhemerval Zanella wrote:
>> This patch fixes powerpc __get_clockfreq racy and cancel-safe issues by
>> dropping internal static cache and by using nocancel file operations.
>> The vDSO failure check is also removed, since kernel code does not
>> return an error (it cleans cr0.so bit on function return) and the static
>> code (to read value /proc) now uses non-cancellable calls.
>>
>> Since currently I don't see this code patch to be performance sensitive
>> (usually the clock frequency is obtained once to transform timebase
>> values), I don't see a problem to drop its internal cache.  Also, if
>> latency came up as being important for this one, correct approach would
>> be use IFUNC to call vDSO symbols directly (which I do not aim to
>> implement now).
>>
>> Tested on powerpc64 and powerpc32.
>>
>> --
>>
>> 	[BZ# 16418]
>> 	* sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
>> 	(__get_clockfreq): Make code racy and cancel safe.
>>
>> ---
>>
>> diff --git a/NEWS b/NEWS
>> index ad170c4..833a680 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -9,10 +9,10 @@ Version 2.21
>>
>>  * The following bugs are resolved with this release:
>>
>> -  6652, 12926, 14132, 14138, 14171, 14498, 15215, 15884, 16469, 17266,
>> -  17344, 17363, 17370, 17371, 17411, 17460, 17475, 17485, 17501, 17506,
>> -  17508, 17522, 17555, 17570, 17571, 17572, 17573, 17574, 17582, 17583,
>> -  17584, 17585, 17589, 17594, 17616, 17625.
>> +  6652, 12926, 14132, 14138, 14171, 14498, 15215, 15884, 16418, 16469,
>> +  17266, 17344, 17363, 17370, 17371, 17411, 17460, 17475, 17485, 17501,
>> +  17506, 17508, 17522, 17555, 17570, 17571, 17572, 17573, 17574, 17582,
>> +  17583, 17584, 17585, 17589, 17594, 17616, 17625.
>>
>>  * CVE-2104-7817 The wordexp function could ignore the WRDE_NOCMD flag
>>    under certain input conditions resulting in the execution of a shell for
>> diff --git a/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c b/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
>> index 62217b1..44f90b4 100644
>> --- a/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
>> +++ b/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
>> @@ -24,95 +24,85 @@
>>  #include <libc-internal.h>
>>  #include <sysdep.h>
>>  #include <bits/libc-vdso.h>
>> +#include <not-cancel.h>
>>
>>  hp_timing_t
>>  __get_clockfreq (void)
>>  {
>> +  hp_timing_t result = 0L;
>> +
>> +#ifdef SHARED
>> +  /* The vDSO does not return an error (it clear cr0.so on returning).  */
>> +  INTERNAL_SYSCALL_DECL (err);
>> +  result =
>> +    INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK (get_tbfreq, err, uint64_t, 0);
>> +#else
>>    /* We read the information from the /proc filesystem.  /proc/cpuinfo
>>       contains at least one line like:
>>       timebase        : 33333333
>>       We search for this line and convert the number into an integer.  */
>> -  static hp_timing_t timebase_freq;
>> -  hp_timing_t result = 0L;
>> +  int fd = __open_nocancel ("/proc/cpuinfo", O_RDONLY);
>> +  if (__glibc_likely (fd != -1))
>> +    return result;
>>
>> -  /* If this function was called before, we know the result.  */
>> -  if (timebase_freq != 0)
>> -    return timebase_freq;
>> +  /* The timebase will be in the 1st 1024 bytes for systems with up
>> +     to 8 processors.  If the first read returns less then 1024
>> +     bytes read,  we have the whole cpuinfo and can start the scan.
>> +     Otherwise we will have to read more to insure we have the
>> +     timebase value in the scan.  */
>> +  char buf[1024];
>> +  ssize_t n;
>>
>> -  /* If we can use the vDSO to obtain the timebase even better.  */
>> -#ifdef SHARED
>> -  INTERNAL_SYSCALL_DECL (err);
>> -  timebase_freq =
>> -    INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK (get_tbfreq, err, uint64_t, 0);
>> -  if (INTERNAL_SYSCALL_ERROR_P (timebase_freq, err)
>> -      && INTERNAL_SYSCALL_ERRNO (timebase_freq, err) == ENOSYS)
>> -#endif
>> +  n = __read_nocancel (fd, buf, sizeof (buf));
>> +  if (n == sizeof (buf))
>>      {
>> -      int fd = __open ("/proc/cpuinfo", O_RDONLY);
>> +      /* We are here because the 1st read returned exactly sizeof
>> +         (buf) bytes.  This implies that we are not at EOF and may
>> +         not have read the timebase value yet.  So we need to read
>> +         more bytes until we know we have EOF.  We copy the lower
>> +         half of buf to the upper half and read sizeof (buf)/2
>> +         bytes into the lower half of buf and repeat until we
>> +         reach EOF.  We can assume that the timebase will be in
>> +         the last 512 bytes of cpuinfo, so two 512 byte half_bufs
>> +         will be sufficient to contain the timebase and will
>> +         handle the case where the timebase spans the half_buf
>> +         boundry.  */
>> +      const ssize_t half_buf = sizeof (buf) / 2;
>> +      while (n >= half_buf)
>> +	{
>> +	  memcpy (buf, buf + half_buf, half_buf);
>> +	  n = __read_nocancel (fd, buf + half_buf, half_buf);
>> +	}
>> +      if (n >= 0)
>> +	n += half_buf;
>> +    }
>> +  __close_nocancel (fd);
>>
>> -      if (__glibc_likely (fd != -1))
>> +  if (__glibc_likely (n > 0))
>> +    {
>> +      char *mhz = memmem (buf, n, "timebase", 7);
>> +
>> +      if (__glibc_likely (mhz != NULL))
>>  	{
>> -	  /* The timebase will be in the 1st 1024 bytes for systems with up
>> -	     to 8 processors.  If the first read returns less then 1024
>> -	     bytes read,  we have the whole cpuinfo and can start the scan.
>> -	     Otherwise we will have to read more to insure we have the
>> -	     timebase value in the scan.  */
>> -	  char buf[1024];
>> -	  ssize_t n;
>> +	  char *endp = buf + n;
>>
>> -	  n = __read (fd, buf, sizeof (buf));
>> -	  if (n == sizeof (buf))
>> -	    {
>> -	      /* We are here because the 1st read returned exactly sizeof
>> -	         (buf) bytes.  This implies that we are not at EOF and may
>> -	         not have read the timebase value yet.  So we need to read
>> -	         more bytes until we know we have EOF.  We copy the lower
>> -	         half of buf to the upper half and read sizeof (buf)/2
>> -	         bytes into the lower half of buf and repeat until we
>> -	         reach EOF.  We can assume that the timebase will be in
>> -	         the last 512 bytes of cpuinfo, so two 512 byte half_bufs
>> -	         will be sufficient to contain the timebase and will
>> -	         handle the case where the timebase spans the half_buf
>> -	         boundry.  */
>> -	      const ssize_t half_buf = sizeof (buf) / 2;
>> -	      while (n >= half_buf)
>> -		{
>> -		  memcpy (buf, buf + half_buf, half_buf);
>> -		  n = __read (fd, buf + half_buf, half_buf);
>> -		}
>> -	      if (n >= 0)
>> -		n += half_buf;
>> -	    }
>> +	  /* Search for the beginning of the string.  */
>> +	  while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
>> +	    ++mhz;
>>
>> -	  if (__builtin_expect (n, 1) > 0)
>> +	  while (mhz < endp && *mhz != '\n')
>>  	    {
>> -	      char *mhz = memmem (buf, n, "timebase", 7);
>> -
>> -	      if (__glibc_likely (mhz != NULL))
>> +	      if (*mhz >= '0' && *mhz <= '9')
>>  		{
>> -		  char *endp = buf + n;
>> -
>> -		  /* Search for the beginning of the string.  */
>> -		  while (mhz < endp && (*mhz < '0' || *mhz > '9')
>> -			 && *mhz != '\n')
>> -		    ++mhz;
>> -
>> -		  while (mhz < endp && *mhz != '\n')
>> -		    {
>> -		      if (*mhz >= '0' && *mhz <= '9')
>> -			{
>> -			  result *= 10;
>> -			  result += *mhz - '0';
>> -			}
>> -
>> -		      ++mhz;
>> -		    }
>> +		  result *= 10;
>> +		  result += *mhz - '0';
>>  		}
>> -	      timebase_freq = result;
>> +
>> +	      ++mhz;
>>  	    }
>> -	  __close (fd);
>>  	}
>>      }
>> +#endif
>>
>> -  return timebase_freq;
>> +  return result;
>>  }
>>
Adhemerval Zanella Netto Jan. 14, 2015, 11:12 a.m. UTC | #3
Ping, although it is powerpc specific, it would be good to have a second opinion.
Also, I would like to push it to 2.21.

Thanks!

On 06-01-2015 11:46, Adhemerval Zanella wrote:
> Ping.
>
>
> On 10-12-2014 15:18, Adhemerval Zanella wrote:
>> Ping.
>>
>> On 24-11-2014 19:44, Adhemerval Zanella wrote:
>>> This patch fixes powerpc __get_clockfreq racy and cancel-safe issues by
>>> dropping internal static cache and by using nocancel file operations.
>>> The vDSO failure check is also removed, since kernel code does not
>>> return an error (it cleans cr0.so bit on function return) and the static
>>> code (to read value /proc) now uses non-cancellable calls.
>>>
>>> Since currently I don't see this code patch to be performance sensitive
>>> (usually the clock frequency is obtained once to transform timebase
>>> values), I don't see a problem to drop its internal cache.  Also, if
>>> latency came up as being important for this one, correct approach would
>>> be use IFUNC to call vDSO symbols directly (which I do not aim to
>>> implement now).
>>>
>>> Tested on powerpc64 and powerpc32.
>>>
>>> --
>>>
>>> 	[BZ# 16418]
>>> 	* sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
>>> 	(__get_clockfreq): Make code racy and cancel safe.
>>>
>>> ---
>>>
>>> diff --git a/NEWS b/NEWS
>>> index ad170c4..833a680 100644
>>> --- a/NEWS
>>> +++ b/NEWS
>>> @@ -9,10 +9,10 @@ Version 2.21
>>>
>>>  * The following bugs are resolved with this release:
>>>
>>> -  6652, 12926, 14132, 14138, 14171, 14498, 15215, 15884, 16469, 17266,
>>> -  17344, 17363, 17370, 17371, 17411, 17460, 17475, 17485, 17501, 17506,
>>> -  17508, 17522, 17555, 17570, 17571, 17572, 17573, 17574, 17582, 17583,
>>> -  17584, 17585, 17589, 17594, 17616, 17625.
>>> +  6652, 12926, 14132, 14138, 14171, 14498, 15215, 15884, 16418, 16469,
>>> +  17266, 17344, 17363, 17370, 17371, 17411, 17460, 17475, 17485, 17501,
>>> +  17506, 17508, 17522, 17555, 17570, 17571, 17572, 17573, 17574, 17582,
>>> +  17583, 17584, 17585, 17589, 17594, 17616, 17625.
>>>
>>>  * CVE-2104-7817 The wordexp function could ignore the WRDE_NOCMD flag
>>>    under certain input conditions resulting in the execution of a shell for
>>> diff --git a/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c b/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
>>> index 62217b1..44f90b4 100644
>>> --- a/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
>>> +++ b/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
>>> @@ -24,95 +24,85 @@
>>>  #include <libc-internal.h>
>>>  #include <sysdep.h>
>>>  #include <bits/libc-vdso.h>
>>> +#include <not-cancel.h>
>>>
>>>  hp_timing_t
>>>  __get_clockfreq (void)
>>>  {
>>> +  hp_timing_t result = 0L;
>>> +
>>> +#ifdef SHARED
>>> +  /* The vDSO does not return an error (it clear cr0.so on returning).  */
>>> +  INTERNAL_SYSCALL_DECL (err);
>>> +  result =
>>> +    INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK (get_tbfreq, err, uint64_t, 0);
>>> +#else
>>>    /* We read the information from the /proc filesystem.  /proc/cpuinfo
>>>       contains at least one line like:
>>>       timebase        : 33333333
>>>       We search for this line and convert the number into an integer.  */
>>> -  static hp_timing_t timebase_freq;
>>> -  hp_timing_t result = 0L;
>>> +  int fd = __open_nocancel ("/proc/cpuinfo", O_RDONLY);
>>> +  if (__glibc_likely (fd != -1))
>>> +    return result;
>>>
>>> -  /* If this function was called before, we know the result.  */
>>> -  if (timebase_freq != 0)
>>> -    return timebase_freq;
>>> +  /* The timebase will be in the 1st 1024 bytes for systems with up
>>> +     to 8 processors.  If the first read returns less then 1024
>>> +     bytes read,  we have the whole cpuinfo and can start the scan.
>>> +     Otherwise we will have to read more to insure we have the
>>> +     timebase value in the scan.  */
>>> +  char buf[1024];
>>> +  ssize_t n;
>>>
>>> -  /* If we can use the vDSO to obtain the timebase even better.  */
>>> -#ifdef SHARED
>>> -  INTERNAL_SYSCALL_DECL (err);
>>> -  timebase_freq =
>>> -    INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK (get_tbfreq, err, uint64_t, 0);
>>> -  if (INTERNAL_SYSCALL_ERROR_P (timebase_freq, err)
>>> -      && INTERNAL_SYSCALL_ERRNO (timebase_freq, err) == ENOSYS)
>>> -#endif
>>> +  n = __read_nocancel (fd, buf, sizeof (buf));
>>> +  if (n == sizeof (buf))
>>>      {
>>> -      int fd = __open ("/proc/cpuinfo", O_RDONLY);
>>> +      /* We are here because the 1st read returned exactly sizeof
>>> +         (buf) bytes.  This implies that we are not at EOF and may
>>> +         not have read the timebase value yet.  So we need to read
>>> +         more bytes until we know we have EOF.  We copy the lower
>>> +         half of buf to the upper half and read sizeof (buf)/2
>>> +         bytes into the lower half of buf and repeat until we
>>> +         reach EOF.  We can assume that the timebase will be in
>>> +         the last 512 bytes of cpuinfo, so two 512 byte half_bufs
>>> +         will be sufficient to contain the timebase and will
>>> +         handle the case where the timebase spans the half_buf
>>> +         boundry.  */
>>> +      const ssize_t half_buf = sizeof (buf) / 2;
>>> +      while (n >= half_buf)
>>> +	{
>>> +	  memcpy (buf, buf + half_buf, half_buf);
>>> +	  n = __read_nocancel (fd, buf + half_buf, half_buf);
>>> +	}
>>> +      if (n >= 0)
>>> +	n += half_buf;
>>> +    }
>>> +  __close_nocancel (fd);
>>>
>>> -      if (__glibc_likely (fd != -1))
>>> +  if (__glibc_likely (n > 0))
>>> +    {
>>> +      char *mhz = memmem (buf, n, "timebase", 7);
>>> +
>>> +      if (__glibc_likely (mhz != NULL))
>>>  	{
>>> -	  /* The timebase will be in the 1st 1024 bytes for systems with up
>>> -	     to 8 processors.  If the first read returns less then 1024
>>> -	     bytes read,  we have the whole cpuinfo and can start the scan.
>>> -	     Otherwise we will have to read more to insure we have the
>>> -	     timebase value in the scan.  */
>>> -	  char buf[1024];
>>> -	  ssize_t n;
>>> +	  char *endp = buf + n;
>>>
>>> -	  n = __read (fd, buf, sizeof (buf));
>>> -	  if (n == sizeof (buf))
>>> -	    {
>>> -	      /* We are here because the 1st read returned exactly sizeof
>>> -	         (buf) bytes.  This implies that we are not at EOF and may
>>> -	         not have read the timebase value yet.  So we need to read
>>> -	         more bytes until we know we have EOF.  We copy the lower
>>> -	         half of buf to the upper half and read sizeof (buf)/2
>>> -	         bytes into the lower half of buf and repeat until we
>>> -	         reach EOF.  We can assume that the timebase will be in
>>> -	         the last 512 bytes of cpuinfo, so two 512 byte half_bufs
>>> -	         will be sufficient to contain the timebase and will
>>> -	         handle the case where the timebase spans the half_buf
>>> -	         boundry.  */
>>> -	      const ssize_t half_buf = sizeof (buf) / 2;
>>> -	      while (n >= half_buf)
>>> -		{
>>> -		  memcpy (buf, buf + half_buf, half_buf);
>>> -		  n = __read (fd, buf + half_buf, half_buf);
>>> -		}
>>> -	      if (n >= 0)
>>> -		n += half_buf;
>>> -	    }
>>> +	  /* Search for the beginning of the string.  */
>>> +	  while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
>>> +	    ++mhz;
>>>
>>> -	  if (__builtin_expect (n, 1) > 0)
>>> +	  while (mhz < endp && *mhz != '\n')
>>>  	    {
>>> -	      char *mhz = memmem (buf, n, "timebase", 7);
>>> -
>>> -	      if (__glibc_likely (mhz != NULL))
>>> +	      if (*mhz >= '0' && *mhz <= '9')
>>>  		{
>>> -		  char *endp = buf + n;
>>> -
>>> -		  /* Search for the beginning of the string.  */
>>> -		  while (mhz < endp && (*mhz < '0' || *mhz > '9')
>>> -			 && *mhz != '\n')
>>> -		    ++mhz;
>>> -
>>> -		  while (mhz < endp && *mhz != '\n')
>>> -		    {
>>> -		      if (*mhz >= '0' && *mhz <= '9')
>>> -			{
>>> -			  result *= 10;
>>> -			  result += *mhz - '0';
>>> -			}
>>> -
>>> -		      ++mhz;
>>> -		    }
>>> +		  result *= 10;
>>> +		  result += *mhz - '0';
>>>  		}
>>> -	      timebase_freq = result;
>>> +
>>> +	      ++mhz;
>>>  	    }
>>> -	  __close (fd);
>>>  	}
>>>      }
>>> +#endif
>>>
>>> -  return timebase_freq;
>>> +  return result;
>>>  }
>>>
Carlos O'Donell Jan. 21, 2015, 2:12 p.m. UTC | #4
On 11/24/2014 04:44 PM, Adhemerval Zanella wrote:
> This patch fixes powerpc __get_clockfreq racy and cancel-safe issues by
> dropping internal static cache and by using nocancel file operations.
> The vDSO failure check is also removed, since kernel code does not
> return an error (it cleans cr0.so bit on function return) and the static
> code (to read value /proc) now uses non-cancellable calls.
> 
> Since currently I don't see this code patch to be performance sensitive
> (usually the clock frequency is obtained once to transform timebase
> values), I don't see a problem to drop its internal cache.  Also, if
> latency came up as being important for this one, correct approach would
> be use IFUNC to call vDSO symbols directly (which I do not aim to
> implement now).
> 
> Tested on powerpc64 and powerpc32.

This looks good to me.

You've removed the cache which removes the MT-unsafe multiple-writers
of the cache. You could make this performant by using atomic loads and
stores with acq/rel semantics to ensure happens before. However, that
would be a future enhancement if you wanted to do it that way.

You've removed the potential cancellation points and that makes you
conformant with POSIX in that you no longer create cancellation points
for clock_* functions.

e.g.
An implementation shall not introduce cancellation points into any 
other functions specified in this volume of IEEE Std 1003.1-2001.
 
> --
> 
> 	[BZ# 16418]
> 	* sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
> 	(__get_clockfreq): Make code racy and cancel safe.
> 
> ---
> 
> diff --git a/NEWS b/NEWS
> index ad170c4..833a680 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -9,10 +9,10 @@ Version 2.21
>  
>  * The following bugs are resolved with this release:
>  
> -  6652, 12926, 14132, 14138, 14171, 14498, 15215, 15884, 16469, 17266,
> -  17344, 17363, 17370, 17371, 17411, 17460, 17475, 17485, 17501, 17506,
> -  17508, 17522, 17555, 17570, 17571, 17572, 17573, 17574, 17582, 17583,
> -  17584, 17585, 17589, 17594, 17616, 17625.
> +  6652, 12926, 14132, 14138, 14171, 14498, 15215, 15884, 16418, 16469,
> +  17266, 17344, 17363, 17370, 17371, 17411, 17460, 17475, 17485, 17501,
> +  17506, 17508, 17522, 17555, 17570, 17571, 17572, 17573, 17574, 17582,
> +  17583, 17584, 17585, 17589, 17594, 17616, 17625.
>  
>  * CVE-2104-7817 The wordexp function could ignore the WRDE_NOCMD flag
>    under certain input conditions resulting in the execution of a shell for
> diff --git a/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c b/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
> index 62217b1..44f90b4 100644
> --- a/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
> +++ b/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
> @@ -24,95 +24,85 @@
>  #include <libc-internal.h>
>  #include <sysdep.h>
>  #include <bits/libc-vdso.h>
> +#include <not-cancel.h>
>  
>  hp_timing_t
>  __get_clockfreq (void)
>  {
> +  hp_timing_t result = 0L;
> +
> +#ifdef SHARED
> +  /* The vDSO does not return an error (it clear cr0.so on returning).  */
> +  INTERNAL_SYSCALL_DECL (err);
> +  result =
> +    INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK (get_tbfreq, err, uint64_t, 0);
> +#else
>    /* We read the information from the /proc filesystem.  /proc/cpuinfo
>       contains at least one line like:
>       timebase        : 33333333
>       We search for this line and convert the number into an integer.  */
> -  static hp_timing_t timebase_freq;
> -  hp_timing_t result = 0L;
> +  int fd = __open_nocancel ("/proc/cpuinfo", O_RDONLY);
> +  if (__glibc_likely (fd != -1))
> +    return result;
>  
> -  /* If this function was called before, we know the result.  */
> -  if (timebase_freq != 0)
> -    return timebase_freq;
> +  /* The timebase will be in the 1st 1024 bytes for systems with up
> +     to 8 processors.  If the first read returns less then 1024
> +     bytes read,  we have the whole cpuinfo and can start the scan.
> +     Otherwise we will have to read more to insure we have the
> +     timebase value in the scan.  */
> +  char buf[1024];
> +  ssize_t n;
>  
> -  /* If we can use the vDSO to obtain the timebase even better.  */
> -#ifdef SHARED
> -  INTERNAL_SYSCALL_DECL (err);
> -  timebase_freq =
> -    INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK (get_tbfreq, err, uint64_t, 0);
> -  if (INTERNAL_SYSCALL_ERROR_P (timebase_freq, err)
> -      && INTERNAL_SYSCALL_ERRNO (timebase_freq, err) == ENOSYS)
> -#endif
> +  n = __read_nocancel (fd, buf, sizeof (buf));
> +  if (n == sizeof (buf))
>      {
> -      int fd = __open ("/proc/cpuinfo", O_RDONLY);
> +      /* We are here because the 1st read returned exactly sizeof
> +         (buf) bytes.  This implies that we are not at EOF and may
> +         not have read the timebase value yet.  So we need to read
> +         more bytes until we know we have EOF.  We copy the lower
> +         half of buf to the upper half and read sizeof (buf)/2
> +         bytes into the lower half of buf and repeat until we
> +         reach EOF.  We can assume that the timebase will be in
> +         the last 512 bytes of cpuinfo, so two 512 byte half_bufs
> +         will be sufficient to contain the timebase and will
> +         handle the case where the timebase spans the half_buf
> +         boundry.  */
> +      const ssize_t half_buf = sizeof (buf) / 2;
> +      while (n >= half_buf)
> +	{
> +	  memcpy (buf, buf + half_buf, half_buf);
> +	  n = __read_nocancel (fd, buf + half_buf, half_buf);
> +	}
> +      if (n >= 0)
> +	n += half_buf;
> +    }
> +  __close_nocancel (fd);
>  
> -      if (__glibc_likely (fd != -1))
> +  if (__glibc_likely (n > 0))
> +    {
> +      char *mhz = memmem (buf, n, "timebase", 7);
> +
> +      if (__glibc_likely (mhz != NULL))
>  	{
> -	  /* The timebase will be in the 1st 1024 bytes for systems with up
> -	     to 8 processors.  If the first read returns less then 1024
> -	     bytes read,  we have the whole cpuinfo and can start the scan.
> -	     Otherwise we will have to read more to insure we have the
> -	     timebase value in the scan.  */
> -	  char buf[1024];
> -	  ssize_t n;
> +	  char *endp = buf + n;
>  
> -	  n = __read (fd, buf, sizeof (buf));
> -	  if (n == sizeof (buf))
> -	    {
> -	      /* We are here because the 1st read returned exactly sizeof
> -	         (buf) bytes.  This implies that we are not at EOF and may
> -	         not have read the timebase value yet.  So we need to read
> -	         more bytes until we know we have EOF.  We copy the lower
> -	         half of buf to the upper half and read sizeof (buf)/2
> -	         bytes into the lower half of buf and repeat until we
> -	         reach EOF.  We can assume that the timebase will be in
> -	         the last 512 bytes of cpuinfo, so two 512 byte half_bufs
> -	         will be sufficient to contain the timebase and will
> -	         handle the case where the timebase spans the half_buf
> -	         boundry.  */
> -	      const ssize_t half_buf = sizeof (buf) / 2;
> -	      while (n >= half_buf)
> -		{
> -		  memcpy (buf, buf + half_buf, half_buf);
> -		  n = __read (fd, buf + half_buf, half_buf);
> -		}
> -	      if (n >= 0)
> -		n += half_buf;
> -	    }
> +	  /* Search for the beginning of the string.  */
> +	  while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
> +	    ++mhz;
>  
> -	  if (__builtin_expect (n, 1) > 0)
> +	  while (mhz < endp && *mhz != '\n')
>  	    {
> -	      char *mhz = memmem (buf, n, "timebase", 7);
> -
> -	      if (__glibc_likely (mhz != NULL))
> +	      if (*mhz >= '0' && *mhz <= '9')
>  		{
> -		  char *endp = buf + n;
> -
> -		  /* Search for the beginning of the string.  */
> -		  while (mhz < endp && (*mhz < '0' || *mhz > '9')
> -			 && *mhz != '\n')
> -		    ++mhz;
> -
> -		  while (mhz < endp && *mhz != '\n')
> -		    {
> -		      if (*mhz >= '0' && *mhz <= '9')
> -			{
> -			  result *= 10;
> -			  result += *mhz - '0';
> -			}
> -
> -		      ++mhz;
> -		    }
> +		  result *= 10;
> +		  result += *mhz - '0';
>  		}
> -	      timebase_freq = result;
> +
> +	      ++mhz;
>  	    }
> -	  __close (fd);
>  	}
>      }
> +#endif
>  
> -  return timebase_freq;
> +  return result;
>  }
>
Adhemerval Zanella Netto Jan. 21, 2015, 3:55 p.m. UTC | #5
On 21-01-2015 12:12, Carlos O'Donell wrote:
> On 11/24/2014 04:44 PM, Adhemerval Zanella wrote:
>> This patch fixes powerpc __get_clockfreq racy and cancel-safe issues by
>> dropping internal static cache and by using nocancel file operations.
>> The vDSO failure check is also removed, since kernel code does not
>> return an error (it cleans cr0.so bit on function return) and the static
>> code (to read value /proc) now uses non-cancellable calls.
>>
>> Since currently I don't see this code patch to be performance sensitive
>> (usually the clock frequency is obtained once to transform timebase
>> values), I don't see a problem to drop its internal cache.  Also, if
>> latency came up as being important for this one, correct approach would
>> be use IFUNC to call vDSO symbols directly (which I do not aim to
>> implement now).
>>
>> Tested on powerpc64 and powerpc32.
> This looks good to me.
>
> You've removed the cache which removes the MT-unsafe multiple-writers
> of the cache. You could make this performant by using atomic loads and
> stores with acq/rel semantics to ensure happens before. However, that
> would be a future enhancement if you wanted to do it that way.
>
> You've removed the potential cancellation points and that makes you
> conformant with POSIX in that you no longer create cancellation points
> for clock_* functions.
>
> e.g.
> An implementation shall not introduce cancellation points into any 
> other functions specified in this volume of IEEE Std 1003.1-2001.
>
Pushed upstream.  I will think if it is really worth to optimize this 
function for static case.  Thanks for the review.
diff mbox

Patch

diff --git a/NEWS b/NEWS
index ad170c4..833a680 100644
--- a/NEWS
+++ b/NEWS
@@ -9,10 +9,10 @@  Version 2.21
 
 * The following bugs are resolved with this release:
 
-  6652, 12926, 14132, 14138, 14171, 14498, 15215, 15884, 16469, 17266,
-  17344, 17363, 17370, 17371, 17411, 17460, 17475, 17485, 17501, 17506,
-  17508, 17522, 17555, 17570, 17571, 17572, 17573, 17574, 17582, 17583,
-  17584, 17585, 17589, 17594, 17616, 17625.
+  6652, 12926, 14132, 14138, 14171, 14498, 15215, 15884, 16418, 16469,
+  17266, 17344, 17363, 17370, 17371, 17411, 17460, 17475, 17485, 17501,
+  17506, 17508, 17522, 17555, 17570, 17571, 17572, 17573, 17574, 17582,
+  17583, 17584, 17585, 17589, 17594, 17616, 17625.
 
 * CVE-2104-7817 The wordexp function could ignore the WRDE_NOCMD flag
   under certain input conditions resulting in the execution of a shell for
diff --git a/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c b/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
index 62217b1..44f90b4 100644
--- a/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
+++ b/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c
@@ -24,95 +24,85 @@ 
 #include <libc-internal.h>
 #include <sysdep.h>
 #include <bits/libc-vdso.h>
+#include <not-cancel.h>
 
 hp_timing_t
 __get_clockfreq (void)
 {
+  hp_timing_t result = 0L;
+
+#ifdef SHARED
+  /* The vDSO does not return an error (it clear cr0.so on returning).  */
+  INTERNAL_SYSCALL_DECL (err);
+  result =
+    INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK (get_tbfreq, err, uint64_t, 0);
+#else
   /* We read the information from the /proc filesystem.  /proc/cpuinfo
      contains at least one line like:
      timebase        : 33333333
      We search for this line and convert the number into an integer.  */
-  static hp_timing_t timebase_freq;
-  hp_timing_t result = 0L;
+  int fd = __open_nocancel ("/proc/cpuinfo", O_RDONLY);
+  if (__glibc_likely (fd != -1))
+    return result;
 
-  /* If this function was called before, we know the result.  */
-  if (timebase_freq != 0)
-    return timebase_freq;
+  /* The timebase will be in the 1st 1024 bytes for systems with up
+     to 8 processors.  If the first read returns less then 1024
+     bytes read,  we have the whole cpuinfo and can start the scan.
+     Otherwise we will have to read more to insure we have the
+     timebase value in the scan.  */
+  char buf[1024];
+  ssize_t n;
 
-  /* If we can use the vDSO to obtain the timebase even better.  */
-#ifdef SHARED
-  INTERNAL_SYSCALL_DECL (err);
-  timebase_freq =
-    INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK (get_tbfreq, err, uint64_t, 0);
-  if (INTERNAL_SYSCALL_ERROR_P (timebase_freq, err)
-      && INTERNAL_SYSCALL_ERRNO (timebase_freq, err) == ENOSYS)
-#endif
+  n = __read_nocancel (fd, buf, sizeof (buf));
+  if (n == sizeof (buf))
     {
-      int fd = __open ("/proc/cpuinfo", O_RDONLY);
+      /* We are here because the 1st read returned exactly sizeof
+         (buf) bytes.  This implies that we are not at EOF and may
+         not have read the timebase value yet.  So we need to read
+         more bytes until we know we have EOF.  We copy the lower
+         half of buf to the upper half and read sizeof (buf)/2
+         bytes into the lower half of buf and repeat until we
+         reach EOF.  We can assume that the timebase will be in
+         the last 512 bytes of cpuinfo, so two 512 byte half_bufs
+         will be sufficient to contain the timebase and will
+         handle the case where the timebase spans the half_buf
+         boundry.  */
+      const ssize_t half_buf = sizeof (buf) / 2;
+      while (n >= half_buf)
+	{
+	  memcpy (buf, buf + half_buf, half_buf);
+	  n = __read_nocancel (fd, buf + half_buf, half_buf);
+	}
+      if (n >= 0)
+	n += half_buf;
+    }
+  __close_nocancel (fd);
 
-      if (__glibc_likely (fd != -1))
+  if (__glibc_likely (n > 0))
+    {
+      char *mhz = memmem (buf, n, "timebase", 7);
+
+      if (__glibc_likely (mhz != NULL))
 	{
-	  /* The timebase will be in the 1st 1024 bytes for systems with up
-	     to 8 processors.  If the first read returns less then 1024
-	     bytes read,  we have the whole cpuinfo and can start the scan.
-	     Otherwise we will have to read more to insure we have the
-	     timebase value in the scan.  */
-	  char buf[1024];
-	  ssize_t n;
+	  char *endp = buf + n;
 
-	  n = __read (fd, buf, sizeof (buf));
-	  if (n == sizeof (buf))
-	    {
-	      /* We are here because the 1st read returned exactly sizeof
-	         (buf) bytes.  This implies that we are not at EOF and may
-	         not have read the timebase value yet.  So we need to read
-	         more bytes until we know we have EOF.  We copy the lower
-	         half of buf to the upper half and read sizeof (buf)/2
-	         bytes into the lower half of buf and repeat until we
-	         reach EOF.  We can assume that the timebase will be in
-	         the last 512 bytes of cpuinfo, so two 512 byte half_bufs
-	         will be sufficient to contain the timebase and will
-	         handle the case where the timebase spans the half_buf
-	         boundry.  */
-	      const ssize_t half_buf = sizeof (buf) / 2;
-	      while (n >= half_buf)
-		{
-		  memcpy (buf, buf + half_buf, half_buf);
-		  n = __read (fd, buf + half_buf, half_buf);
-		}
-	      if (n >= 0)
-		n += half_buf;
-	    }
+	  /* Search for the beginning of the string.  */
+	  while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
+	    ++mhz;
 
-	  if (__builtin_expect (n, 1) > 0)
+	  while (mhz < endp && *mhz != '\n')
 	    {
-	      char *mhz = memmem (buf, n, "timebase", 7);
-
-	      if (__glibc_likely (mhz != NULL))
+	      if (*mhz >= '0' && *mhz <= '9')
 		{
-		  char *endp = buf + n;
-
-		  /* Search for the beginning of the string.  */
-		  while (mhz < endp && (*mhz < '0' || *mhz > '9')
-			 && *mhz != '\n')
-		    ++mhz;
-
-		  while (mhz < endp && *mhz != '\n')
-		    {
-		      if (*mhz >= '0' && *mhz <= '9')
-			{
-			  result *= 10;
-			  result += *mhz - '0';
-			}
-
-		      ++mhz;
-		    }
+		  result *= 10;
+		  result += *mhz - '0';
 		}
-	      timebase_freq = result;
+
+	      ++mhz;
 	    }
-	  __close (fd);
 	}
     }
+#endif
 
-  return timebase_freq;
+  return result;
 }