time: strftime_l: Use malloc rather than an unbounded alloca.

Message ID 20230510195946.3728273-1-josimmon@redhat.com
State Superseded
Headers
Series time: strftime_l: Use malloc rather than an unbounded alloca. |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Joe Simmons-Talbott May 10, 2023, 7:59 p.m. UTC
  Avoid possible stack overflow by replacing alloca() with malloc().
---
 time/strftime_l.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
  

Comments

Adhemerval Zanella Netto May 16, 2023, 7:28 p.m. UTC | #1
On 10/05/23 16:59, Joe Simmons-Talbott via Libc-alpha wrote:
> Avoid possible stack overflow by replacing alloca() with malloc().
> ---
>  time/strftime_l.c | 8 ++++++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/time/strftime_l.c b/time/strftime_l.c
> index 402c6c4111..59d3e1a3b2 100644
> --- a/time/strftime_l.c
> +++ b/time/strftime_l.c
> @@ -273,8 +273,9 @@ static const CHAR_T zeroes[16] = /* "0000000000000000" */
>      const char *__s = os;						      \
>      memset (&__st, '\0', sizeof (__st));				      \
>      l = __mbsrtowcs_l (NULL, &__s, 0, &__st, loc);			      \
> -    ws = alloca ((l + 1) * sizeof (wchar_t));				      \
> -    (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc);			      \
> +    ws = malloc ((l + 1) * sizeof (wchar_t));				      \
> +    if (ws != NULL)							      \
> +      (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc);			      \
>    }
>  #endif
>  
> @@ -1346,7 +1347,10 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
>  	    wchar_t *wczone;
>  	    size_t len;
>  	    widen (zone, wczone, len);
> +	    if (wczone == NULL)
> +	      return 0;
>  	    cpy (len, wczone);
> +	    free (wczone);
>  	  }
>  #else
>  	  cpy (strlen (zone), zone);

Do we have a practical maximum size for the abbreviate timezone name?  The
internal tz_rule 'name' field is just a pointer, but I think all timezones
uses a maximum name size.
  
Joe Simmons-Talbott May 16, 2023, 7:53 p.m. UTC | #2
On Tue, May 16, 2023 at 04:28:58PM -0300, Adhemerval Zanella Netto wrote:
> 
> 
> On 10/05/23 16:59, Joe Simmons-Talbott via Libc-alpha wrote:
> > Avoid possible stack overflow by replacing alloca() with malloc().
> > ---
> >  time/strftime_l.c | 8 ++++++--
> >  1 file changed, 6 insertions(+), 2 deletions(-)
> > 
> > diff --git a/time/strftime_l.c b/time/strftime_l.c
> > index 402c6c4111..59d3e1a3b2 100644
> > --- a/time/strftime_l.c
> > +++ b/time/strftime_l.c
> > @@ -273,8 +273,9 @@ static const CHAR_T zeroes[16] = /* "0000000000000000" */
> >      const char *__s = os;						      \
> >      memset (&__st, '\0', sizeof (__st));				      \
> >      l = __mbsrtowcs_l (NULL, &__s, 0, &__st, loc);			      \
> > -    ws = alloca ((l + 1) * sizeof (wchar_t));				      \
> > -    (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc);			      \
> > +    ws = malloc ((l + 1) * sizeof (wchar_t));				      \
> > +    if (ws != NULL)							      \
> > +      (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc);			      \
> >    }
> >  #endif
> >  
> > @@ -1346,7 +1347,10 @@ __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
> >  	    wchar_t *wczone;
> >  	    size_t len;
> >  	    widen (zone, wczone, len);
> > +	    if (wczone == NULL)
> > +	      return 0;
> >  	    cpy (len, wczone);
> > +	    free (wczone);
> >  	  }
> >  #else
> >  	  cpy (strlen (zone), zone);
> 
> Do we have a practical maximum size for the abbreviate timezone name?  The
> internal tz_rule 'name' field is just a pointer, but I think all timezones
> uses a maximum name size.
> 

I was able to pass a random string in via the TZ environment variable.
I'm not sure if that matters since the size of the buffer (s) and
maxsize would still limit the amount of bytes from the TZ variable.
  
Paul Eggert May 16, 2023, 10 p.m. UTC | #3
On 5/16/23 12:28, Adhemerval Zanella Netto via Libc-alpha wrote:
> Do we have a practical maximum size for the abbreviate timezone name?  The
> internal tz_rule 'name' field is just a pointer, but I think all timezones
> uses a maximum name size.

In current TZDB data the longest abbreviation has four bytes (e.g., 
"AKST", "NZST"). Before TZDB release 2016g (2016-09-13) the longest was 
48 bytes, for the Factory zone's "Local time zone must be set--see zic 
manual page" abbreviation.

Although the TZif file format's limit is 2**32 - 1 bytes, the 'zic' 
command won't process TZif files with abbreviations longer than about 
2000 bytes, due to its internal input line buffer.

There is a limit even if you use long strings in TZ environment 
variables, as glibc strftime is buggy when a buffer contains more than 
INT_MAX bytes (it uses 'int' to store byte counts). This means you can't 
effectively use a time zone abbreviation longer than about 2**31 - 1 
bytes with glibc, even on 64-bit platforms. For example, a program like 
the one shown below won't work with glibc.

For what it's worth, _POSIX_TZNAME_MAX is 6, that is, every POSIX 
platform must support at least 6 bytes.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

int
main ()
{
   size_t Xs = (1LL << 32) + 1;
   char *tzbuf = malloc (sizeof "TZ=" - 1 + Xs + sizeof "0");
   if (!tzbuf)
     return perror ("malloc tzbuf"), 1;
   size_t strftimebufsize = Xs + 2;
   char *strftimebuf = malloc (strftimebufsize);
   if (!strftimebuf)
     return perror ("malloc strftimebuf"), 1;
   strcpy (tzbuf, "TZ=");
   memset (tzbuf + 3, 'X', Xs);
   strcpy (tzbuf + 3 + Xs, "0");
   if (putenv (tzbuf))
     return perror ("putenv"), 1;
   time_t t0 = 0;
   struct tm *tm = localtime (&t0);
   if (!tm)
     return perror ("localtime"), 1;
   size_t nbytes = strftime (strftimebuf, strftimebufsize, "%Z", tm);
   if (!nbytes)
     return perror ("strftime"), 1;
   if (puts (strftimebuf) < 0)
     return perror ("puts"), 1;
   if (fclose (stdout) < 0)
     return perror ("fclose"), 1;
   return 0;
}
  
Adhemerval Zanella Netto May 17, 2023, 11:04 a.m. UTC | #4
On 16/05/23 19:00, Paul Eggert wrote:
> On 5/16/23 12:28, Adhemerval Zanella Netto via Libc-alpha wrote:
>> Do we have a practical maximum size for the abbreviate timezone name?  The
>> internal tz_rule 'name' field is just a pointer, but I think all timezones
>> uses a maximum name size.
> 
> In current TZDB data the longest abbreviation has four bytes (e.g., "AKST", "NZST"). Before TZDB release 2016g (2016-09-13) the longest was 48 bytes, for the Factory zone's "Local time zone must be set--see zic manual page" abbreviation.
> 
> Although the TZif file format's limit is 2**32 - 1 bytes, the 'zic' command won't process TZif files with abbreviations longer than about 2000 bytes, due to its internal input line buffer.
> 
> There is a limit even if you use long strings in TZ environment variables, as glibc strftime is buggy when a buffer contains more than INT_MAX bytes (it uses 'int' to store byte counts). This means you can't effectively use a time zone abbreviation longer than about 2**31 - 1 bytes with glibc, even on 64-bit platforms. For example, a program like the one shown below won't work with glibc.
> 
> For what it's worth, _POSIX_TZNAME_MAX is 6, that is, every POSIX platform must support at least 6 bytes.

Thanks for the information, so do you think it would be feasible to assume
_POSIX_TZNAME_MAX or 48 (for Factory zone) to avoid the malloc call on
strftime_l?

> 
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <time.h>
> 
> int
> main ()
> {
>   size_t Xs = (1LL << 32) + 1;
>   char *tzbuf = malloc (sizeof "TZ=" - 1 + Xs + sizeof "0");
>   if (!tzbuf)
>     return perror ("malloc tzbuf"), 1;
>   size_t strftimebufsize = Xs + 2;
>   char *strftimebuf = malloc (strftimebufsize);
>   if (!strftimebuf)
>     return perror ("malloc strftimebuf"), 1;
>   strcpy (tzbuf, "TZ=");
>   memset (tzbuf + 3, 'X', Xs);
>   strcpy (tzbuf + 3 + Xs, "0");
>   if (putenv (tzbuf))
>     return perror ("putenv"), 1;
>   time_t t0 = 0;
>   struct tm *tm = localtime (&t0);
>   if (!tm)
>     return perror ("localtime"), 1;
>   size_t nbytes = strftime (strftimebuf, strftimebufsize, "%Z", tm);
>   if (!nbytes)
>     return perror ("strftime"), 1;
>   if (puts (strftimebuf) < 0)
>     return perror ("puts"), 1;
>   if (fclose (stdout) < 0)
>     return perror ("fclose"), 1;
>   return 0;
> }
>
  
Paul Eggert May 17, 2023, 5:40 p.m. UTC | #5
On 2023-05-17 04:04, Adhemerval Zanella Netto wrote:
> do you think it would be feasible to assume
> _POSIX_TZNAME_MAX or 48 (for Factory zone) to avoid the malloc call on
> strftime_l?

ALthough it's technically feasible, it'd be some work. Among other 
things, whatever limit we chose would have to become visible via 
<limits.h> and sysconf and etc., and we'd need to document that there is 
now a limit. Better, I think, would be to continue to follow the GNU 
coding standards and not impose an arbitrary limit on time zone 
abbreviation length, even for the highly unusual case where code is 
calling wcsftime or wcsftime_l.

How about avoiding the malloc in the following way? I installed the 
attached patch into Gnulib lib/nstrftime.c, which has forked from glibc 
but with some work could be merged back in, and this should also fix the 
glibc bugs with buffer sizes exceeding INT_MAX.

If you don't want all the hassle of merging, you can cherry-pick this 
patch. I haven't tested the patch, though, as Gnulib does not use this code.
  
Joe Simmons-Talbott May 22, 2023, 6:35 p.m. UTC | #6
On Wed, May 17, 2023 at 10:40:05AM -0700, Paul Eggert wrote:
> On 2023-05-17 04:04, Adhemerval Zanella Netto wrote:
> > do you think it would be feasible to assume
> > _POSIX_TZNAME_MAX or 48 (for Factory zone) to avoid the malloc call on
> > strftime_l?
> 
> ALthough it's technically feasible, it'd be some work. Among other things,
> whatever limit we chose would have to become visible via <limits.h> and
> sysconf and etc., and we'd need to document that there is now a limit.
> Better, I think, would be to continue to follow the GNU coding standards and
> not impose an arbitrary limit on time zone abbreviation length, even for the
> highly unusual case where code is calling wcsftime or wcsftime_l.
> 
> How about avoiding the malloc in the following way? I installed the attached
> patch into Gnulib lib/nstrftime.c, which has forked from glibc but with some
> work could be merged back in, and this should also fix the glibc bugs with
> buffer sizes exceeding INT_MAX.
> 
> If you don't want all the hassle of merging, you can cherry-pick this patch.
> I haven't tested the patch, though, as Gnulib does not use this code.

I've pushed your suggested patch after testing on x86_64 and i686. [1]

Thanks,
Joe

[1] https://sourceware.org/pipermail/libc-alpha/2023-May/148384.html

> From 197eec6075bbaf2b97137115a09a6bbce43b4dd4 Mon Sep 17 00:00:00 2001
> From: Paul Eggert <eggert@cs.ucla.edu>
> Date: Wed, 17 May 2023 10:27:40 -0700
> Subject: [PATCH] nstrftime: suggest to glibc how to avoid alloca
> 
> * lib/nstrftime.c (widen) [COMPILE_WIDE]: Remove.
> (__strftime_internal) [COMPILE_WIDE): Instead of converting the
> multibyte time zone abbreviation into a potentially unbounded
> alloca buffer, convert it directly into the output buffer.
> Although this code is not used in Gnulib, this can help the glibc
> developers avoid the problem on the glibc side.
> ---
>  ChangeLog       | 10 ++++++++++
>  lib/nstrftime.c | 39 +++++++++++++++++++++++++--------------
>  2 files changed, 35 insertions(+), 14 deletions(-)
> 
> diff --git a/ChangeLog b/ChangeLog
> index ecbc25ef06..36b3c65b81 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,13 @@
> +2023-05-17  Paul Eggert  <eggert@cs.ucla.edu>
> +
> +	nstrftime: suggest to glibc how to avoid alloca
> +	* lib/nstrftime.c (widen) [COMPILE_WIDE]: Remove.
> +	(__strftime_internal) [COMPILE_WIDE): Instead of converting the
> +	multibyte time zone abbreviation into a potentially unbounded
> +	alloca buffer, convert it directly into the output buffer.
> +	Although this code is not used in Gnulib, this can help the glibc
> +	developers avoid the problem on the glibc side.
> +
>  2023-05-15  Bruno Haible  <bruno@clisp.org>
>  
>  	doc: New chapter "Strings and Characters".
> diff --git a/lib/nstrftime.c b/lib/nstrftime.c
> index 68bb560910..35a9307e1a 100644
> --- a/lib/nstrftime.c
> +++ b/lib/nstrftime.c
> @@ -226,15 +226,6 @@ extern char *tzname[];
>  #  undef __mbsrtowcs_l
>  #  define __mbsrtowcs_l(d, s, l, st, loc) __mbsrtowcs (d, s, l, st)
>  # endif
> -# define widen(os, ws, l) \
> -  {                                                                           \
> -    mbstate_t __st;                                                           \
> -    const char *__s = os;                                                     \
> -    memset (&__st, '\0', sizeof (__st));                                      \
> -    l = __mbsrtowcs_l (NULL, &__s, 0, &__st, loc);                            \
> -    ws = (wchar_t *) alloca ((l + 1) * sizeof (wchar_t));                     \
> -    (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc);                           \
> -  }
>  #endif
>  
>  
> @@ -1374,11 +1365,31 @@ __strftime_internal (STREAM_OR_CHAR_T *s, STRFTIME_ARG (size_t maxsize)
>  #ifdef COMPILE_WIDE
>            {
>              /* The zone string is always given in multibyte form.  We have
> -               to transform it first.  */
> -            wchar_t *wczone;
> -            size_t len;
> -            widen (zone, wczone, len);
> -            cpy (len, wczone);
> +               to convert it to wide character.  */
> +            size_t w = pad == L_('-') || width < 0 ? 0 : width;
> +            char const *z = zone;
> +            mbstate_t st = {0};
> +            size_t len = __mbsrtowcs_l (p, &z, maxsize - i, &st, loc);
> +            if (len == (size_t) -1)
> +              return 0;
> +            size_t incr = len < w ? w : len;
> +            if (incr >= maxsize - i)
> +              {
> +                errno = ERANGE;
> +                return 0;
> +              }
> +            if (p)
> +              {
> +                if (len < w)
> +                  {
> +                    size_t delta = w - len;
> +                    wmemmove (p + delta, p, len);
> +                    wchar_t wc = pad == L_('0') || pad == L_('+') ? L'0' : L' ';
> +                    wmemset (p, wc, delta);
> +                  }
> +                p += incr;
> +              }
> +            i += incr;
>            }
>  #else
>            cpy (strlen (zone), zone);
> -- 
> 2.39.2
>
  

Patch

diff --git a/time/strftime_l.c b/time/strftime_l.c
index 402c6c4111..59d3e1a3b2 100644
--- a/time/strftime_l.c
+++ b/time/strftime_l.c
@@ -273,8 +273,9 @@  static const CHAR_T zeroes[16] = /* "0000000000000000" */
     const char *__s = os;						      \
     memset (&__st, '\0', sizeof (__st));				      \
     l = __mbsrtowcs_l (NULL, &__s, 0, &__st, loc);			      \
-    ws = alloca ((l + 1) * sizeof (wchar_t));				      \
-    (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc);			      \
+    ws = malloc ((l + 1) * sizeof (wchar_t));				      \
+    if (ws != NULL)							      \
+      (void) __mbsrtowcs_l (ws, &__s, l, &__st, loc);			      \
   }
 #endif
 
@@ -1346,7 +1347,10 @@  __strftime_internal (CHAR_T *s, size_t maxsize, const CHAR_T *format,
 	    wchar_t *wczone;
 	    size_t len;
 	    widen (zone, wczone, len);
+	    if (wczone == NULL)
+	      return 0;
 	    cpy (len, wczone);
+	    free (wczone);
 	  }
 #else
 	  cpy (strlen (zone), zone);