From patchwork Mon Jun 19 16:20:34 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Florian Weimer X-Patchwork-Id: 21106 Received: (qmail 90371 invoked by alias); 19 Jun 2017 16:20:47 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 90125 invoked by uid 89); 19 Jun 2017 16:20:35 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-26.9 required=5.0 tests=BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, SPF_HELO_PASS, T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 spammy= X-HELO: mx1.redhat.com DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com B7C9380B52 Authentication-Results: ext-mx04.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx04.extmail.prod.ext.phx2.redhat.com; spf=pass smtp.mailfrom=fweimer@redhat.com DKIM-Filter: OpenDKIM Filter v2.11.0 mx1.redhat.com B7C9380B52 Date: Mon, 19 Jun 2017 18:20:34 +0200 To: libc-alpha@sourceware.org Subject: [PATCH] vfprintf: Remove alloca from string formatting with conversion User-Agent: Heirloom mailx 12.5 7/5/10 MIME-Version: 1.0 Message-Id: <20170619162034.8F1FA402AEC0E@oldenburg.str.redhat.com> From: fweimer@redhat.com (Florian Weimer) 2017-06-19 Florian Weimer * stdio-common/vfprintf.c (done_add_func): New function. (done_add): Use it. (OTHER_CHAR_T, CONVERT_FROM_OTHER_STRING): New macros. (pad_func): New function. (PAD): Use it. (outstring_func): New function. (outstring): Use it. (outstring_converted_wide_string): New function. (process_arg): Use it. diff --git a/stdio-common/Makefile b/stdio-common/Makefile index 309d83e..622a85f 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -87,6 +87,7 @@ $(objpfx)tst-grouping.out: $(gen-locales) $(objpfx)tst-sprintf.out: $(gen-locales) $(objpfx)tst-sscanf.out: $(gen-locales) $(objpfx)tst-swprintf.out: $(gen-locales) +$(objpfx)tst-vfprintf-mbs-prec.out: $(gen-locales) endif tst-printf-bz18872-ENV = MALLOC_TRACE=$(objpfx)tst-printf-bz18872.mtrace diff --git a/stdio-common/vfprintf.c b/stdio-common/vfprintf.c index b15c5d0..d58f0ea 100644 --- a/stdio-common/vfprintf.c +++ b/stdio-common/vfprintf.c @@ -67,22 +67,37 @@ } while (0) #define UNBUFFERED_P(S) ((S)->_IO_file_flags & _IO_UNBUFFERED) -#define done_add(val) \ - do { \ - unsigned int _val = val; \ - assert ((unsigned int) done < (unsigned int) INT_MAX); \ - if (__glibc_unlikely (INT_MAX - done < _val)) \ - { \ - done = -1; \ - __set_errno (EOVERFLOW); \ - goto all_done; \ - } \ - done += _val; \ - } while (0) +/* Add LENGTH to DONE. Return the new value of DONE, or -1 on + overflow (and set errno accordingly). */ +static inline int +done_add_func (int length, int done) +{ + if (done < 0) + return done; + if (__glibc_unlikely (INT_MAX - done < length)) + { + __set_errno (EOVERFLOW); + return -1; + } + return done + length; +} + +#define done_add(val) \ + do \ + { \ + /* Ensure that VAL has a type similar to int. */ \ + _Static_assert (sizeof (val) == sizeof (int), "value int size"); \ + _Static_assert ((__typeof__ (val)) -1 < 0, "value signed"); \ + done = done_add_func ((val), done); \ + if (done < 0) \ + goto all_done; \ + } \ + while (0) #ifndef COMPILE_WPRINTF # define vfprintf _IO_vfprintf_internal # define CHAR_T char +# define OTHER_CHAR_T wchar_t # define UCHAR_T unsigned char # define INT_T int typedef const char *THOUSANDS_SEP_T; @@ -91,25 +106,14 @@ typedef const char *THOUSANDS_SEP_T; # define STR_LEN(Str) strlen (Str) # define PUT(F, S, N) _IO_sputn ((F), (S), (N)) -# define PAD(Padchar) \ - do { \ - if (width > 0) \ - { \ - _IO_ssize_t written = _IO_padn (s, (Padchar), width); \ - if (__glibc_unlikely (written != width)) \ - { \ - done = -1; \ - goto all_done; \ - } \ - done_add (written); \ - } \ - } while (0) # define PUTC(C, F) _IO_putc_unlocked (C, F) # define ORIENT if (_IO_vtable_offset (s) == 0 && _IO_fwide (s, -1) != -1)\ return -1 +# define CONVERT_FROM_OTHER_STRING __wcsrtombs #else # define vfprintf _IO_vfwprintf # define CHAR_T wchar_t +# define OTHER_CHAR_T char /* This is a hack!!! There should be a type uwchar_t. */ # define UCHAR_T unsigned int /* uwchar_t */ # define INT_T wint_t @@ -121,21 +125,9 @@ typedef wchar_t THOUSANDS_SEP_T; # include <_itowa.h> # define PUT(F, S, N) _IO_sputn ((F), (S), (N)) -# define PAD(Padchar) \ - do { \ - if (width > 0) \ - { \ - _IO_ssize_t written = _IO_wpadn (s, (Padchar), width); \ - if (__glibc_unlikely (written != width)) \ - { \ - done = -1; \ - goto all_done; \ - } \ - done_add (written); \ - } \ - } while (0) # define PUTC(C, F) _IO_putwc_unlocked (C, F) # define ORIENT if (_IO_fwide (s, 1) != 1) return -1 +# define CONVERT_FROM_OTHER_STRING __mbsrtowcs # undef _itoa # define _itoa(Val, Buf, Base, Case) _itowa (Val, Buf, Base, Case) @@ -144,6 +136,33 @@ typedef wchar_t THOUSANDS_SEP_T; # define EOF WEOF #endif +static inline int +pad_func (FILE *s, CHAR_T padchar, int width, int done) +{ + if (width > 0) + { + _IO_ssize_t written; +#ifndef COMPILE_WPRINTF + written = _IO_padn (s, padchar, width); +#else + written = _IO_wpadn (s, padchar, width); +#endif + if (__glibc_unlikely (written != width)) + return -1; + return done_add_func (width, done); + } + return done; +} + +#define PAD(Padchar) \ + do \ + { \ + done = pad_func (s, (Padchar), width, done); \ + if (done < 0) \ + goto all_done; \ + } \ + while (0) + #include "_i18n_number.h" /* Include the shared code for parsing the format string. */ @@ -163,25 +182,124 @@ typedef wchar_t THOUSANDS_SEP_T; } \ while (0) -#define outstring(String, Len) \ - do \ - { \ - assert ((size_t) done <= (size_t) INT_MAX); \ - if ((size_t) PUT (s, (String), (Len)) != (size_t) (Len)) \ - { \ - done = -1; \ - goto all_done; \ - } \ - if (__glibc_unlikely (INT_MAX - done < (Len))) \ - { \ - done = -1; \ - __set_errno (EOVERFLOW); \ - goto all_done; \ - } \ - done += (Len); \ - } \ +static inline int +outstring_func (FILE *s, const UCHAR_T *string, size_t length, int done) +{ + assert ((size_t) done <= (size_t) INT_MAX); + if ((size_t) PUT (s, string, length) != (size_t) (length)) + return -1; + return done_add_func (length, done); +} + +#define outstring(String, Len) \ + do \ + { \ + const void *string_ = (String); \ + done = outstring_func (s, string_, (Len), done); \ + if (done < 0) \ + goto all_done; \ + } \ while (0) +/* Write the string SRC to S. If PREC is non-negative, write at most + PREC bytes. If LEFT is true, perform left justification. */ +static int +outstring_converted_wide_string (FILE *s, const OTHER_CHAR_T *src, int prec, + int width, bool left, int done) +{ + mbstate_t mbstate; + + /* Use a small buffer to combine processing of multiple characters. + CONVERT_FROM_OTHER_STRING expects the buffer size in (wide) + characters, and buf_length counts that. */ +#ifdef COMPILE_WPRINTF + enum { buf_length = 64 }; + wchar_t buf[buf_length]; +#else + enum { buf_length = 256 }; + char buf[buf_length]; + _Static_assert (sizeof (buf) > MB_LEN_MAX, + "buffer is large enough for a single multi-byte character"); +#endif + + /* Add the initial padding if needed. */ + if (width > 0 && !left) + { + /* Make a first pass to find the output width, so that we can + add the required padding. */ + memset (&mbstate, 0, sizeof (mbstate_t)); + const OTHER_CHAR_T *src_copy = src; + size_t total_written; + if (prec < 0) + total_written = CONVERT_FROM_OTHER_STRING + (NULL, &src_copy, 0, &mbstate); + else + { + /* The source might not be null-terminated. Enforce the + limit manually, based on the output length. */ + total_written = 0; + size_t limit = prec; + while (limit > 0 && src_copy != NULL) + { + size_t write_limit = buf_length; + if (write_limit > limit) + write_limit = limit; + size_t written = CONVERT_FROM_OTHER_STRING + (buf, &src_copy, write_limit, &mbstate); + if (written == (size_t) -1) + return -1; + if (written == 0) + break; + total_written += written; + limit -= written; + } + } + + /* Output initial padding. */ + if (total_written < width) + { + done = pad_func (s, L_(' '), width - total_written, done); + if (done < 0) + return done; + } + } + + /* Convert the input string, piece by piece. */ + memset (&mbstate, 0, sizeof (mbstate_t)); + size_t total_written = 0; + { + /* If prec is negative, remaining is not decremented, otherwise, + it serves as the write limit. */ + size_t remaining = -1; + if (prec >= 0) + remaining = prec; + while (remaining > 0 && src != NULL) + { + size_t write_limit = buf_length; + if (remaining < write_limit) + write_limit = remaining; + size_t written = CONVERT_FROM_OTHER_STRING + (buf, &src, write_limit, &mbstate); + if (written == (size_t) -1) + return -1; + if (written == 0) + break; + done = outstring_func (s, (const UCHAR_T *) buf, written, done); + if (done < 0) + return done; + total_written += written; + if (prec >= 0) + remaining -= written; + } + } + + /* Add final padding. */ + if (width > 0 && left && total_written < width) + return pad_func (s, L_(' '), width - total_written, done); + else + return done; +} + /* For handling long_double and longlong we use the same flag. If `long' and `long long' are effectively the same type define it to zero. */ @@ -978,7 +1096,6 @@ static const uint8_t jump_table[] = LABEL (form_string): \ { \ size_t len; \ - int string_malloced; \ \ /* The string argument could in fact be `char *' or `wchar_t *'. \ But this should not make a difference here. */ \ @@ -990,7 +1107,6 @@ static const uint8_t jump_table[] = /* Entry point for printing other strings. */ \ LABEL (print_string): \ \ - string_malloced = 0; \ if (string == NULL) \ { \ /* Write "(null)" if there's space. */ \ @@ -1008,41 +1124,12 @@ static const uint8_t jump_table[] = } \ else if (!is_long && spec != L_('S')) \ { \ - /* This is complicated. We have to transform the multibyte \ - string into a wide character string. */ \ - const char *mbs = (const char *) string; \ - mbstate_t mbstate; \ - \ - len = prec != -1 ? __strnlen (mbs, (size_t) prec) : strlen (mbs); \ - \ - /* Allocate dynamically an array which definitely is long \ - enough for the wide character version. Each byte in the \ - multi-byte string can produce at most one wide character. */ \ - if (__glibc_unlikely (len > SIZE_MAX / sizeof (wchar_t))) \ - { \ - __set_errno (EOVERFLOW); \ - done = -1; \ - goto all_done; \ - } \ - else if (__libc_use_alloca (len * sizeof (wchar_t))) \ - string = (CHAR_T *) alloca (len * sizeof (wchar_t)); \ - else if ((string = (CHAR_T *) malloc (len * sizeof (wchar_t))) \ - == NULL) \ - { \ - done = -1; \ - goto all_done; \ - } \ - else \ - string_malloced = 1; \ - \ - memset (&mbstate, '\0', sizeof (mbstate_t)); \ - len = __mbsrtowcs (string, &mbs, len, &mbstate); \ - if (len == (size_t) -1) \ - { \ - /* Illegal multibyte character. */ \ - done = -1; \ - goto all_done; \ - } \ + done = outstring_converted_wide_string \ + (s, (const char *) string, prec, width, left, done); \ + if (done < 0) \ + goto all_done; \ + /* The padding has already been written. */ \ + break; \ } \ else \ { \ @@ -1065,8 +1152,6 @@ static const uint8_t jump_table[] = outstring (string, len); \ if (left) \ PAD (L' '); \ - if (__glibc_unlikely (string_malloced)) \ - free (string); \ } \ break; #else @@ -1115,7 +1200,6 @@ static const uint8_t jump_table[] = LABEL (form_string): \ { \ size_t len; \ - int string_malloced; \ \ /* The string argument could in fact be `char *' or `wchar_t *'. \ But this should not make a difference here. */ \ @@ -1127,7 +1211,6 @@ static const uint8_t jump_table[] = /* Entry point for printing other strings. */ \ LABEL (print_string): \ \ - string_malloced = 0; \ if (string == NULL) \ { \ /* Write "(null)" if there's space. */ \ @@ -1153,51 +1236,12 @@ static const uint8_t jump_table[] = } \ else \ { \ - const wchar_t *s2 = (const wchar_t *) string; \ - mbstate_t mbstate; \ - \ - memset (&mbstate, '\0', sizeof (mbstate_t)); \ - \ - if (prec >= 0) \ - { \ - /* The string `s2' might not be NUL terminated. */ \ - if (__libc_use_alloca (prec)) \ - string = (char *) alloca (prec); \ - else if ((string = (char *) malloc (prec)) == NULL) \ - { \ - done = -1; \ - goto all_done; \ - } \ - else \ - string_malloced = 1; \ - len = __wcsrtombs (string, &s2, prec, &mbstate); \ - } \ - else \ - { \ - len = __wcsrtombs (NULL, &s2, 0, &mbstate); \ - if (len != (size_t) -1) \ - { \ - assert (__mbsinit (&mbstate)); \ - s2 = (const wchar_t *) string; \ - if (__libc_use_alloca (len + 1)) \ - string = (char *) alloca (len + 1); \ - else if ((string = (char *) malloc (len + 1)) == NULL) \ - { \ - done = -1; \ - goto all_done; \ - } \ - else \ - string_malloced = 1; \ - (void) __wcsrtombs (string, &s2, len + 1, &mbstate); \ - } \ - } \ - \ - if (len == (size_t) -1) \ - { \ - /* Illegal wide-character string. */ \ - done = -1; \ - goto all_done; \ - } \ + done = outstring_converted_wide_string \ + (s, (const wchar_t *) string, prec, width, left, done); \ + if (done < 0) \ + goto all_done; \ + /* The padding has already been written. */ \ + break; \ } \ \ if ((width -= len) < 0) \ @@ -1211,8 +1255,6 @@ static const uint8_t jump_table[] = outstring (string, len); \ if (left) \ PAD (' '); \ - if (__glibc_unlikely (string_malloced)) \ - free (string); \ } \ break; #endif