[v6,08/11] libio: Convert __vasprintf_internal to buffers

Message ID 588c49fc2dd201fcad2f3e2f76f9d43157b34470.1671221440.git.fweimer@redhat.com
State Committed
Commit af7f4165512ea242b5f711ee03a04f6afe22232d
Headers
Series vfprintf refactor

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Florian Weimer Dec. 16, 2022, 8:15 p.m. UTC
The buffer resizing algorithm is slightly different from the previous
libio stream-based implementation.  The initial buffer is on the stack,
and a result that still fits in it is copied to a heap allocation of
exactly the required size.  The overhead of the additional copy is
compensated by the lowered setup cost for buffers compared to libio
streams.
---
 include/printf_buffer.h            |   9 ++
 libio/vasprintf.c                  | 141 ++++++++++++++++++++---------
 stdio-common/printf_buffer_flush.c |   4 +
 3 files changed, 109 insertions(+), 45 deletions(-)
  

Comments

Adhemerval Zanella Netto Dec. 19, 2022, 4:37 p.m. UTC | #1
On 16/12/22 17:15, Florian Weimer via Libc-alpha wrote:
> The buffer resizing algorithm is slightly different.  The initial
> buffer is on the stack, and small buffers are directly allocated
> on the heap using the exact required size.  The overhead of the
> additional copy is compensated by the lowered setup cost for buffers
> compared to libio streams.

LGTM, thanks.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>

> ---
>  include/printf_buffer.h            |   9 ++
>  libio/vasprintf.c                  | 141 ++++++++++++++++++++---------
>  stdio-common/printf_buffer_flush.c |   4 +
>  3 files changed, 109 insertions(+), 45 deletions(-)
> 
> diff --git a/include/printf_buffer.h b/include/printf_buffer.h
> index cc9f8e5346..c8e22e1e90 100644
> --- a/include/printf_buffer.h
> +++ b/include/printf_buffer.h
> @@ -49,6 +49,7 @@ enum __printf_buffer_mode
>      __printf_buffer_mode_snprintf,
>      __printf_buffer_mode_sprintf_chk,
>      __printf_buffer_mode_to_file,
> +    __printf_buffer_mode_asprintf,
>      __printf_buffer_mode_strfmon,
>      __printf_buffer_mode_fp,         /* For __printf_fp_l_buffer.  */
>      __printf_buffer_mode_fp_to_wide, /* For __wprintf_fp_l_buffer.  */
> @@ -304,6 +305,9 @@ void __printf_buffer_flush_snprintf (struct __printf_buffer_snprintf *)
>  struct __printf_buffer_to_file;
>  void __printf_buffer_flush_to_file (struct __printf_buffer_to_file *)
>    attribute_hidden;
> +struct __printf_buffer_asprintf;
> +void __printf_buffer_flush_asprintf (struct __printf_buffer_asprintf *)
> +  attribute_hidden;
>  struct __printf_buffer_fp;
>  void __printf_buffer_flush_fp (struct __printf_buffer_fp *)
>    attribute_hidden;
> @@ -332,4 +336,9 @@ void __wprintf_buffer_flush_to_file (struct __wprintf_buffer_to_file *)
>  /* Temporary buffer used during floating point digit translation.  */
>  #define PRINTF_BUFFER_SIZE_DIGITS 64
>  
> +/* Size of the initial on-stack buffer for asprintf.  It should be
> +   large enough to copy almost all asprintf usages with just a single
> +   (final, correctly sized) heap allocation.  */
> +#define PRINTF_BUFFER_SIZE_ASPRINTF 200
> +
>  #endif /* PRINTF_BUFFER_H */

Ok.

> diff --git a/libio/vasprintf.c b/libio/vasprintf.c
> index 4430a266c6..4f69cb61f9 100644
> --- a/libio/vasprintf.c
> +++ b/libio/vasprintf.c
> @@ -24,64 +24,115 @@
>     This exception applies to code released by its copyright holders
>     in files containing the exception.  */
>  
> -#include <string.h>
> +#include <array_length.h>
> +#include <errno.h>
> +#include <limits.h>
> +#include <math_ldbl_opt.h>
> +#include <printf.h>
> +#include <stdio.h>
>  #include <stdlib.h>
> -#include <strfile.h>
> +#include <string.h>
> +#include <printf_buffer.h>
> +
> +struct __printf_buffer_asprintf
> +{
> +  /* base.write_base points either to a heap-allocated buffer, or to
> +     the direct array below.  */
> +  struct __printf_buffer base;
> +
> +  /* Initial allocation.  200 should be large enough to copy almost
> +     all asprintf usages with just a single (final, correctly sized)
> +     heap allocation.  */
> +  char direct[PRINTF_BUFFER_SIZE_ASPRINTF];
> +};
> +
> +void
> +__printf_buffer_flush_asprintf (struct __printf_buffer_asprintf *buf)
> +{
> +  size_t current_pos = buf->base.write_ptr - buf->base.write_base;
> +  if (current_pos >= INT_MAX)
> +    {
> +      /* The result is not representable.  No need to continue.  */
> +      __set_errno (EOVERFLOW);
> +      __printf_buffer_mark_failed (&buf->base);
> +      return;
> +    }
> +
> +  size_t current_size = buf->base.write_end - buf->base.write_base;
> +  /* Implement an exponentiation sizing policy.  Keep the size
> +     congruent 8 (mod 16), to account for the footer in glibc
> +     malloc.  */
> +  size_t new_size = ALIGN_UP (current_size + current_size / 2, 16) | 8;
> +  char *new_buffer;
> +  if (buf->base.write_base == buf->direct)
> +    {
> +      new_buffer = malloc (new_size);
> +      if (new_buffer == NULL)
> +	{
> +	  __printf_buffer_mark_failed (&buf->base);
> +	  return;
> +	}
> +      memcpy (new_buffer, buf->direct, current_pos);
> +    }
> +  else
> +    {
> +      new_buffer = realloc (buf->base.write_base, new_size);
> +      if (new_buffer == NULL)
> +	{
> +	  __printf_buffer_mark_failed (&buf->base);
> +	  return;
> +	}
> +    }
> +
> +  /* Set up the new write area.  */
> +  buf->base.write_base = new_buffer;
> +  buf->base.write_ptr = new_buffer + current_pos;
> +  buf->base.write_end = new_buffer + new_size;
> +}
> +
>  
>  int
>  __vasprintf_internal (char **result_ptr, const char *format, va_list args,
>  		      unsigned int mode_flags)
>  {
> -  /* Initial size of the buffer to be used.  Will be doubled each time an
> -     overflow occurs.  */
> -  const size_t init_string_size = 100;
> -  char *string;
> -  _IO_strfile sf;
> -  int ret;
> -  size_t needed;
> -  size_t allocated;
> -  /* No need to clear the memory here (unlike for open_memstream) since
> -     we know we will never seek on the stream.  */
> -  string = (char *) malloc (init_string_size);
> -  if (string == NULL)
> -    return -1;
> -#ifdef _IO_MTSAFE_IO
> -  sf._sbf._f._lock = NULL;
> -#endif
> -  _IO_no_init (&sf._sbf._f, _IO_USER_LOCK, -1, NULL, NULL);
> -  _IO_JUMPS (&sf._sbf) = &_IO_str_jumps;
> -  _IO_str_init_static_internal (&sf, string, init_string_size, string);
> -  sf._sbf._f._flags &= ~_IO_USER_BUF;
> -  sf._s._allocate_buffer_unused = (_IO_alloc_type) malloc;
> -  sf._s._free_buffer_unused = (_IO_free_type) free;
> -  ret = __vfprintf_internal (&sf._sbf._f, format, args, mode_flags);
> -  if (ret < 0)
> +  struct __printf_buffer_asprintf buf;
> +  __printf_buffer_init (&buf.base, buf.direct, array_length (buf.direct),
> +			__printf_buffer_mode_asprintf);
> +
> +  __printf_buffer (&buf.base, format, args, mode_flags);
> +  int done = __printf_buffer_done (&buf.base);
> +  if (done < 0)
>      {
> -      free (sf._sbf._f._IO_buf_base);
> -      return ret;
> +      if (buf.base.write_base != buf.direct)
> +	free (buf.base.write_base);
> +      return done;
> +    }
> +
> +  /* Transfer to the final buffer.  */
> +  char *result;
> +  size_t size = buf.base.write_ptr - buf.base.write_base;
> +  if (buf.base.write_base == buf.direct)
> +    {
> +      result = malloc (size + 1);
> +      if (result == NULL)
> +	return -1;
> +      memcpy (result, buf.direct, size);
>      }
> -  /* Only use realloc if the size we need is of the same (binary)
> -     order of magnitude then the memory we allocated.  */
> -  needed = sf._sbf._f._IO_write_ptr - sf._sbf._f._IO_write_base + 1;
> -  allocated = sf._sbf._f._IO_write_end - sf._sbf._f._IO_write_base;
> -  if ((allocated >> 1) <= needed)
> -    *result_ptr = (char *) realloc (sf._sbf._f._IO_buf_base, needed);
>    else
>      {
> -      *result_ptr = (char *) malloc (needed);
> -      if (*result_ptr != NULL)
> +      result = realloc (buf.base.write_base, size + 1);
> +      if (result == NULL)
>  	{
> -	  memcpy (*result_ptr, sf._sbf._f._IO_buf_base, needed - 1);
> -	  free (sf._sbf._f._IO_buf_base);
> +	  free (buf.base.write_base);
> +	  return -1;
>  	}
> -      else
> -	/* We have no choice, use the buffer we already have.  */
> -	*result_ptr = (char *) realloc (sf._sbf._f._IO_buf_base, needed);
>      }
> -  if (*result_ptr == NULL)
> -    *result_ptr = sf._sbf._f._IO_buf_base;
> -  (*result_ptr)[needed - 1] = '\0';
> -  return ret;
> +
> +  /* Add NUL termination.  */
> +  result[size] = '\0';
> +  *result_ptr = result;
> +
> +  return done;
>  }
>  
>  int

Ok.

> diff --git a/stdio-common/printf_buffer_flush.c b/stdio-common/printf_buffer_flush.c
> index f44c8b887f..14fe1b2df4 100644
> --- a/stdio-common/printf_buffer_flush.c
> +++ b/stdio-common/printf_buffer_flush.c
> @@ -27,6 +27,7 @@
>  #ifndef SHARED
>  # pragma weak __printf_buffer_flush_snprintf
>  # pragma weak __printf_buffer_flush_to_file
> +# pragma weak __printf_buffer_flush_asprintf
>  # pragma weak __printf_buffer_flush_fp
>  # pragma weak __printf_buffer_flush_fp_to_wide
>  # pragma weak __printf_buffer_flush_fphex_to_wide
> @@ -49,6 +50,9 @@ __printf_buffer_do_flush (struct __printf_buffer *buf)
>      case __printf_buffer_mode_to_file:
>        __printf_buffer_flush_to_file ((struct __printf_buffer_to_file *) buf);
>        return;
> +    case __printf_buffer_mode_asprintf:
> +      __printf_buffer_flush_asprintf ((struct __printf_buffer_asprintf *) buf);
> +      return;
>      case __printf_buffer_mode_strfmon:
>        __set_errno (E2BIG);
>        __printf_buffer_mark_failed (buf);

Ok.
  

Patch

diff --git a/include/printf_buffer.h b/include/printf_buffer.h
index cc9f8e5346..c8e22e1e90 100644
--- a/include/printf_buffer.h
+++ b/include/printf_buffer.h
@@ -49,6 +49,7 @@  enum __printf_buffer_mode
     __printf_buffer_mode_snprintf,
     __printf_buffer_mode_sprintf_chk,
     __printf_buffer_mode_to_file,
+    __printf_buffer_mode_asprintf,
     __printf_buffer_mode_strfmon,
     __printf_buffer_mode_fp,         /* For __printf_fp_l_buffer.  */
     __printf_buffer_mode_fp_to_wide, /* For __wprintf_fp_l_buffer.  */
@@ -304,6 +305,9 @@  void __printf_buffer_flush_snprintf (struct __printf_buffer_snprintf *)
 struct __printf_buffer_to_file;
 void __printf_buffer_flush_to_file (struct __printf_buffer_to_file *)
   attribute_hidden;
+struct __printf_buffer_asprintf;
+void __printf_buffer_flush_asprintf (struct __printf_buffer_asprintf *)
+  attribute_hidden;
 struct __printf_buffer_fp;
 void __printf_buffer_flush_fp (struct __printf_buffer_fp *)
   attribute_hidden;
@@ -332,4 +336,9 @@  void __wprintf_buffer_flush_to_file (struct __wprintf_buffer_to_file *)
 /* Temporary buffer used during floating point digit translation.  */
 #define PRINTF_BUFFER_SIZE_DIGITS 64
 
+/* Size of the initial on-stack buffer for asprintf.  It should be
+   large enough to copy almost all asprintf usages with just a single
+   (final, correctly sized) heap allocation.  */
+#define PRINTF_BUFFER_SIZE_ASPRINTF 200
+
 #endif /* PRINTF_BUFFER_H */
diff --git a/libio/vasprintf.c b/libio/vasprintf.c
index 4430a266c6..4f69cb61f9 100644
--- a/libio/vasprintf.c
+++ b/libio/vasprintf.c
@@ -24,64 +24,115 @@ 
    This exception applies to code released by its copyright holders
    in files containing the exception.  */
 
-#include <string.h>
+#include <array_length.h>
+#include <errno.h>
+#include <limits.h>
+#include <math_ldbl_opt.h>
+#include <printf.h>
+#include <stdio.h>
 #include <stdlib.h>
-#include <strfile.h>
+#include <string.h>
+#include <printf_buffer.h>
+
+struct __printf_buffer_asprintf
+{
+  /* base.write_base points either to a heap-allocated buffer, or to
+     the direct array below.  */
+  struct __printf_buffer base;
+
+  /* Initial allocation.  200 should be large enough to copy almost
+     all asprintf usages with just a single (final, correctly sized)
+     heap allocation.  */
+  char direct[PRINTF_BUFFER_SIZE_ASPRINTF];
+};
+
+void
+__printf_buffer_flush_asprintf (struct __printf_buffer_asprintf *buf)
+{
+  size_t current_pos = buf->base.write_ptr - buf->base.write_base;
+  if (current_pos >= INT_MAX)
+    {
+      /* The result is not representable.  No need to continue.  */
+      __set_errno (EOVERFLOW);
+      __printf_buffer_mark_failed (&buf->base);
+      return;
+    }
+
+  size_t current_size = buf->base.write_end - buf->base.write_base;
+  /* Implement an exponential sizing policy (grow by about 50% per
+     flush).  Keep the size congruent to 8 (mod 16), to account for
+     the footer in glibc malloc.  */
+  size_t new_size = ALIGN_UP (current_size + current_size / 2, 16) | 8;
+  char *new_buffer;
+  if (buf->base.write_base == buf->direct)
+    {
+      new_buffer = malloc (new_size);
+      if (new_buffer == NULL)
+	{
+	  __printf_buffer_mark_failed (&buf->base);
+	  return;
+	}
+      memcpy (new_buffer, buf->direct, current_pos);
+    }
+  else
+    {
+      new_buffer = realloc (buf->base.write_base, new_size);
+      if (new_buffer == NULL)
+	{
+	  __printf_buffer_mark_failed (&buf->base);
+	  return;
+	}
+    }
+
+  /* Set up the new write area.  */
+  buf->base.write_base = new_buffer;
+  buf->base.write_ptr = new_buffer + current_pos;
+  buf->base.write_end = new_buffer + new_size;
+}
+
 
 int
 __vasprintf_internal (char **result_ptr, const char *format, va_list args,
 		      unsigned int mode_flags)
 {
-  /* Initial size of the buffer to be used.  Will be doubled each time an
-     overflow occurs.  */
-  const size_t init_string_size = 100;
-  char *string;
-  _IO_strfile sf;
-  int ret;
-  size_t needed;
-  size_t allocated;
-  /* No need to clear the memory here (unlike for open_memstream) since
-     we know we will never seek on the stream.  */
-  string = (char *) malloc (init_string_size);
-  if (string == NULL)
-    return -1;
-#ifdef _IO_MTSAFE_IO
-  sf._sbf._f._lock = NULL;
-#endif
-  _IO_no_init (&sf._sbf._f, _IO_USER_LOCK, -1, NULL, NULL);
-  _IO_JUMPS (&sf._sbf) = &_IO_str_jumps;
-  _IO_str_init_static_internal (&sf, string, init_string_size, string);
-  sf._sbf._f._flags &= ~_IO_USER_BUF;
-  sf._s._allocate_buffer_unused = (_IO_alloc_type) malloc;
-  sf._s._free_buffer_unused = (_IO_free_type) free;
-  ret = __vfprintf_internal (&sf._sbf._f, format, args, mode_flags);
-  if (ret < 0)
+  struct __printf_buffer_asprintf buf;
+  __printf_buffer_init (&buf.base, buf.direct, array_length (buf.direct),
+			__printf_buffer_mode_asprintf);
+
+  __printf_buffer (&buf.base, format, args, mode_flags);
+  int done = __printf_buffer_done (&buf.base);
+  if (done < 0)
     {
-      free (sf._sbf._f._IO_buf_base);
-      return ret;
+      if (buf.base.write_base != buf.direct)
+	free (buf.base.write_base);
+      return done;
+    }
+
+  /* Transfer to the final buffer.  */
+  char *result;
+  size_t size = buf.base.write_ptr - buf.base.write_base;
+  if (buf.base.write_base == buf.direct)
+    {
+      result = malloc (size + 1);
+      if (result == NULL)
+	return -1;
+      memcpy (result, buf.direct, size);
     }
-  /* Only use realloc if the size we need is of the same (binary)
-     order of magnitude then the memory we allocated.  */
-  needed = sf._sbf._f._IO_write_ptr - sf._sbf._f._IO_write_base + 1;
-  allocated = sf._sbf._f._IO_write_end - sf._sbf._f._IO_write_base;
-  if ((allocated >> 1) <= needed)
-    *result_ptr = (char *) realloc (sf._sbf._f._IO_buf_base, needed);
   else
     {
-      *result_ptr = (char *) malloc (needed);
-      if (*result_ptr != NULL)
+      result = realloc (buf.base.write_base, size + 1);
+      if (result == NULL)
 	{
-	  memcpy (*result_ptr, sf._sbf._f._IO_buf_base, needed - 1);
-	  free (sf._sbf._f._IO_buf_base);
+	  free (buf.base.write_base);
+	  return -1;
 	}
-      else
-	/* We have no choice, use the buffer we already have.  */
-	*result_ptr = (char *) realloc (sf._sbf._f._IO_buf_base, needed);
     }
-  if (*result_ptr == NULL)
-    *result_ptr = sf._sbf._f._IO_buf_base;
-  (*result_ptr)[needed - 1] = '\0';
-  return ret;
+
+  /* Add NUL termination.  */
+  result[size] = '\0';
+  *result_ptr = result;
+
+  return done;
 }
 
 int
diff --git a/stdio-common/printf_buffer_flush.c b/stdio-common/printf_buffer_flush.c
index f44c8b887f..14fe1b2df4 100644
--- a/stdio-common/printf_buffer_flush.c
+++ b/stdio-common/printf_buffer_flush.c
@@ -27,6 +27,7 @@ 
 #ifndef SHARED
 # pragma weak __printf_buffer_flush_snprintf
 # pragma weak __printf_buffer_flush_to_file
+# pragma weak __printf_buffer_flush_asprintf
 # pragma weak __printf_buffer_flush_fp
 # pragma weak __printf_buffer_flush_fp_to_wide
 # pragma weak __printf_buffer_flush_fphex_to_wide
@@ -49,6 +50,9 @@  __printf_buffer_do_flush (struct __printf_buffer *buf)
     case __printf_buffer_mode_to_file:
       __printf_buffer_flush_to_file ((struct __printf_buffer_to_file *) buf);
       return;
+    case __printf_buffer_mode_asprintf:
+      __printf_buffer_flush_asprintf ((struct __printf_buffer_asprintf *) buf);
+      return;
     case __printf_buffer_mode_strfmon:
       __set_errno (E2BIG);
       __printf_buffer_mark_failed (buf);