[RFC/PoC,3/4] getdelim: Introduce getdelim_append

Message ID 1428928117-8643-4-git-send-email-rv@rasmusvillemoes.dk
State New, archived
Headers

Commit Message

Rasmus Villemoes April 13, 2015, 12:28 p.m. UTC
  If getdelim fails (e.g. due to ENOMEM), the contents which have
already been read from the stream and copied to the output buffer are
effectively lost: the bytes exist in the output buffer and *n
faithfully reflects the allocated size of that buffer, but the caller
has no way of knowing how many bytes actually came from the stream and
how many might be random malloc/realloc junk.

This means that there is no way for the application to try to free
some memory and retry the getdelim call, or to fall back to some other
method (e.g. a slow getc loop). One way to solve this is to introduce
getdelim_append, which has an extra in/out parameter allowing the
caller to indicate the initial offset in the buffer to start writing
at. getdelim_append updates this parameter every time content is
copied to the output buffer.

There are other use cases apart from allowing the application to try
to recover from an error. For example, one can imagine reading a file
format where a set of header lines is delimited by a blank
line. Reading the entire header can then be done without maintaining
both a line buffer and a final buffer, copying from one to the other:

  char *buf = NULL;
  size_t cap = 0, len = 0, old_len;
  ssize_t ret;
  do {
    old_len = len;
    ret = getdelim_append(&buf, &cap, '\n', f, &len);
  } while (ret > 0 && len-old_len > 1);

(this could probably just be ret > 1, but in more complicated
situations one could use old_len with ret and/or len to inspect the
last record read).

Signed-off-by: Rasmus Villemoes <rv@rasmusvillemoes.dk>
---
 libio/iogetdelim.c | 30 +++++++++++++++++++++++-------
 libio/libioP.h     |  1 +
 2 files changed, 24 insertions(+), 7 deletions(-)
  

Patch

diff --git a/libio/iogetdelim.c b/libio/iogetdelim.c
index eeda0eb..1d20594 100644
--- a/libio/iogetdelim.c
+++ b/libio/iogetdelim.c
@@ -37,14 +37,14 @@ 
    null terminator), or -1 on error or EOF.  */
 
 _IO_ssize_t
-_IO_getdelim (lineptr, n, delimiter, fp)
+_IO_getdelim_append (lineptr, n, delimiter, fp, cur_len)
      char **lineptr;
      _IO_size_t *n;
      int delimiter;
      _IO_FILE *fp;
+     _IO_size_t *cur_len;
 {
   _IO_ssize_t result = 0;
-  _IO_size_t cur_len = 0;
   _IO_ssize_t len;
 
   if (lineptr == NULL || n == NULL)
@@ -89,7 +89,7 @@  _IO_getdelim (lineptr, n, delimiter, fp)
       t = (char *) memchr ((void *) fp->_IO_read_ptr, delimiter, len);
       if (t != NULL)
 	len = (t - fp->_IO_read_ptr) + 1;
-      if (__glibc_unlikely (len >= SIZE_MAX - cur_len) ||
+      if (__glibc_unlikely (len >= SIZE_MAX - *cur_len) ||
 	  __glibc_unlikely (len >= SSIZE_MAX - result))
 	{
 	  __set_errno (EOVERFLOW);
@@ -97,7 +97,7 @@  _IO_getdelim (lineptr, n, delimiter, fp)
 	  goto unlock_return;
 	}
       /* Make enough space for len+1 (for final NUL) bytes.  */
-      needed = cur_len + len + 1;
+      needed = *cur_len + len + 1;
       if (needed > *n)
 	{
 	  char *new_lineptr;
@@ -113,15 +113,15 @@  _IO_getdelim (lineptr, n, delimiter, fp)
 	  *lineptr = new_lineptr;
 	  *n = needed;
 	}
-      memcpy (*lineptr + cur_len, (void *) fp->_IO_read_ptr, len);
+      memcpy (*lineptr + *cur_len, (void *) fp->_IO_read_ptr, len);
       fp->_IO_read_ptr += len;
-      cur_len += len;
+      *cur_len += len;
       result += len;
       if (t != NULL || __underflow (fp) == EOF)
 	break;
       len = fp->_IO_read_end - fp->_IO_read_ptr;
     }
-  (*lineptr)[cur_len] = '\0';
+  (*lineptr)[*cur_len] = '\0';
 
 unlock_return:
   _IO_release_lock (fp);
@@ -129,6 +129,22 @@  unlock_return:
 }
 
 #ifdef weak_alias
+weak_alias (_IO_getdelim_append, __getdelim_append)
+weak_alias (_IO_getdelim_append, getdelim_append)
+#endif
+
+_IO_ssize_t
+_IO_getdelim (lineptr, n, delimiter, fp)
+     char **lineptr;
+     _IO_size_t *n;
+     int delimiter;
+     _IO_FILE *fp;
+{
+	_IO_size_t offset = 0;	/* Start writing at the beginning of *LINEPTR.  */
+	return _IO_getdelim_append (lineptr, n, delimiter, fp, &offset);
+}
+
+#ifdef weak_alias
 weak_alias (_IO_getdelim, __getdelim)
 weak_alias (_IO_getdelim, getdelim)
 #endif
diff --git a/libio/libioP.h b/libio/libioP.h
index d8604ca..73f9597 100644
--- a/libio/libioP.h
+++ b/libio/libioP.h
@@ -688,6 +688,7 @@  extern _IO_size_t _IO_getline_info (_IO_FILE *,char *, _IO_size_t,
 				    int, int, int *);
 libc_hidden_proto (_IO_getline_info)
 extern _IO_ssize_t _IO_getdelim (char **, _IO_size_t *, int, _IO_FILE *);
+extern _IO_ssize_t _IO_getdelim_append (char **, _IO_size_t *, int, _IO_FILE *, _IO_size_t *);
 extern _IO_size_t _IO_getwline (_IO_FILE *,wchar_t *, _IO_size_t, wint_t, int);
 extern _IO_size_t _IO_getwline_info (_IO_FILE *,wchar_t *, _IO_size_t,
 				     wint_t, int, wint_t *);