diff mbox

[v2,2/2] iconv_prog: Track if invalid sequences were seen [BZ #18799]

Message ID 1439332066-13664-3-git-send-email-ben@0x539.de
State New, archived
Headers show

Commit Message

Benjamin Herr Aug. 11, 2015, 10:27 p.m. UTC
As far as I can tell, Posix requires that "The presence or absence of -c
shall not affect the exit status of iconv." iconv_prog needs to be more
vigilant to not clobber the variable tracking whether it has skipped an
invalid input sequence.

In addition, it seems reasonable to treat an incomplete input sequence
at the end of the input the same way as an invalid input sequence
elsewhere if -c is passed.
---
2015-08-12  Benjamin Herr  <ben@0x539.de>

	* iconv/iconv_prog.c: Track whether invalid input sequences have been
	encountered, even if they were ignored because of the '-c' flag.
	* iconv/tst-iconv-c-exit.sh: New file. Test for the above.
	* iconv/Makefile (($objpfx)tst-iconv-c-exit.out): New.
	(xtests-special): Add the above.


 iconv/Makefile            |   6 ++-
 iconv/iconv_prog.c        | 108 +++++++++++++++++++++++++++-------------------
 iconv/tst-iconv-c-exit.sh |  27 ++++++++++++
 3 files changed, 95 insertions(+), 46 deletions(-)
 create mode 100644 iconv/tst-iconv-c-exit.sh
diff mbox

Patch

diff --git a/iconv/Makefile b/iconv/Makefile
index 0d55eda..3122b72 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -58,7 +58,7 @@  lib := iconvprogs
 include $(patsubst %,$(..)cppflags-iterator.mk,$(cpp-srcs-left))
 
 ifeq ($(run-built-tests),yes)
-xtests-special += $(objpfx)test-iconvconfig.out
+xtests-special += $(objpfx)test-iconvconfig.out $(objpfx)tst-iconv-c-exit.out
 endif
 
 include ../Rules
@@ -77,3 +77,7 @@  $(objpfx)test-iconvconfig.out: /dev/null $(objpfx)iconvconfig
 	 cmp $$tmp $(inst_gconvdir)/gconv-modules.cache; \
 	 rm -f $$tmp) > $@; \
 	$(evaluate-test)
+
+$(objpfx)tst-iconv-c-exit.out: tst-iconv-c-exit.sh $(objpfx)iconv_prog
+	$(SHELL) $^ > $@; \
+	$(evaluate-test)
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index 63b8e33..f11eeed 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -494,14 +494,33 @@  incomplete character or shift sequence at end of buffer"));
 
 #define BUF_SIZE	32768
 static int
-flush_state (iconv_t cd, FILE **output, const char *output_file, size_t offset)
+flush_state (iconv_t cd, FILE **output, const char *output_file, size_t offset,
+	     size_t len, int invalid_sequence_seen)
 {
   char outbuf[BUF_SIZE];
   char *outptr;
   size_t outlen;
   size_t n;
 
-  /* All the input test is processed.  For state-dependent
+  /* If there is input left over, there is an incomplete multibyte
+     sequence at the end.  */
+  if (len > 0)
+    {
+      /* Incomplete multibyte sequences at the end of the input are not any
+	 more invalid than spurious bytes elsewhere.  */
+      if (omit_invalid)
+	{
+	  invalid_sequence_seen = 1;
+	}
+      else
+	{
+	  errno = EINVAL;
+	  report_iconv_error (offset);
+	  return -1;
+	}
+    }
+
+  /* All the input text is processed.  For state-dependent
      character sets we have to flush the state now.  */
   outptr = outbuf;
   outlen = BUF_SIZE;
@@ -515,11 +534,18 @@  flush_state (iconv_t cd, FILE **output, const char *output_file, size_t offset)
 
   if (n == (size_t) -1)
     {
-      report_iconv_error (offset);
-      return 1;
+      if (!omit_invalid || (errno != EINVAL && errno != EILSEQ))
+	{
+	  report_iconv_error (offset);
+	  return -1;
+	}
+      else
+	{
+	  invalid_sequence_seen = 1;
+	}
     }
 
-  return 0;
+  return invalid_sequence_seen;
 }
 
 static size_t
@@ -540,36 +566,39 @@  process_part (iconv_t cd, char **addr, size_t *len, size_t offset,
   char *outptr;
   size_t outlen;
   size_t n;
-  int ret = 0;
+  int invalid_sequence_seen = 0;
 
-  while (*len > 0)
+  for (;;)
     {
       outptr = outbuf;
       outlen = BUF_SIZE;
       n = iconv (cd, addr, len, &outptr, &outlen);
 
-      if (n == (size_t) -1 && omit_invalid && errno == EILSEQ)
-	{
-	  ret = 1;
-	  if (*len == 0)
-	    n = 0;
-	  else
-	    errno = E2BIG;
-	}
-
       if (outptr != outbuf)
 	{
-	  ret = write_output (outbuf, outptr, output, output_file);
+	  int ret = write_output (outbuf, outptr, output, output_file);
 	  if (ret != 0)
-	    break;
+	    return ret;
 	}
 
+      /* Done with the input buffer.  */
+      if (n != (size_t) -1)
+	break;
+
       /* Incomplete multibyte characters might be completed by the next
          chunk, so do not treat them as an error here. */
-      if (n != (size_t) -1 || errno == EINVAL)
+      if (errno == EINVAL)
+	break;
+
+      if (omit_invalid && errno == EILSEQ)
 	{
-	  ret = 0;
-	  break;
+	  /* Remember that we saw an invalid character for the sake of our
+	     exit status, but otherwise carry on.  */
+	  invalid_sequence_seen = 1;
+	  if (*len == 0)
+	    break;
+	  else
+	    errno = E2BIG;
 	}
 
       if (errno != E2BIG)
@@ -581,7 +610,7 @@  process_part (iconv_t cd, char **addr, size_t *len, size_t offset,
 	}
     }
 
-  return ret;
+  return invalid_sequence_seen;
 }
 
 static int
@@ -593,19 +622,10 @@  process_block (iconv_t cd, char *addr, size_t len, FILE **output,
   /* Process everything in one go.  */
   int ret = process_part (cd, &addr, &len, 0, output, output_file);
 
-  if (ret != 0)
+  if (ret < 0)
     return ret;
 
-  /* If there is input left over, there is an incomplete multibyte
-     sequence at the end.  */
-  if (len > 0)
-    {
-      errno = EINVAL;
-      report_iconv_error (addr - start);
-      return -1;
-    }
-
-  return flush_state (cd, output, output_file, addr - start);
+  return flush_state (cd, output, output_file, addr - start, len, ret);
 }
 
 
@@ -616,11 +636,12 @@  process_fd (iconv_t cd, int fd, FILE **output, const char *output_file)
   size_t len = 0;
   size_t offset = 0;
   ssize_t n;
+  int ret;
+  int invalid_sequence_seen = 0;
 
   /* Read into the buffer past unconsumed bytes from the last iteration.  */
   while ((n = read (fd, inbuf + len, BUF_SIZE - len)) > 0)
     {
-      int ret;
       char *inptr;
 
       len += n;
@@ -628,8 +649,13 @@  process_fd (iconv_t cd, int fd, FILE **output, const char *output_file)
       inptr = inbuf;
       /* Process what we have read.  */
       ret = process_part (cd, &inptr, &len, offset, output, output_file);
-      if (ret != 0)
+      if (ret < 0)
 	return ret;
+
+      /* Remember this for the sake of our exit status, but carry on.  */
+      if (ret > 0)
+	invalid_sequence_seen = ret;
+
       /* Keep track of overall position in the input for error reporting.  */
       offset = saturating_add (offset, inptr - inbuf);
 
@@ -644,16 +670,8 @@  process_fd (iconv_t cd, int fd, FILE **output, const char *output_file)
       return -1;
     }
 
-  /* No more input available, so incomplete multibyte sequences are
-     not going to get completed at this point. */
-  if (len > 0)
-    {
-      errno = EINVAL;
-      report_iconv_error (offset);
-      return -1;
-    }
-
-  return flush_state (cd, output, output_file, offset);
+  return flush_state (cd, output, output_file, offset, len,
+                      invalid_sequence_seen);
 }
 
 
diff --git a/iconv/tst-iconv-c-exit.sh b/iconv/tst-iconv-c-exit.sh
new file mode 100644
index 0000000..0b8cc65
--- /dev/null
+++ b/iconv/tst-iconv-c-exit.sh
@@ -0,0 +1,27 @@ 
+#! /bin/sh
+# Test that iconv_prog -c exit status reports failure given invalid input.
+# Copyright (C) 2015 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+iconv_prog=$1
+
+for input_str in 'OK \377' '\377 OK' '\377'
+do
+  printf "$input_str" | "$iconv_prog" -c -f ascii > /dev/null && exit 1
+done
+
+exit 0