Avoid redundant shift character in iconv output at block boundary (bug 17197)

Message ID mvmpoy79910.fsf@hawking.suse.de
State New, archived
Headers

Commit Message

Andreas Schwab Dec. 15, 2015, 12:43 p.m. UTC
  This is what I checked in.

Andreas.

	[BZ #17197]
	* iconvdata/ibm930.c (BODY for TO_LOOP): Record current DBCS state
	immediately after emitting SI.
	* iconvdata/ibm933.c (BODY for TO_LOOP): Likewise.
	* iconvdata/ibm935.c (BODY for TO_LOOP): Likewise.
	* iconvdata/ibm937.c (BODY for TO_LOOP): Likewise.
	* iconvdata/ibm939.c (BODY for TO_LOOP): Likewise.
	* iconvdata/bug-iconv10.c: New file.
	* iconvdata/Makefile (tests): Add bug-iconv10.
	($(objpfx)bug-iconv10.out): New rule.
  

Patch

diff --git a/iconvdata/Makefile b/iconvdata/Makefile
index 0cd6518..4d25792 100644
--- a/iconvdata/Makefile
+++ b/iconvdata/Makefile
@@ -67,7 +67,8 @@  modules.so := $(addsuffix .so, $(modules))
 
 ifeq (yes,$(build-shared))
 tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
-	tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9
+	tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
+	bug-iconv10
 ifeq ($(have-thread-library),yes)
 tests += bug-iconv3
 endif
@@ -306,6 +307,8 @@  $(objpfx)tst-iconv4.out: $(objpfx)gconv-modules \
 			 $(addprefix $(objpfx),$(modules.so))
 $(objpfx)tst-iconv7.out: $(objpfx)gconv-modules \
 			 $(addprefix $(objpfx),$(modules.so))
+$(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \
+			  $(addprefix $(objpfx),$(modules.so))
 
 $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
 			 $(addprefix $(objpfx),$(modules.so)) \
diff --git a/iconvdata/bug-iconv10.c b/iconvdata/bug-iconv10.c
new file mode 100644
index 0000000..9a0541e
--- /dev/null
+++ b/iconvdata/bug-iconv10.c
@@ -0,0 +1,94 @@ 
+/* bug 17197: check that iconv doesn't emit invalid extra shift character
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <iconv.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+/* Check each IBM93x charset for bug 17197; return 0 if all pass.  */
+static int
+do_test (void)
+{
+  static const char *charsets[] =
+    { "IBM930", "IBM933", "IBM935", "IBM937", "IBM939" };
+  static const char *expects[] =
+    { "\016\x44\x4d\017", "\016\x41\x63\017", "\016\x44\x4d\017",
+      "\016\x44\x4d\017", "\016\x44\x4d\017" };
+  int ret = 0;
+
+  for (size_t i = 0; i < sizeof (charsets) / sizeof (*charsets); i++)
+    {
+      const char *charset = charsets[i];
+      iconv_t cd = iconv_open (charset, "UTF-8");
+      if (cd == (iconv_t) -1)
+	{
+	  printf ("iconv_open failed (%s)\n", charset);
+	  ret = 1;
+	  continue;
+	}
+
+      char input[] = "\xe2\x88\x9e.";
+      const char *expect1 = expects[i];
+      const char expect2[] = "\x4b";
+      char output[4];
+      size_t inlen = sizeof (input);
+      size_t outlen = sizeof (output);
+      char *inptr = input;
+      char *outptr = output;
+      /* First round: expect conversion to stop before ".".  */
+      size_t r = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+      if (r != (size_t) -1
+	  || errno != E2BIG
+	  || inlen != 2
+	  || inptr != input + sizeof (input) - 2
+	  || outlen != 0
+	  || memcmp (output, expect1, sizeof (output)) != 0)
+	{
+	  printf ("wrong first conversion (%s)\n", charset);
+	  ret = 1;
+	  goto do_close;
+	}
+
+      /* Second round: "." and the NUL must follow without a second SI.  */
+      outlen = sizeof (output);
+      outptr = output;
+      r = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+      if (r != 0
+	  || inlen != 0
+	  || outlen != sizeof (output) - sizeof (expect2)
+	  || memcmp (output, expect2, sizeof (expect2)) != 0)
+	{
+	  printf ("wrong second conversion (%s)\n", charset);
+	  ret = 1;
+	}
+
+    do_close:
+      if (iconv_close (cd) != 0)
+	{
+	  printf ("iconv_close failed (%s)\n", charset);
+	  ret = 1;
+	}
+    }
+  return ret;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/iconvdata/ibm930.c b/iconvdata/ibm930.c
index 535d81f..a3a54c6 100644
--- a/iconvdata/ibm930.c
+++ b/iconvdata/ibm930.c
@@ -261,6 +261,7 @@  enum
 		break;							      \
 	      }								      \
 	    *outptr++ = SI;						      \
+	    curcs = sb;							      \
 	  }								      \
 									      \
 	if (__glibc_unlikely (outptr + 1 > outend))			      \
@@ -274,7 +275,6 @@  enum
 	  *outptr++ = 0x5b;						      \
 	else								      \
 	  *outptr++ = cp[0];						      \
-	curcs = sb;							      \
       }									      \
 									      \
     /* Now that we wrote the output increment the input pointer.  */	      \
diff --git a/iconvdata/ibm933.c b/iconvdata/ibm933.c
index 86096c7..7186950 100644
--- a/iconvdata/ibm933.c
+++ b/iconvdata/ibm933.c
@@ -259,6 +259,7 @@  enum
 		break;							      \
 	      }								      \
 	    *outptr++ = SI;						      \
+	    curcs = sb;							      \
 	  }								      \
 									      \
 	if (__glibc_unlikely (outptr + 1 > outend))			      \
@@ -267,7 +268,6 @@  enum
 	    break;							      \
 	  }								      \
 	*outptr++ = cp[0];						      \
-	curcs = sb;							      \
       }									      \
 									      \
     /* Now that we wrote the output increment the input pointer.  */	      \
diff --git a/iconvdata/ibm935.c b/iconvdata/ibm935.c
index 5af836a..12bc5ad 100644
--- a/iconvdata/ibm935.c
+++ b/iconvdata/ibm935.c
@@ -260,6 +260,7 @@  enum
 		break;							      \
 	      }								      \
 	    *outptr++ = SI;						      \
+	    curcs = sb;							      \
 	  }								      \
 									      \
 	if (__glibc_unlikely (outptr + 1 > outend))			      \
@@ -268,7 +269,6 @@  enum
 	    break;							      \
 	  }								      \
 	*outptr++ = cp[0];						      \
-	curcs = sb;							      \
       }									      \
 									      \
     /* Now that we wrote the output increment the input pointer.  */	      \
diff --git a/iconvdata/ibm937.c b/iconvdata/ibm937.c
index 6fe5b11..8cba11a 100644
--- a/iconvdata/ibm937.c
+++ b/iconvdata/ibm937.c
@@ -260,6 +260,7 @@  enum
 		break;							      \
 	      }								      \
 	    *outptr++ = SI;						      \
+	    curcs = sb;							      \
 	  }								      \
 									      \
 	if (__glibc_unlikely (outptr + 1 > outend))			      \
@@ -268,7 +269,6 @@  enum
 	    break;							      \
 	  }								      \
 	*outptr++ = cp[0];						      \
-	curcs = sb;							      \
       }									      \
 									      \
     /* Now that we wrote the output increment the input pointer.  */	      \
diff --git a/iconvdata/ibm939.c b/iconvdata/ibm939.c
index c4bf3f2..0affd38 100644
--- a/iconvdata/ibm939.c
+++ b/iconvdata/ibm939.c
@@ -260,6 +260,7 @@  enum
 		break;							      \
 	      }								      \
 	    *outptr++ = SI;						      \
+	    curcs = sb;							      \
 	  }								      \
 									      \
 	if (__glibc_unlikely (outptr + 1 > outend))			      \
@@ -273,7 +274,6 @@  enum
 	  *outptr++ = 0xb2;						      \
 	else								      \
 	  *outptr++ = cp[0];						      \
-	curcs = sb;							      \
       }									      \
 									      \
     /* Now that we wrote the output increment the input pointer.  */	      \