From patchwork Tue Dec 15 12:43:55 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andreas Schwab X-Patchwork-Id: 10015 Received: (qmail 41095 invoked by alias); 15 Dec 2015 12:44:01 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 41082 invoked by uid 89); 15 Dec 2015 12:44:00 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.8 required=5.0 tests=AWL, BAYES_50, RCVD_IN_DNSWL_LOW, SPF_PASS, T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 X-HELO: mx2.suse.de From: Andreas Schwab To: Florian Weimer Cc: Martin Sebor , libc-alpha@sourceware.org Subject: Re: [PATCH] Avoid redundant shift character in iconv output at block boundary (bug 17197) References: <5668C219.8070606@gmail.com> <5669C008.7050902@gmail.com> <566EECEE.5050705@gmail.com> <566F0224.7020004@redhat.com> <566FE259.1030403@redhat.com> X-Yow: World War Three can be averted by adherence to a strictly enforced dress code! Date: Tue, 15 Dec 2015 13:43:55 +0100 In-Reply-To: <566FE259.1030403@redhat.com> (Florian Weimer's message of "Tue, 15 Dec 2015 10:50:17 +0100") Message-ID: User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/24.5 (gnu/linux) MIME-Version: 1.0 This is what I checked in. Andreas. [BZ #17197] * iconvdata/ibm930.c (BODY for TO_LOOP): Record current DBCS state immediately after emitting SI. * iconvdata/ibm933.c (BODY for TO_LOOP): Likewise. * iconvdata/ibm935.c (BODY for TO_LOOP): Likewise. * iconvdata/ibm937.c (BODY for TO_LOOP): Likewise. * iconvdata/ibm939.c (BODY for TO_LOOP): Likewise. * iconvdata/bug-iconv10.c: New file. * iconvdata/Makefile (tests): Add bug-iconv10. ($(objpfx)bug-iconv10.out): New rule. 
diff --git a/iconvdata/Makefile b/iconvdata/Makefile index 0cd6518..4d25792 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -67,7 +67,8 @@ modules.so := $(addsuffix .so, $(modules)) ifeq (yes,$(build-shared)) tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ - tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 + tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ + bug-iconv10 ifeq ($(have-thread-library),yes) tests += bug-iconv3 endif @@ -306,6 +307,8 @@ $(objpfx)tst-iconv4.out: $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) $(objpfx)tst-iconv7.out: $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) +$(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) \ diff --git a/iconvdata/bug-iconv10.c b/iconvdata/bug-iconv10.c new file mode 100644 index 0000000..9a0541e --- /dev/null +++ b/iconvdata/bug-iconv10.c @@ -0,0 +1,94 @@ +/* bug 17197: check that iconv doesn't emit invalid extra shift character + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <iconv.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +static int +do_test (void) +{ + static const char *charsets[] = + { "IBM930", "IBM933", "IBM935", "IBM937", "IBM939" }; + static const char *expects[] = + { "\016\x44\x4d\017", "\016\x41\x63\017", "\016\x44\x4d\017", + "\016\x44\x4d\017", "\016\x44\x4d\017" }; + int ret = 0; + + for (int i = 0; i < sizeof (charsets) / sizeof (*charsets); i++) + { + const char *charset = charsets[i]; + iconv_t cd = iconv_open (charset, "UTF-8"); + if (cd == (iconv_t) -1) + { + printf ("iconv_open failed (%s)\n", charset); + ret = 1; + continue; + } + + char input[] = "\xe2\x88\x9e."; + const char *expect1 = expects[i]; + const char expect2[] = "\x4b"; + size_t input_len = sizeof (input); + char output[4]; + size_t inlen = input_len; + size_t outlen = sizeof (output); + char *inptr = input; + char *outptr = output; + /* First round: expect conversion to stop before ".". */ + size_t r = iconv (cd, &inptr, &inlen, &outptr, &outlen); + if (r != -1 + || errno != E2BIG + || inlen != 2 + || inptr != input + input_len - 2 + || outlen != 0 + || memcmp (output, expect1, sizeof (output)) != 0) + { + printf ("wrong first conversion (%s)", charset); + ret = 1; + goto do_close; + } + + outlen = sizeof (output); + outptr = output; + r = iconv (cd, &inptr, &inlen, &outptr, &outlen); + if (r != 0 + || inlen != 0 + || outlen != sizeof (output) - sizeof (expect2) + || memcmp (output, expect2, sizeof (expect2)) != 0) + { + printf ("wrong second conversion (%s)\n", charset); + ret = 1; + } + + do_close: + if (iconv_close (cd) != 0) + { + printf ("iconv_close failed (%s)\n", charset); + ret = 1; + continue; + } + } + return ret; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/iconvdata/ibm930.c b/iconvdata/ibm930.c index 535d81f..a3a54c6 100644 --- a/iconvdata/ibm930.c +++ b/iconvdata/ibm930.c @@ -261,6 +261,7 @@ enum break; \ } \ *outptr++ = SI; \ + curcs = sb; \ } \ \ if (__glibc_unlikely (outptr + 1 
> outend)) \ @@ -274,7 +275,6 @@ enum *outptr++ = 0x5b; \ else \ *outptr++ = cp[0]; \ - curcs = sb; \ } \ \ /* Now that we wrote the output increment the input pointer. */ \ diff --git a/iconvdata/ibm933.c b/iconvdata/ibm933.c index 86096c7..7186950 100644 --- a/iconvdata/ibm933.c +++ b/iconvdata/ibm933.c @@ -259,6 +259,7 @@ enum break; \ } \ *outptr++ = SI; \ + curcs = sb; \ } \ \ if (__glibc_unlikely (outptr + 1 > outend)) \ @@ -267,7 +268,6 @@ enum break; \ } \ *outptr++ = cp[0]; \ - curcs = sb; \ } \ \ /* Now that we wrote the output increment the input pointer. */ \ diff --git a/iconvdata/ibm935.c b/iconvdata/ibm935.c index 5af836a..12bc5ad 100644 --- a/iconvdata/ibm935.c +++ b/iconvdata/ibm935.c @@ -260,6 +260,7 @@ enum break; \ } \ *outptr++ = SI; \ + curcs = sb; \ } \ \ if (__glibc_unlikely (outptr + 1 > outend)) \ @@ -268,7 +269,6 @@ enum break; \ } \ *outptr++ = cp[0]; \ - curcs = sb; \ } \ \ /* Now that we wrote the output increment the input pointer. */ \ diff --git a/iconvdata/ibm937.c b/iconvdata/ibm937.c index 6fe5b11..8cba11a 100644 --- a/iconvdata/ibm937.c +++ b/iconvdata/ibm937.c @@ -260,6 +260,7 @@ enum break; \ } \ *outptr++ = SI; \ + curcs = sb; \ } \ \ if (__glibc_unlikely (outptr + 1 > outend)) \ @@ -268,7 +269,6 @@ enum break; \ } \ *outptr++ = cp[0]; \ - curcs = sb; \ } \ \ /* Now that we wrote the output increment the input pointer. */ \ diff --git a/iconvdata/ibm939.c b/iconvdata/ibm939.c index c4bf3f2..0affd38 100644 --- a/iconvdata/ibm939.c +++ b/iconvdata/ibm939.c @@ -260,6 +260,7 @@ enum break; \ } \ *outptr++ = SI; \ + curcs = sb; \ } \ \ if (__glibc_unlikely (outptr + 1 > outend)) \ @@ -273,7 +274,6 @@ enum *outptr++ = 0xb2; \ else \ *outptr++ = cp[0]; \ - curcs = sb; \ } \ \ /* Now that we wrote the output increment the input pointer. */ \