From patchwork Thu Jun 30 12:52:13 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tom Honermann X-Patchwork-Id: 55600 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 27CB83841454 for ; Thu, 30 Jun 2022 12:53:01 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 27CB83841454 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1656593581; bh=56kqwzzHX3PFZdbnrBhiWSjAfkF6a8A23KMu1gyffY4=; h=To:Subject:Date:In-Reply-To:References:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To:Cc: From; b=QIYZVB4Tl46ykF+8nsB8aWVIxN59AbKMfvedJiDbMyOhjGmp1MtqqT4y5y+uOugUt lWq/p196hyVdJAWx5jB0g87m1reSMIaD/4BU3EtPRpBlPZnvaDtniNX29ivltR3rP9 IfOiTHAkZH9GBKC/IPlrowKZXHV2KjLkHTKXPN0A= X-Original-To: libc-alpha@sourceware.org Delivered-To: libc-alpha@sourceware.org Received: from smtp116.iad3b.emailsrvr.com (smtp116.iad3b.emailsrvr.com [146.20.161.116]) by sourceware.org (Postfix) with ESMTPS id CC93F3870871 for ; Thu, 30 Jun 2022 12:52:38 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org CC93F3870871 X-Auth-ID: tom@honermann.net Received: by smtp15.relay.iad3b.emailsrvr.com (Authenticated sender: tom-AT-honermann.net) with ESMTPSA id 61295C00B4; Thu, 30 Jun 2022 08:52:33 -0400 (EDT) To: libc-alpha@sourceware.org Subject: [PATCH v4 1/3] gconv: Correct Big5-HKSCS conversion to preserve all state bits. [BZ #25744] Date: Thu, 30 Jun 2022 08:52:13 -0400 Message-Id: <20220630125215.6052-2-tom@honermann.net> X-Mailer: git-send-email 2.32.0 In-Reply-To: <20220630125215.6052-1-tom@honermann.net> References: <20220630125215.6052-1-tom@honermann.net> MIME-Version: 1.0 X-Classification-ID: bd4e2ac9-54ec-4bf3-bea9-66e0e087bd86-2-1 X-Spam-Status: No, score=-9.3 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: Tom Honermann via Libc-alpha From: Tom Honermann Reply-To: Tom Honermann Cc: Tom Honermann Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org Sender: "Libc-alpha" This patch corrects the Big5-HKSCS converter to preserve the lowest 3 bits of the mbstate_t __count data member when the converter encounters an incomplete multibyte character. This fixes BZ #25744. Reviewed-by: Adhemerval Zanella --- iconvdata/big5hkscs.c | 16 +++--- iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c | 65 +++++++++++++++++++++++ 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/iconvdata/big5hkscs.c b/iconvdata/big5hkscs.c index a28b18a5ec..d12389b2e3 100644 --- a/iconvdata/big5hkscs.c +++ b/iconvdata/big5hkscs.c @@ -17769,7 +17769,7 @@ static struct the output state to the initial state. This has to be done during the flushing. */ #define EMIT_SHIFT_TO_INIT \ - if (data->__statep->__count != 0) \ + if ((data->__statep->__count >> 3) != 0) \ { \ if (FROM_DIRECTION) \ { \ @@ -17778,7 +17778,7 @@ static struct /* Write out the last character. */ \ *((uint32_t *) outbuf) = data->__statep->__count >> 3; \ outbuf += sizeof (uint32_t); \ - data->__statep->__count = 0; \ + data->__statep->__count &= 7; \ } \ else \ /* We don't have enough room in the output buffer. */ \ @@ -17792,7 +17792,7 @@ static struct uint32_t lasttwo = data->__statep->__count >> 3; \ *outbuf++ = (lasttwo >> 8) & 0xff; \ *outbuf++ = lasttwo & 0xff; \ - data->__statep->__count = 0; \ + data->__statep->__count &= 7; \ } \ else \ /* We don't have enough room in the output buffer. */ \ @@ -17878,7 +17878,7 @@ static struct \ /* Otherwise store only the first character now, and \ put the second one into the queue. */ \ - *statep = ch2 << 3; \ + *statep = (ch2 << 3) | (*statep & 7); \ /* Tell the caller why we terminate the loop. */ \ result = __GCONV_FULL_OUTPUT; \ break; \ @@ -17895,7 +17895,7 @@ static struct } \ else \ /* Clear the queue and proceed to output the saved character. */ \ - *statep = 0; \ + *statep &= 7; \ \ put32 (outptr, ch); \ outptr += 4; \ @@ -17946,7 +17946,7 @@ static struct } \ *outptr++ = (ch >> 8) & 0xff; \ *outptr++ = ch & 0xff; \ - *statep = 0; \ + *statep &= 7; \ inptr += 4; \ continue; \ \ @@ -17959,7 +17959,7 @@ static struct } \ *outptr++ = (lasttwo >> 8) & 0xff; \ *outptr++ = lasttwo & 0xff; \ - *statep = 0; \ + *statep &= 7; \ continue; \ } \ \ @@ -17996,7 +17996,7 @@ static struct /* Check for possible combining character. */ \ if (__glibc_unlikely (ch == 0xca || ch == 0xea)) \ { \ - *statep = ((cp[0] << 8) | cp[1]) << 3; \ + *statep = (((cp[0] << 8) | cp[1]) << 3) | (*statep & 7); \ inptr += 4; \ continue; \ } \ diff --git a/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c index 9601b6c1d9..e1472dc2e2 100644 --- a/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c +++ b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c @@ -128,6 +128,71 @@ check_conversion (struct testdata test) printf ("error: Result of third conversion was wrong.\n"); err++; } + + /* Now perform the same test as above consuming one byte at a time. */ + mbs = test.input; + memset (&st, 0, sizeof (st)); + + /* Consume the first byte; expect an incomplete multibyte character. */ + ret = mbrtowc (&wc, mbs, 1, &st); + if (ret != -2) + { + printf ("error: First byte conversion returned %zd.\n", ret); + err++; + } + /* Advance past the first consumed byte. */ + mbs += 1; + /* Consume the second byte; expect the first wchar_t. */ + ret = mbrtowc (&wc, mbs, 1, &st); + if (ret != 1) + { + printf ("error: Second byte conversion returned %zd.\n", ret); + err++; + } + /* Advance past the second consumed byte. */ + mbs += 1; + if (wc != test.expected[0]) + { + printf ("error: Result of first wchar_t conversion was wrong.\n"); + err++; + } + /* Consume no bytes; expect the second wchar_t. */ + ret = mbrtowc (&wc, mbs, 1, &st); + if (ret != 0) + { + printf ("error: First attempt of third byte conversion returned %zd.\n", ret); + err++; + } + /* Do not advance past the third byte. */ + mbs += 0; + if (wc != test.expected[1]) + { + printf ("error: Result of second wchar_t conversion was wrong.\n"); + err++; + } + /* After the second wchar_t conversion, the converter should be in + the initial state since the two input BIG5-HKSCS bytes have been + consumed and the two wchar_t's have been output. */ + if (mbsinit (&st) == 0) + { + printf ("error: Converter not in initial state.\n"); + err++; + } + /* Consume the third byte; expect the third wchar_t. */ + ret = mbrtowc (&wc, mbs, 1, &st); + if (ret != 1) + { + printf ("error: Third byte conversion returned %zd.\n", ret); + err++; + } + /* Advance past the third consumed byte. */ + mbs += 1; + if (wc != test.expected[2]) + { + printf ("error: Result of third wchar_t conversion was wrong.\n"); + err++; + } + /* Return 0 if we saw no errors. */ return err; }