gconv: Fix assertion failure in ISO-2022-JP-3 module (bug 27256)

Message ID 875z3i5zao.fsf@oldenburg.str.redhat.com
State Committed
Commit 7d88c6142c6efc160c0ee5e4f85cde382c072888
Headers
Series gconv: Fix assertion failure in ISO-2022-JP-3 module (bug 27256) |

Commit Message

Florian Weimer Jan. 27, 2021, 12:39 p.m. UTC
  The conversion loop to the internal encoding does not follow
the interface contract that __GCONV_FULL_OUTPUT is only returned
after the internal wchar_t buffer has been filled completely.  This
is enforced by the first of the two asserts in iconv/skeleton.c:

	      /* We must run out of output buffer space in this
		 rerun.  */
	      assert (outbuf == outerr);
	      assert (nstatus == __GCONV_FULL_OUTPUT);

This commit solves this issue by queuing, in the state variable, a
second wide character which cannot be written immediately, like other
converters already do (e.g., BIG5-HKSCS or TSCII).

Reported-by: Tavis Ormandy <taviso@gmail.com>

---
Thanks to Andreas Schwab and Bruno Haible for off-list review.  We
decided that no embargo was needed.

 iconvdata/Makefile        |   4 +-
 iconvdata/bug-iconv14.c   | 127 ++++++++++++++++++++++++++++++++++++++++++++++
 iconvdata/iso-2022-jp-3.c |  67 +++++++++++++++++-------
 3 files changed, 178 insertions(+), 20 deletions(-)
  

Comments

Adhemerval Zanella Netto Jan. 27, 2021, 12:47 p.m. UTC | #1
On 27/01/2021 09:39, Florian Weimer wrote:
> The conversion loop to the internal encoding does not follow
> the interface contract that __GCONV_FULL_OUTPUT is only returned
> after the internal wchar_t buffer has been filled completely.  This
> is enforced by the first of the two asserts in iconv/skeleton.c:
> 
> 	      /* We must run out of output buffer space in this
> 		 rerun.  */
> 	      assert (outbuf == outerr);
> 	      assert (nstatus == __GCONV_FULL_OUTPUT);
> 
> This commit solves this issue by queuing a second wide character
> which cannot be written immediately in the state variable, like
> other converters already do (e.g., BIG5-HKSCS or TSCII).
> 
> Reported-by: Tavis Ormandy <taviso@gmail.com>

This is ok for 2.33, since we discussed it off-line and Andreas and
Bruno both acked this change.

> 
> ---
> Thanks to Andreas Schwab and Bruno Haible for off-list review.  We
> decided that no embargo was needed.
> 
>  iconvdata/Makefile        |   4 +-
>  iconvdata/bug-iconv14.c   | 127 ++++++++++++++++++++++++++++++++++++++++++++++
>  iconvdata/iso-2022-jp-3.c |  67 +++++++++++++++++-------
>  3 files changed, 178 insertions(+), 20 deletions(-)
> 
> diff --git a/iconvdata/Makefile b/iconvdata/Makefile
> index c8c532a3e4..55c527a5f7 100644
> --- a/iconvdata/Makefile
> +++ b/iconvdata/Makefile
> @@ -74,7 +74,7 @@ ifeq (yes,$(build-shared))
>  tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
>  	tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
>  	bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
> -	bug-iconv13
> +	bug-iconv13 bug-iconv14
>  ifeq ($(have-thread-library),yes)
>  tests += bug-iconv3
>  endif
> @@ -322,6 +322,8 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \
>  			  $(addprefix $(objpfx),$(modules.so))
>  $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \
>  			  $(addprefix $(objpfx),$(modules.so))
> +$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \
> +			  $(addprefix $(objpfx),$(modules.so))
>  
>  $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
>  			 $(addprefix $(objpfx),$(modules.so)) \
> diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c
> new file mode 100644
> index 0000000000..902f140fa9
> --- /dev/null
> +++ b/iconvdata/bug-iconv14.c
> @@ -0,0 +1,127 @@
> +/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256).
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <iconv.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <support/check.h>
> +
> +/* Use an escape sequence to return to the initial state.  */
> +static void
> +with_escape_sequence (void)
> +{
> +  iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
> +  TEST_VERIFY_EXIT (c != (iconv_t) -1);
> +
> +  char in[] = "\e$(O+D\e(B";
> +  char *inbuf = in;
> +  size_t inleft = strlen (in);
> +  char out[3];                  /* Space for one output character.  */
> +  char *outbuf;
> +  size_t outleft;
> +
> +  outbuf = out;
> +  outleft = sizeof (out);
> +  TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
> +  TEST_COMPARE (errno, E2BIG);
> +  TEST_COMPARE (inleft, 3);
> +  TEST_COMPARE (inbuf - in, strlen (in) - 3);
> +  TEST_COMPARE (outleft, sizeof (out) - 2);
> +  TEST_COMPARE (outbuf - out, 2);
> +  TEST_COMPARE (out[0] & 0xff, 0xc3);
> +  TEST_COMPARE (out[1] & 0xff, 0xa6);
> +
> +  /* Return to the initial shift state, producing the pending
> +     character.  */
> +  outbuf = out;
> +  outleft = sizeof (out);
> +  TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0);
> +  TEST_COMPARE (inleft, 0);
> +  TEST_COMPARE (inbuf - in, strlen (in));
> +  TEST_COMPARE (outleft, sizeof (out) - 2);
> +  TEST_COMPARE (outbuf - out, 2);
> +  TEST_COMPARE (out[0] & 0xff, 0xcc);
> +  TEST_COMPARE (out[1] & 0xff, 0x80);
> +
> +  /* Nothing should be flushed the second time.  */
> +  outbuf = out;
> +  outleft = sizeof (out);
> +  TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
> +  TEST_COMPARE (outleft, sizeof (out));
> +  TEST_COMPARE (outbuf - out, 0);
> +  TEST_COMPARE (out[0] & 0xff, 0xcc);
> +  TEST_COMPARE (out[1] & 0xff, 0x80);
> +
> +  TEST_COMPARE (iconv_close (c), 0);
> +}
> +
> +/* Use an explicit flush to return to the initial state.  */
> +static void
> +with_flush (void)
> +{
> +  iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
> +  TEST_VERIFY_EXIT (c != (iconv_t) -1);
> +
> +  char in[] = "\e$(O+D";
> +  char *inbuf = in;
> +  size_t inleft = strlen (in);
> +  char out[3];                  /* Space for one output character.  */
> +  char *outbuf;
> +  size_t outleft;
> +
> +  outbuf = out;
> +  outleft = sizeof (out);
> +  TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
> +  TEST_COMPARE (errno, E2BIG);
> +  TEST_COMPARE (inleft, 0);
> +  TEST_COMPARE (inbuf - in, strlen (in));
> +  TEST_COMPARE (outleft, sizeof (out) - 2);
> +  TEST_COMPARE (outbuf - out, 2);
> +  TEST_COMPARE (out[0] & 0xff, 0xc3);
> +  TEST_COMPARE (out[1] & 0xff, 0xa6);
> +
> +  /* Flush the pending character.  */
> +  outbuf = out;
> +  outleft = sizeof (out);
> +  TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
> +  TEST_COMPARE (outleft, sizeof (out) - 2);
> +  TEST_COMPARE (outbuf - out, 2);
> +  TEST_COMPARE (out[0] & 0xff, 0xcc);
> +  TEST_COMPARE (out[1] & 0xff, 0x80);
> +
> +  /* Nothing should be flushed the second time.  */
> +  outbuf = out;
> +  outleft = sizeof (out);
> +  TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
> +  TEST_COMPARE (outleft, sizeof (out));
> +  TEST_COMPARE (outbuf - out, 0);
> +  TEST_COMPARE (out[0] & 0xff, 0xcc);
> +  TEST_COMPARE (out[1] & 0xff, 0x80);
> +
> +  TEST_COMPARE (iconv_close (c), 0);
> +}
> +
> +static int
> +do_test (void)
> +{
> +  with_escape_sequence ();
> +  with_flush ();
> +  return 0;
> +}
> +
> +#include <support/test-driver.c>
> diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
> index 3eaa847ad9..c8ba88cdc9 100644
> --- a/iconvdata/iso-2022-jp-3.c
> +++ b/iconvdata/iso-2022-jp-3.c
> @@ -67,23 +67,34 @@ enum
>    CURRENT_SEL_MASK = 7 << 3
>  };
>  
> -/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state
> -   also contains the last two bytes to be output, shifted by 6 bits, and a
> -   one-bit indicator whether they must be preceded by the shift sequence,
> -   in bit 22.  */
> +/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
> +   state also contains the last two bytes to be output, shifted by 6
> +   bits, and a one-bit indicator whether they must be preceded by the
> +   shift sequence, in bit 22.  During ISO-2022-JP-3 to UCS-4
> +   conversion, COUNT may also contain a non-zero pending wide
> +   character, shifted by six bits.  This happens for certain inputs in
> +   JISX0213_1_2004_set and JISX0213_2_set if the second wide character
> +   in a combining sequence cannot be written because the buffer is
> +   full.  */
>  
>  /* Since this is a stateful encoding we have to provide code which resets
>     the output state to the initial state.  This has to be done during the
>     flushing.  */
>  #define EMIT_SHIFT_TO_INIT \
> -  if ((data->__statep->__count & ~7) != ASCII_set)			      \
> +  if (data->__statep->__count != ASCII_set)			      \
>      {									      \
>        if (FROM_DIRECTION)						      \
>  	{								      \
> -	  /* It's easy, we don't have to emit anything, we just reset the     \
> -	     state for the input.  */					      \
> -	  data->__statep->__count &= 7;					      \
> -	  data->__statep->__count |= ASCII_set;				      \
> +	  if (__glibc_likely (outbuf + 4 <= outend))			      \
> +	    {								      \
> +	      /* Write out the last character.  */			      \
> +	      *((uint32_t *) outbuf) = data->__statep->__count >> 6;	      \
> +	      outbuf += sizeof (uint32_t);				      \
> +	      data->__statep->__count = ASCII_set;			\
> +	    }								      \
> +	  else								      \
> +	    /* We don't have enough room in the output buffer.  */	      \
> +	    status = __GCONV_FULL_OUTPUT;				      \
>  	}								      \
>        else								      \
>  	{								      \
> @@ -151,7 +162,21 @@ enum
>  #define LOOPFCT			FROM_LOOP
>  #define BODY \
>    {									      \
> -    uint32_t ch = *inptr;						      \
> +    uint32_t ch;							      \
> +									      \
> +    /* Output any pending character.  */				      \
> +    ch = set >> 6;							      \
> +    if (__glibc_unlikely (ch != 0))					      \
> +      {									      \
> +	put32 (outptr, ch);						      \
> +	outptr += 4;							      \
> +	/* Remove the pending character, but preserve state bits.  */	      \
> +	set &= (1 << 6) - 1;						      \
> +	continue;							      \
> +      }									      \
> +									      \
> +    /* Otherwise read the next input byte.  */				      \
> +    ch = *inptr;							      \
>  									      \
>      /* Recognize escape sequences.  */					      \
>      if (__glibc_unlikely (ch == ESC))					      \
> @@ -297,21 +322,25 @@ enum
>  	    uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];	      \
>  	    uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];	      \
>  									      \
> +	    inptr += 2;							      \
> +									      \
> +	    put32 (outptr, u1);						      \
> +	    outptr += 4;						      \
> +									      \
>  	    /* See whether we have room for two characters.  */		      \
> -	    if (outptr + 8 <= outend)					      \
> +	    if (outptr + 4 <= outend)					      \
>  	      {								      \
> -		inptr += 2;						      \
> -		put32 (outptr, u1);					      \
> -		outptr += 4;						      \
>  		put32 (outptr, u2);					      \
>  		outptr += 4;						      \
>  		continue;						      \
>  	      }								      \
> -	    else							      \
> -	      {								      \
> -		result = __GCONV_FULL_OUTPUT;				      \
> -		break;							      \
> -	      }								      \
> +									      \
> +	    /* Otherwise store only the first character now, and	      \
> +	       put the second one into the queue.  */			      \
> +	    set |= u2 << 6;						      \
> +	    /* Tell the caller why we terminate the loop.  */		      \
> +	    result = __GCONV_FULL_OUTPUT;				      \
> +	    break;							      \
>  	  }								      \
>  									      \
>  	inptr += 2;							      \
>
  
Andreas Schwab Jan. 27, 2021, 1:30 p.m. UTC | #2
On Jan 27 2021, Florian Weimer via Libc-alpha wrote:

> diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
> index 3eaa847ad9..c8ba88cdc9 100644
> --- a/iconvdata/iso-2022-jp-3.c
> +++ b/iconvdata/iso-2022-jp-3.c
> @@ -67,23 +67,34 @@ enum
>    CURRENT_SEL_MASK = 7 << 3
>  };
>  
> -/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state
> -   also contains the last two bytes to be output, shifted by 6 bits, and a
> -   one-bit indicator whether they must be preceded by the shift sequence,
> -   in bit 22.  */
> +/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
> +   state also contains the last two bytes to be output, shifted by 6
> +   bits, and a one-bit indicator whether they must be preceded by the
> +   shift sequence, in bit 22.  During ISO-2022-JP-3 to UCS-4
> +   conversion, COUNT may also contain a non-zero pending wide
> +   character, shifted by six bits.  This happens for certain inputs in
> +   JISX0213_1_2004_set and JISX0213_2_set if the second wide character
> +   in a combining sequence cannot be written because the buffer is
> +   full.  */
>  
>  /* Since this is a stateful encoding we have to provide code which resets
>     the output state to the initial state.  This has to be done during the
>     flushing.  */
>  #define EMIT_SHIFT_TO_INIT \
> -  if ((data->__statep->__count & ~7) != ASCII_set)			      \
> +  if (data->__statep->__count != ASCII_set)			      \
>      {									      \
>        if (FROM_DIRECTION)						      \
>  	{								      \
> -	  /* It's easy, we don't have to emit anything, we just reset the     \
> -	     state for the input.  */					      \
> -	  data->__statep->__count &= 7;					      \
> -	  data->__statep->__count |= ASCII_set;				      \
> +	  if (__glibc_likely (outbuf + 4 <= outend))			      \
> +	    {								      \
> +	      /* Write out the last character.  */			      \
> +	      *((uint32_t *) outbuf) = data->__statep->__count >> 6;	      \

This should consistently use put32.

Andreas.
  

Patch

diff --git a/iconvdata/Makefile b/iconvdata/Makefile
index c8c532a3e4..55c527a5f7 100644
--- a/iconvdata/Makefile
+++ b/iconvdata/Makefile
@@ -74,7 +74,7 @@  ifeq (yes,$(build-shared))
 tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
 	tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
 	bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
-	bug-iconv13
+	bug-iconv13 bug-iconv14
 ifeq ($(have-thread-library),yes)
 tests += bug-iconv3
 endif
@@ -322,6 +322,8 @@  $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \
 			  $(addprefix $(objpfx),$(modules.so))
 $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \
 			  $(addprefix $(objpfx),$(modules.so))
+$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \
+			  $(addprefix $(objpfx),$(modules.so))
 
 $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
 			 $(addprefix $(objpfx),$(modules.so)) \
diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c
new file mode 100644
index 0000000000..902f140fa9
--- /dev/null
+++ b/iconvdata/bug-iconv14.c
@@ -0,0 +1,127 @@ 
+/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256).
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <iconv.h>
+#include <string.h>
+#include <errno.h>
+#include <support/check.h>
+
+/* Use an escape sequence to return to the initial state.  */
+static void
+with_escape_sequence (void)
+{
+  iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
+  TEST_VERIFY_EXIT (c != (iconv_t) -1);
+
+  char in[] = "\e$(O+D\e(B";
+  char *inbuf = in;
+  size_t inleft = strlen (in);
+  char out[3];                  /* Space for one output character.  */
+  char *outbuf;
+  size_t outleft;
+
+  outbuf = out;
+  outleft = sizeof (out);
+  TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
+  TEST_COMPARE (errno, E2BIG);
+  TEST_COMPARE (inleft, 3);
+  TEST_COMPARE (inbuf - in, strlen (in) - 3);
+  TEST_COMPARE (outleft, sizeof (out) - 2);
+  TEST_COMPARE (outbuf - out, 2);
+  TEST_COMPARE (out[0] & 0xff, 0xc3);
+  TEST_COMPARE (out[1] & 0xff, 0xa6);
+
+  /* Return to the initial shift state, producing the pending
+     character.  */
+  outbuf = out;
+  outleft = sizeof (out);
+  TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0);
+  TEST_COMPARE (inleft, 0);
+  TEST_COMPARE (inbuf - in, strlen (in));
+  TEST_COMPARE (outleft, sizeof (out) - 2);
+  TEST_COMPARE (outbuf - out, 2);
+  TEST_COMPARE (out[0] & 0xff, 0xcc);
+  TEST_COMPARE (out[1] & 0xff, 0x80);
+
+  /* Nothing should be flushed the second time.  */
+  outbuf = out;
+  outleft = sizeof (out);
+  TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
+  TEST_COMPARE (outleft, sizeof (out));
+  TEST_COMPARE (outbuf - out, 0);
+  TEST_COMPARE (out[0] & 0xff, 0xcc);
+  TEST_COMPARE (out[1] & 0xff, 0x80);
+
+  TEST_COMPARE (iconv_close (c), 0);
+}
+
+/* Use an explicit flush to return to the initial state.  */
+static void
+with_flush (void)
+{
+  iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
+  TEST_VERIFY_EXIT (c != (iconv_t) -1);
+
+  char in[] = "\e$(O+D";
+  char *inbuf = in;
+  size_t inleft = strlen (in);
+  char out[3];                  /* Space for one output character.  */
+  char *outbuf;
+  size_t outleft;
+
+  outbuf = out;
+  outleft = sizeof (out);
+  TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
+  TEST_COMPARE (errno, E2BIG);
+  TEST_COMPARE (inleft, 0);
+  TEST_COMPARE (inbuf - in, strlen (in));
+  TEST_COMPARE (outleft, sizeof (out) - 2);
+  TEST_COMPARE (outbuf - out, 2);
+  TEST_COMPARE (out[0] & 0xff, 0xc3);
+  TEST_COMPARE (out[1] & 0xff, 0xa6);
+
+  /* Flush the pending character.  */
+  outbuf = out;
+  outleft = sizeof (out);
+  TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
+  TEST_COMPARE (outleft, sizeof (out) - 2);
+  TEST_COMPARE (outbuf - out, 2);
+  TEST_COMPARE (out[0] & 0xff, 0xcc);
+  TEST_COMPARE (out[1] & 0xff, 0x80);
+
+  /* Nothing should be flushed the second time.  */
+  outbuf = out;
+  outleft = sizeof (out);
+  TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
+  TEST_COMPARE (outleft, sizeof (out));
+  TEST_COMPARE (outbuf - out, 0);
+  TEST_COMPARE (out[0] & 0xff, 0xcc);
+  TEST_COMPARE (out[1] & 0xff, 0x80);
+
+  TEST_COMPARE (iconv_close (c), 0);
+}
+
+static int
+do_test (void)
+{
+  with_escape_sequence ();
+  with_flush ();
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
index 3eaa847ad9..c8ba88cdc9 100644
--- a/iconvdata/iso-2022-jp-3.c
+++ b/iconvdata/iso-2022-jp-3.c
@@ -67,23 +67,34 @@  enum
   CURRENT_SEL_MASK = 7 << 3
 };
 
-/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state
-   also contains the last two bytes to be output, shifted by 6 bits, and a
-   one-bit indicator whether they must be preceded by the shift sequence,
-   in bit 22.  */
+/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
+   state also contains the last two bytes to be output, shifted by 6
+   bits, and a one-bit indicator whether they must be preceded by the
+   shift sequence, in bit 22.  During ISO-2022-JP-3 to UCS-4
+   conversion, COUNT may also contain a non-zero pending wide
+   character, shifted by six bits.  This happens for certain inputs in
+   JISX0213_1_2004_set and JISX0213_2_set if the second wide character
+   in a combining sequence cannot be written because the buffer is
+   full.  */
 
 /* Since this is a stateful encoding we have to provide code which resets
    the output state to the initial state.  This has to be done during the
    flushing.  */
 #define EMIT_SHIFT_TO_INIT \
-  if ((data->__statep->__count & ~7) != ASCII_set)			      \
+  if (data->__statep->__count != ASCII_set)			      \
     {									      \
       if (FROM_DIRECTION)						      \
 	{								      \
-	  /* It's easy, we don't have to emit anything, we just reset the     \
-	     state for the input.  */					      \
-	  data->__statep->__count &= 7;					      \
-	  data->__statep->__count |= ASCII_set;				      \
+	  if (__glibc_likely (outbuf + 4 <= outend))			      \
+	    {								      \
+	      /* Write out the last character.  */			      \
+	      *((uint32_t *) outbuf) = data->__statep->__count >> 6;	      \
+	      outbuf += sizeof (uint32_t);				      \
+	      data->__statep->__count = ASCII_set;			\
+	    }								      \
+	  else								      \
+	    /* We don't have enough room in the output buffer.  */	      \
+	    status = __GCONV_FULL_OUTPUT;				      \
 	}								      \
       else								      \
 	{								      \
@@ -151,7 +162,21 @@  enum
 #define LOOPFCT			FROM_LOOP
 #define BODY \
   {									      \
-    uint32_t ch = *inptr;						      \
+    uint32_t ch;							      \
+									      \
+    /* Output any pending character.  */				      \
+    ch = set >> 6;							      \
+    if (__glibc_unlikely (ch != 0))					      \
+      {									      \
+	put32 (outptr, ch);						      \
+	outptr += 4;							      \
+	/* Remove the pending character, but preserve state bits.  */	      \
+	set &= (1 << 6) - 1;						      \
+	continue;							      \
+      }									      \
+									      \
+    /* Otherwise read the next input byte.  */				      \
+    ch = *inptr;							      \
 									      \
     /* Recognize escape sequences.  */					      \
     if (__glibc_unlikely (ch == ESC))					      \
@@ -297,21 +322,25 @@  enum
 	    uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];	      \
 	    uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];	      \
 									      \
+	    inptr += 2;							      \
+									      \
+	    put32 (outptr, u1);						      \
+	    outptr += 4;						      \
+									      \
 	    /* See whether we have room for two characters.  */		      \
-	    if (outptr + 8 <= outend)					      \
+	    if (outptr + 4 <= outend)					      \
 	      {								      \
-		inptr += 2;						      \
-		put32 (outptr, u1);					      \
-		outptr += 4;						      \
 		put32 (outptr, u2);					      \
 		outptr += 4;						      \
 		continue;						      \
 	      }								      \
-	    else							      \
-	      {								      \
-		result = __GCONV_FULL_OUTPUT;				      \
-		break;							      \
-	      }								      \
+									      \
+	    /* Otherwise store only the first character now, and	      \
+	       put the second one into the queue.  */			      \
+	    set |= u2 << 6;						      \
+	    /* Tell the caller why we terminate the loop.  */		      \
+	    result = __GCONV_FULL_OUTPUT;				      \
+	    break;							      \
 	  }								      \
 									      \
 	inptr += 2;							      \