Use pending character state in IBM1364-derived character sets (CVE-2026-4046)

Message ID 87bjg2eprs.fsf@oldenburg.str.redhat.com (mailing list archive)
State Superseded
Headers
Series Use pending character state in IBM1364-derived character sets (CVE-2026-4046) |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Test passed
redhat-pt-bot/TryBot-32bit success Build for i686

Commit Message

Florian Weimer April 1, 2026, 2:09 p.m. UTC
  Follow the example in iso-2022-jp-3.c and use the __count state
variable to store the pending character.  This avoids restarting
the conversion if the output buffer ends between two 4-byte UCS-4
code points, so that the assert reported in the bug can no longer
happen.

This defect impacts the character sets IBM1364, IBM1371, IBM1388,
IBM1390, IBM1399.

This fixes bug 33980.

I wanted to post this now, although I have not yet had a chance to write a
proper test case for it.  It should iterate over the potentially
impacted character sets and input patterns, and different output buffer
sizes.  If the character set matches the expected input for that
character, check that the output (concatenated over multiple calls
reusing the buffer) matches the precomputed expected output.  Otherwise,
the test just verifies that there is no assert or other crash.

Thanks,
Florian
---
 iconvdata/ibm1364.c | 70 +++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 55 insertions(+), 15 deletions(-)


base-commit: 6abe432ec4aa1456151be8f9567c4d68f41d68f7
  

Patch

diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c
index 4f41f22c12..49a30913a8 100644
--- a/iconvdata/ibm1364.c
+++ b/iconvdata/ibm1364.c
@@ -67,12 +67,29 @@ 
 
 /* Since this is a stateful encoding we have to provide code which resets
    the output state to the initial state.  This has to be done during the
-   flushing.  */
+   flushing.  For the to-internal direction (FROM_DIRECTION is true),
+   there may be a pending character that needs flushing.  */
 #define EMIT_SHIFT_TO_INIT \
   if ((data->__statep->__count & ~7) != sb)				      \
     {									      \
       if (FROM_DIRECTION)						      \
-	data->__statep->__count &= 7;					      \
+	{								      \
+	  uint32_t ch = data->__statep->__count >> 7;			      \
+	  if (__glibc_unlikely (ch != 0))				      \
+	    {								      \
+	      if (__glibc_unlikely (outend - outbuf < 4))		      \
+		status = __GCONV_FULL_OUTPUT;				      \
+	      else							      \
+		{							      \
+		  put32 (outbuf, ch);					      \
+		  outbuf += 4;						      \
+		  /* Clear character and db bit.  */			      \
+		  data->__statep->__count &= 7;				      \
+		}							      \
+	    }								      \
+	  else								      \
+	    data->__statep->__count &= 7;				      \
+	}								      \
       else								      \
 	{								      \
 	  /* We are not in the initial state.  To switch back we have	      \
@@ -99,11 +116,13 @@ 
     *curcsp = save_curcs
 
 
-/* Current codeset type.  */
+/* Current codeset type.  The bit is stored in the __count variable of
+   the conversion state.  If the db bit is set, bit 7 and above store
+   a pending UCS-4 code point if non-zero.  */
 enum
 {
-  sb = 0,
-  db = 64
+  sb = 0,			/* Single byte mode.  */
+  db = 64			/* Double byte mode.  */
 };
 
 
@@ -119,21 +138,29 @@  enum
       }									      \
     else								      \
       {									      \
-	/* This is a combined character.  Make sure we have room.  */	      \
-	if (__glibc_unlikely (outptr + 8 > outend))			      \
-	  {								      \
-	    result = __GCONV_FULL_OUTPUT;				      \
-	    break;							      \
-	  }								      \
-									      \
 	const struct divide *cmbp					      \
 	  = &DB_TO_UCS4_COMB[ch - __TO_UCS4_COMBINED_MIN];		      \
 	assert (cmbp->res1 != 0 && cmbp->res2 != 0);			      \
 									      \
 	put32 (outptr, cmbp->res1);					      \
 	outptr += 4;							      \
-	put32 (outptr, cmbp->res2);					      \
-	outptr += 4;							      \
+									      \
+	/* See whether we have room for two characters.  */		      \
+	if (outend - outptr >= 4)					      \
+	  {								      \
+	    put32 (outptr, cmbp->res2);					      \
+	    outptr += 4;						      \
+	  }								      \
+	else								      \
+	  {								      \
+	    /* Otherwise store only the first character now, and	      \
+	       put the second one into the queue.  */			      \
+	    curcs |= cmbp->res2 << 7;					      \
+	    inptr += 2;							      \
+	    /* Tell the caller why we terminate the loop.  */		      \
+	    result = __GCONV_FULL_OUTPUT;				      \
+	    break;							      \
+	  }								      \
       }									      \
   }
 #else
@@ -153,7 +180,20 @@  enum
 #define LOOPFCT 		FROM_LOOP
 #define BODY \
   {									      \
-    uint32_t ch = *inptr;						      \
+    uint32_t ch;							      \
+									      \
+    ch = curcs >> 7;							      \
+    if (__glibc_unlikely (ch != 0))					      \
+      {									      \
+	put32 (outptr, ch);						      \
+	outptr += 4;							      \
+	/* Remove the pending character, but preserve state bits.  */	      \
+	curcs &= (1 << 7) - 1;						      \
+	continue;							      \
+      }									      \
+									      \
+    /* Otherwise read the next input byte.  */				      \
+    ch = *inptr;							      \
 									      \
     if (__builtin_expect (ch, 0) == SO)					      \
       {									      \