Don't error out writing a multibyte character to an unbuffered stream (bug 17522)

Message ID mvm38a5kcyh.fsf@hawking.suse.de
State Committed
Headers

Commit Message

Andreas Schwab Oct. 30, 2014, 11:35 a.m. UTC
  An unbuffered stream uses the _shortbuf[1] member of FILE as the buffer,
but code conversion may need up to MB_LEN_MAX bytes.

Andreas.

	[BZ #17522]
	* libio/wfileops.c (_IO_wdo_write): If the file buffer has room
	for less than MB_LEN_MAX use a local buffer of that size.
	* libio/tst-fputws.c: New file.
	* libio/Makefile (tests): Add tst-fputws.
---
 libio/Makefile     |  2 +-
 libio/tst-fputws.c | 39 +++++++++++++++++++++++++++++++++++++++
 libio/wfileops.c   | 25 ++++++++++++++++++++-----
 3 files changed, 60 insertions(+), 6 deletions(-)
 create mode 100644 libio/tst-fputws.c
  

Comments

Carlos O'Donell Oct. 31, 2014, 3:52 p.m. UTC | #1
On 10/30/2014 07:35 AM, Andreas Schwab wrote:
> An unbuffered stream uses the _shortbuf[1] member of FILE as the buffer,
> but code conversion may need up to MB_LEN_MAX bytes.
> 
> Andreas.
> 
> 	[BZ #17522]
> 	* libio/wfileops.c (_IO_wdo_write): If the file buffer has room
> 	for less than MB_LEN_MAX use a local buffer of that size.
> 	* libio/tst-fputws.c: New file.
> 	* libio/Makefile (tests): Add tst-fputws.

Looks good to me.

One question:

I expect it is unsupported to write a partial multi-byte character
to an unbuffered stream? You have nowhere to store the partial
character so you just return an error. Which makes sense to me,
but I wanted to be explicit.

> ---
>  libio/Makefile     |  2 +-
>  libio/tst-fputws.c | 39 +++++++++++++++++++++++++++++++++++++++
>  libio/wfileops.c   | 25 ++++++++++++++++++++-----
>  3 files changed, 60 insertions(+), 6 deletions(-)
>  create mode 100644 libio/tst-fputws.c
> 
> diff --git a/libio/Makefile b/libio/Makefile
> index 56952ce..2742128 100644
> --- a/libio/Makefile
> +++ b/libio/Makefile
> @@ -61,7 +61,7 @@ tests = tst_swprintf tst_wprintf tst_swscanf tst_wscanf tst_getwc tst_putwc   \
>  	bug-memstream1 bug-wmemstream1 \
>  	tst-setvbuf1 tst-popen1 tst-fgetwc bug-wsetpos tst-fseek \
>  	tst-fwrite-error tst-ftell-partial-wide tst-ftell-active-handler \
> -	tst-ftell-append
> +	tst-ftell-append tst-fputws

OK.

>  ifeq (yes,$(build-shared))
>  # Add test-fopenloc only if shared library is enabled since it depends on
>  # shared localedata objects.
> diff --git a/libio/tst-fputws.c b/libio/tst-fputws.c
> new file mode 100644
> index 0000000..09f53df
> --- /dev/null
> +++ b/libio/tst-fputws.c
> @@ -0,0 +1,39 @@
> +/* Test that we can write a multibyte character to an unbuffered stream.
> +   Copyright (C) 2014 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <locale.h>
> +#include <stdio.h>
> +#include <wchar.h>
> +
> +static int
> +do_test (void)
> +{
> +  const wchar_t str[] = L"\xbe\n";
> +
> +  setlocale (LC_ALL, "en_US.UTF-8");
> +  setvbuf (stdout, NULL, _IONBF, 0);
> +
> +  if (fputws (str, stdout) < 0)
> +    return 1;
> +
> +  return 0;
> +}
> +
> +#define TEST_FUNCTION do_test ()

OK.

> +
> +#include <test-skeleton.c>
> diff --git a/libio/wfileops.c b/libio/wfileops.c
> index c5ec5f7..6a088b1 100644
> --- a/libio/wfileops.c
> +++ b/libio/wfileops.c
> @@ -75,17 +75,32 @@ _IO_wdo_write (fp, data, to_do)
>  	{
>  	  enum __codecvt_result result;
>  	  const wchar_t *new_data;
> +	  char mb_buf[MB_LEN_MAX];
> +	  char *write_base, *write_ptr, *buf_end;
> +
> +	  if (fp->_IO_write_ptr - fp->_IO_write_base < sizeof (mb_buf))

OK.

> +	    {
> +	      /* Make sure we have room for at least one multibyte
> +		 character.  */
> +	      write_ptr = write_base = mb_buf;
> +	      buf_end = mb_buf + sizeof (mb_buf);

OK.

> +	    }
> +	  else
> +	    {
> +	      write_ptr = fp->_IO_write_ptr;
> +	      write_base = fp->_IO_write_base;
> +	      buf_end = fp->_IO_buf_end;
> +	    }
>  
>  	  /* Now convert from the internal format into the external buffer.  */
>  	  result = (*cc->__codecvt_do_out) (cc, &fp->_wide_data->_IO_state,
>  					    data, data + to_do, &new_data,
> -					    fp->_IO_write_ptr,
> -					    fp->_IO_buf_end,
> -					    &fp->_IO_write_ptr);
> +					    write_ptr,
> +					    buf_end,
> +					    &write_ptr);
>  
>  	  /* Write out what we produced so far.  */
> -	  if (_IO_new_do_write (fp, fp->_IO_write_base,
> -				fp->_IO_write_ptr - fp->_IO_write_base) == EOF)
> +	  if (_IO_new_do_write (fp, write_base, write_ptr - write_base) == EOF)

OK.

>  	    /* Something went wrong.  */
>  	    return WEOF;
>  
> 

Cheers,
Carlos.
  
Andreas Schwab Nov. 3, 2014, 8:53 a.m. UTC | #2
"Carlos O'Donell" <carlos@redhat.com> writes:

> I expect it is unsupported to write a partial multi-byte character
> to an unbuffered stream? You have nowhere to store the partial
> character so you just return an error. Which makes sense to me,
> but I wanted to be explicit.

That question doesn't make sense to me.  The wide character I/O
functions write wide characters, which cannot be partial.  In any case,
there is no change in that behaviour in any way.

Andreas.
  
Carlos O'Donell Nov. 3, 2014, 7:47 p.m. UTC | #3
On 11/03/2014 03:53 AM, Andreas Schwab wrote:
> "Carlos O'Donell" <carlos@redhat.com> writes:
> 
>> I expect it is unsupported to write a partial multi-byte character
>> to an unbuffered stream? You have nowhere to store the partial
>> character so you just return an error. Which makes sense to me,
>> but I wanted to be explicit.
> 
> That question doesn't make sense to me.  The wide character I/O
> functions write wide characters, which cannot be partial.  In any case,
> there is no change in that behaviour in any way.

You are absolutely correct. I'm not sure what I was thinking. All of the
output is converting from wchar_t to mb, and a wchar_t is always a complete
character.

Your patch makes things better. Thanks for fixing things.

Cheers,
Carlos.
  

Patch

diff --git a/libio/Makefile b/libio/Makefile
index 56952ce..2742128 100644
--- a/libio/Makefile
+++ b/libio/Makefile
@@ -61,7 +61,7 @@  tests = tst_swprintf tst_wprintf tst_swscanf tst_wscanf tst_getwc tst_putwc   \
 	bug-memstream1 bug-wmemstream1 \
 	tst-setvbuf1 tst-popen1 tst-fgetwc bug-wsetpos tst-fseek \
 	tst-fwrite-error tst-ftell-partial-wide tst-ftell-active-handler \
-	tst-ftell-append
+	tst-ftell-append tst-fputws
 ifeq (yes,$(build-shared))
 # Add test-fopenloc only if shared library is enabled since it depends on
 # shared localedata objects.
diff --git a/libio/tst-fputws.c b/libio/tst-fputws.c
new file mode 100644
index 0000000..09f53df
--- /dev/null
+++ b/libio/tst-fputws.c
@@ -0,0 +1,39 @@ 
+/* Test that we can write a multibyte character to an unbuffered stream.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <locale.h>
+#include <stdio.h>
+#include <wchar.h>
+
+static int
+do_test (void)
+{
+  const wchar_t str[] = L"\xbe\n";
+
+  setlocale (LC_ALL, "en_US.UTF-8");
+  setvbuf (stdout, NULL, _IONBF, 0);
+
+  if (fputws (str, stdout) < 0)
+    return 1;
+
+  return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+
+#include <test-skeleton.c>
diff --git a/libio/wfileops.c b/libio/wfileops.c
index c5ec5f7..6a088b1 100644
--- a/libio/wfileops.c
+++ b/libio/wfileops.c
@@ -75,17 +75,32 @@  _IO_wdo_write (fp, data, to_do)
 	{
 	  enum __codecvt_result result;
 	  const wchar_t *new_data;
+	  char mb_buf[MB_LEN_MAX];
+	  char *write_base, *write_ptr, *buf_end;
+
+	  if (fp->_IO_write_ptr - fp->_IO_write_base < sizeof (mb_buf))
+	    {
+	      /* Make sure we have room for at least one multibyte
+		 character.  */
+	      write_ptr = write_base = mb_buf;
+	      buf_end = mb_buf + sizeof (mb_buf);
+	    }
+	  else
+	    {
+	      write_ptr = fp->_IO_write_ptr;
+	      write_base = fp->_IO_write_base;
+	      buf_end = fp->_IO_buf_end;
+	    }
 
 	  /* Now convert from the internal format into the external buffer.  */
 	  result = (*cc->__codecvt_do_out) (cc, &fp->_wide_data->_IO_state,
 					    data, data + to_do, &new_data,
-					    fp->_IO_write_ptr,
-					    fp->_IO_buf_end,
-					    &fp->_IO_write_ptr);
+					    write_ptr,
+					    buf_end,
+					    &write_ptr);
 
 	  /* Write out what we produced so far.  */
-	  if (_IO_new_do_write (fp, fp->_IO_write_base,
-				fp->_IO_write_ptr - fp->_IO_write_base) == EOF)
+	  if (_IO_new_do_write (fp, write_base, write_ptr - write_base) == EOF)
 	    /* Something went wrong.  */
 	    return WEOF;