[v4,3/4] iconv: make utf-7.c able to use variants

Message ID 20211209093152.313872-4-mg@max.gautier.name
State Superseded
Headers
Series iconv: Add support for UTF-7-IMAP |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Max Gautier Dec. 9, 2021, 9:31 a.m. UTC
  Add infrastructure in utf-7.c to handle variants. The approach comes from
iso646.c.
The variant is determined at gconv_init time and is passed to the
conversion loops as an additional argument.

Signed-off-by: Max Gautier <mg@max.gautier.name>
---
 iconvdata/utf-7.c | 239 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 174 insertions(+), 65 deletions(-)
  

Comments

Adhemerval Zanella Netto March 7, 2022, 12:34 p.m. UTC | #1
On 09/12/2021 06:31, Max Gautier via Libc-alpha wrote:
> Add infrastructure in utf-7.c to handle variants. The approach comes from
> iso646.c
> The variant is defined at gconv_init time and is passed as a
> supplementary variable.
> 
> Signed-off-by: Max Gautier <mg@max.gautier.name>

Patch looks ok in general; there are style issues that need to be fixed, and
some minor suggestions below.

> ---
>  iconvdata/utf-7.c | 239 +++++++++++++++++++++++++++++++++-------------
>  1 file changed, 174 insertions(+), 65 deletions(-)
> 
> diff --git a/iconvdata/utf-7.c b/iconvdata/utf-7.c
> index ac7d78141a..965d4220f1 100644
> --- a/iconvdata/utf-7.c
> +++ b/iconvdata/utf-7.c
> @@ -29,6 +29,24 @@
>  #include <stdlib.h>
>  
>  
> +enum variant
> +{
> +    UTF7,
> +};
> +
> +/* Must be in the same order as enum variant above.  */
> +static const char names[] =
> +  "UTF-7//\0"
> +  "\0";
> +
> +static uint32_t
> +shift_character(enum variant const var)
> +{
> +    if (var == UTF7)
> +        return '+';
> +    else
> +        abort();
> +}

Please use the expected glibc indentation [1], here and in the other places as well.

[1] https://sourceware.org/glibc/wiki/Style_and_Conventions

>  
>  static int
>  between(uint32_t const ch,
> @@ -38,37 +56,43 @@ between(uint32_t const ch,
>  }
>  
>  /* The set of "direct characters":
> +   FOR UTF-7
>     A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
>  */
>  
>  static int
> -isdirect (uint32_t ch)
> +isdirect (uint32_t ch, enum variant var)
>  {
> -    return (between(ch, 'A', 'Z')
> -	    || between(ch, 'a', 'z')
> -	    || between(ch, '0', '9')
> -	    || ch == '\'' || ch == '(' || ch == ')'
> -	    || between(ch, ',', '/')
> -	    || ch == ':' || ch == '?'
> -	    || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
> +    if (var == UTF7)
> +        return (between(ch, 'A', 'Z')
> +                || between(ch, 'a', 'z')
> +                || between(ch, '0', '9')
> +                || ch == '\'' || ch == '(' || ch == ')'
> +                || between(ch, ',', '/')
> +                || ch == ':' || ch == '?'
> +                || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
> +    abort();
>  }
>  
>  
>  /* The set of "direct and optional direct characters":
> +   (UTF-7 only)
>     A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
>     ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
>  */
>  
> -
>  static int
> -isxdirect (uint32_t ch)
> +isxdirect (uint32_t ch, enum variant var)
>  {
> -    return (ch == '\t'
> -            || ch == '\n'
> -            || ch == '\r'
> -            || (between(ch, ' ','}')
> -                && ch != '+' && ch != '\\')
> -           );
> +    return(isdirect(ch, var)
> +            || (var == UTF7 &&
> +                (between(ch, '!', '&')
> +                 || ch == '*'
> +                 || between(ch, ';', '@')
> +                 || (between(ch, '[', '`') && ch != '\\')
> +                 || between(ch, '{', '}'))
> +                )
> +          );
>  }
>  

The change is ok, but maybe factoring the variant check out makes it clearer:

   if (var != UTF7)
     return 0;
   [...]

Also, since you are changing it, I think it would be better to use 'bool' as
the return type.

>  
> @@ -91,7 +115,7 @@ needs_explicit_shift (uint32_t ch)
>  
>  /* Converts a value in the range 0..63 to a base64 encoded char.  */
>  static unsigned char
> -base64 (unsigned int i)
> +base64 (unsigned int i, enum variant var)
>  {
>    if (i < 26)
>      return i + 'A';
> @@ -101,7 +125,7 @@ base64 (unsigned int i)
>      return i - 52 + '0';
>    else if (i == 62)
>      return '+';
> -  else if (i == 63)
> +  else if (i == 63 && var == UTF7)
>      return '/';
>    else
>      abort ();
> @@ -109,9 +133,8 @@ base64 (unsigned int i)
>  
>  

Ok.

>  /* Definitions used in the body of the `gconv' function.  */
> -#define CHARSET_NAME		"UTF-7//"
> -#define DEFINE_INIT		1
> -#define DEFINE_FINI		1
> +#define DEFINE_INIT		0
> +#define DEFINE_FINI		0
>  #define FROM_LOOP		from_utf7_loop
>  #define TO_LOOP			to_utf7_loop
>  #define MIN_NEEDED_FROM		1
> @@ -119,11 +142,27 @@ base64 (unsigned int i)
>  #define MIN_NEEDED_TO		4
>  #define MAX_NEEDED_TO		4
>  #define ONE_DIRECTION		0
> +#define FROM_DIRECTION      (dir == from_utf7)
>  #define PREPARE_LOOP \
>    mbstate_t saved_state;						      \
> -  mbstate_t *statep = data->__statep;
> -#define EXTRA_LOOP_ARGS		, statep
> +  mbstate_t *statep = data->__statep;					      \
> +  enum direction dir = ((struct utf7_data *) step->__data)->dir;	      \
> +  enum direction var = ((struct utf7_data *) step->__data)->var;
> +#define EXTRA_LOOP_ARGS		, statep, var
> +
> +
> +enum direction
> +{
> +   illegal_dir,
> +   from_utf7,
> +   to_utf7
> +};

Style: use two spaces for indentation.

>  
> +struct utf7_data
> +{
> +  enum direction dir;
> +  enum variant var;
> +};
>  
>  /* Since we might have to reset input pointer we must be able to save
>     and restore the state.  */
> @@ -133,6 +172,72 @@ base64 (unsigned int i)
>    else									      \
>      *statep = saved_state
>  
> +extern int gconv_init (struct __gconv_step *step);

I think there is no need to add the prototype here.

> +int
> +gconv_init (struct __gconv_step *step)
> +{
> +  /* Determine which direction.  */
> +  struct utf7_data *new_data;
> +  enum direction dir = illegal_dir;
> +
> +  enum variant var = 0;
> +  for (const char *name = names; *name != '\0';
> +       name = __rawmemchr (name, '\0') + 1)
> +    {
> +      if (__strcasecmp (step->__from_name, name) == 0)
> +	{
> +	  dir = from_utf7;
> +	  break;
> +	}
> +      else if (__strcasecmp (step->__to_name, name) == 0)
> +	{
> +	  dir = to_utf7;
> +	  break;
> +	}
> +      ++var;
> +    }
> +
> +  if (__builtin_expect (dir, from_utf7) != illegal_dir)

Use __glibc_likely.

> +  {
> +      new_data = malloc (sizeof (*new_data));
> +      if (new_data == NULL)
> +          return __GCONV_NOMEM;
> +
> +      new_data->dir = dir;
> +      new_data->var = var;
> +      step->__data = new_data;
> +
> +      if (dir == from_utf7)
> +      {
> +          step->__min_needed_from = MIN_NEEDED_FROM;
> +          step->__max_needed_from = MAX_NEEDED_FROM;
> +          step->__min_needed_to = MIN_NEEDED_TO;
> +          step->__max_needed_to = MAX_NEEDED_TO;
> +      }
> +      else
> +      {
> +          step->__min_needed_from = MIN_NEEDED_TO;
> +          step->__max_needed_from = MAX_NEEDED_TO;
> +          step->__min_needed_to = MIN_NEEDED_FROM;
> +          step->__max_needed_to = MAX_NEEDED_FROM;
> +      }
> +  }
> +  else
> +    return __GCONV_NOCONV;
> +
> +  step->__stateful = 1;
> +
> +  return __GCONV_OK;
> +}
> +
> +extern void gconv_end (struct __gconv_step *data);
> +void
> +gconv_end (struct __gconv_step *data)
> +{
> +  free (data->__data);
> +}
> +
> +
>  
>  /* First define the conversion function from UTF-7 to UCS4.
>     The state is structured as follows:
> @@ -160,13 +265,13 @@ base64 (unsigned int i)
>      if ((statep->__count >> 3) == 0)					      \
>        {									      \
>  	/* base64 encoding inactive.  */				      \
> -	if (isxdirect (ch))						      \
> +	if (isxdirect (ch, var))					      \
>  	  {								      \
>  	    inptr++;							      \
>  	    put32 (outptr, ch);						      \
>  	    outptr += 4;						      \
>  	  }								      \
> -	else if (__glibc_likely (ch == '+'))				      \
> +	else if (__glibc_likely (ch == shift_character(var)))		      \
>  	  {								      \
>  	    if (__glibc_unlikely (inptr + 2 > inend))			      \
>  	      {								      \
> @@ -291,7 +396,7 @@ base64 (unsigned int i)
>        }									      \
>    }
>  #define LOOP_NEED_FLAGS
> -#define EXTRA_LOOP_DECLS	, mbstate_t *statep
> +#define EXTRA_LOOP_DECLS	, mbstate_t *statep, enum variant var
>  #include <iconv/loop.c>
>  
>  
> @@ -322,7 +427,7 @@ base64 (unsigned int i)
>      if ((statep->__count & 0x18) == 0)					      \
>        {									      \
>  	/* base64 encoding inactive */					      \
> -	if (isdirect (ch))   						      \
> +	if (isdirect (ch, var))						      \
>  	  {								      \
>  	    *outptr++ = (unsigned char) ch;				      \
>  	  }								      \
> @@ -330,7 +435,7 @@ base64 (unsigned int i)
>  	  {								      \
>  	    size_t count;						      \
>  									      \
> -	    if (ch == '+')						      \
> +	    if (ch == shift_character(var))				      \
>  	      count = 2;						      \
>  	    else if (ch < 0x10000)					      \
>  	      count = 3;						      \
> @@ -345,13 +450,13 @@ base64 (unsigned int i)
>  		break;							      \
>  	      }								      \
>  									      \
> -	    *outptr++ = '+';						      \
> -	    if (ch == '+')						      \
> +	    *outptr++ = shift_character(var);				      \
> +	    if (ch == shift_character(var))				      \
>  	      *outptr++ = '-';						      \
>  	    else if (ch < 0x10000)					      \
>  	      {								      \
> -		*outptr++ = base64 (ch >> 10);				      \
> -		*outptr++ = base64 ((ch >> 4) & 0x3f);			      \
> +		*outptr++ = base64 (ch >> 10, var);			      \
> +		*outptr++ = base64 ((ch >> 4) & 0x3f, var);		      \
>  		statep->__count = ((ch & 15) << 5) | (3 << 3);		      \
>  	      }								      \
>  	    else if (ch < 0x110000)					      \
> @@ -360,11 +465,11 @@ base64 (unsigned int i)
>  		uint32_t ch2 = 0xdc00 + ((ch - 0x10000) & 0x3ff);	      \
>  									      \
>  		ch = (ch1 << 16) | ch2;					      \
> -		*outptr++ = base64 (ch >> 26);				      \
> -		*outptr++ = base64 ((ch >> 20) & 0x3f);			      \
> -		*outptr++ = base64 ((ch >> 14) & 0x3f);			      \
> -		*outptr++ = base64 ((ch >> 8) & 0x3f);			      \
> -		*outptr++ = base64 ((ch >> 2) & 0x3f);			      \
> +		*outptr++ = base64 (ch >> 26, var);			      \
> +		*outptr++ = base64 ((ch >> 20) & 0x3f, var);		      \
> +		*outptr++ = base64 ((ch >> 14) & 0x3f, var);		      \
> +		*outptr++ = base64 ((ch >> 8) & 0x3f, var);		      \
> +		*outptr++ = base64 ((ch >> 2) & 0x3f, var);		      \
>  		statep->__count = ((ch & 3) << 7) | (2 << 3);		      \
>  	      }								      \
>  	    else							      \
> @@ -374,7 +479,7 @@ base64 (unsigned int i)
>      else								      \
>        {									      \
>  	/* base64 encoding active */					      \
> -	if (isdirect (ch))						      \
> +	if (isdirect (ch, var))						      \
>  	  {								      \
>  	    /* deactivate base64 encoding */				      \
>  	    size_t count;						      \
> @@ -388,7 +493,7 @@ base64 (unsigned int i)
>  	      }								      \
>  									      \
>  	    if ((statep->__count & 0x18) >= 0x10)			      \
> -	      *outptr++ = base64 ((statep->__count >> 3) & ~3);		      \
> +	      *outptr++ = base64 ((statep->__count >> 3) & ~3, var);	      \
>  	    if (needs_explicit_shift (ch))				      \
>  	      *outptr++ = '-';						      \
>  	    *outptr++ = (unsigned char) ch;				      \
> @@ -416,22 +521,24 @@ base64 (unsigned int i)
>  		switch ((statep->__count >> 3) & 3)			      \
>  		  {							      \
>  		  case 1:						      \
> -		    *outptr++ = base64 (ch >> 10);			      \
> -		    *outptr++ = base64 ((ch >> 4) & 0x3f);		      \
> +		    *outptr++ = base64 (ch >> 10, var);			      \
> +		    *outptr++ = base64 ((ch >> 4) & 0x3f, var);		      \
>  		    statep->__count = ((ch & 15) << 5) | (3 << 3);	      \
>  		    break;						      \
>  		  case 2:						      \
>  		    *outptr++ =						      \
> -		      base64 (((statep->__count >> 3) & ~3) | (ch >> 12));    \
> -		    *outptr++ = base64 ((ch >> 6) & 0x3f);		      \
> -		    *outptr++ = base64 (ch & 0x3f);			      \
> +		      base64 (((statep->__count >> 3) & ~3) | (ch >> 12),     \
> +			      var);					      \
> +		    *outptr++ = base64 ((ch >> 6) & 0x3f, var);		      \
> +		    *outptr++ = base64 (ch & 0x3f, var);		      \
>  		    statep->__count = (1 << 3);				      \
>  		    break;						      \
>  		  case 3:						      \
>  		    *outptr++ =						      \
> -		      base64 (((statep->__count >> 3) & ~3) | (ch >> 14));    \
> -		    *outptr++ = base64 ((ch >> 8) & 0x3f);		      \
> -		    *outptr++ = base64 ((ch >> 2) & 0x3f);		      \
> +		      base64 (((statep->__count >> 3) & ~3) | (ch >> 14),     \
> +			      var);					      \
> +		    *outptr++ = base64 ((ch >> 8) & 0x3f, var);		      \
> +		    *outptr++ = base64 ((ch >> 2) & 0x3f, var);		      \
>  		    statep->__count = ((ch & 3) << 7) | (2 << 3);	      \
>  		    break;						      \
>  		  default:						      \
> @@ -447,30 +554,32 @@ base64 (unsigned int i)
>  		switch ((statep->__count >> 3) & 3)			      \
>  		  {							      \
>  		  case 1:						      \
> -		    *outptr++ = base64 (ch >> 26);			      \
> -		    *outptr++ = base64 ((ch >> 20) & 0x3f);		      \
> -		    *outptr++ = base64 ((ch >> 14) & 0x3f);		      \
> -		    *outptr++ = base64 ((ch >> 8) & 0x3f);		      \
> -		    *outptr++ = base64 ((ch >> 2) & 0x3f);		      \
> +		    *outptr++ = base64 (ch >> 26, var);			      \
> +		    *outptr++ = base64 ((ch >> 20) & 0x3f, var);	      \
> +		    *outptr++ = base64 ((ch >> 14) & 0x3f, var);	      \
> +		    *outptr++ = base64 ((ch >> 8) & 0x3f, var);		      \
> +		    *outptr++ = base64 ((ch >> 2) & 0x3f, var);		      \
>  		    statep->__count = ((ch & 3) << 7) | (2 << 3);	      \
>  		    break;						      \
>  		  case 2:						      \
>  		    *outptr++ =						      \
> -		      base64 (((statep->__count >> 3) & ~3) | (ch >> 28));    \
> -		    *outptr++ = base64 ((ch >> 22) & 0x3f);		      \
> -		    *outptr++ = base64 ((ch >> 16) & 0x3f);		      \
> -		    *outptr++ = base64 ((ch >> 10) & 0x3f);		      \
> -		    *outptr++ = base64 ((ch >> 4) & 0x3f);		      \
> +		      base64 (((statep->__count >> 3) & ~3) | (ch >> 28),     \
> +			      var);					      \
> +		    *outptr++ = base64 ((ch >> 22) & 0x3f, var);	      \
> +		    *outptr++ = base64 ((ch >> 16) & 0x3f, var);	      \
> +		    *outptr++ = base64 ((ch >> 10) & 0x3f, var);	      \
> +		    *outptr++ = base64 ((ch >> 4) & 0x3f, var);		      \
>  		    statep->__count = ((ch & 15) << 5) | (3 << 3);	      \
>  		    break;						      \
>  		  case 3:						      \
>  		    *outptr++ =						      \
> -		      base64 (((statep->__count >> 3) & ~3) | (ch >> 30));    \
> -		    *outptr++ = base64 ((ch >> 24) & 0x3f);		      \
> -		    *outptr++ = base64 ((ch >> 18) & 0x3f);		      \
> -		    *outptr++ = base64 ((ch >> 12) & 0x3f);		      \
> -		    *outptr++ = base64 ((ch >> 6) & 0x3f);		      \
> -		    *outptr++ = base64 (ch & 0x3f);			      \
> +		      base64 (((statep->__count >> 3) & ~3) | (ch >> 30),     \
> +			      var);					      \
> +		    *outptr++ = base64 ((ch >> 24) & 0x3f, var);	      \
> +		    *outptr++ = base64 ((ch >> 18) & 0x3f, var);	      \
> +		    *outptr++ = base64 ((ch >> 12) & 0x3f, var);	      \
> +		    *outptr++ = base64 ((ch >> 6) & 0x3f, var);		      \
> +		    *outptr++ = base64 (ch & 0x3f, var);		      \
>  		    statep->__count = (1 << 3);				      \
>  		    break;						      \
>  		  default:						      \
> @@ -486,7 +595,7 @@ base64 (unsigned int i)
>      inptr += 4;								      \
>    }


Ok

>  #define LOOP_NEED_FLAGS
> -#define EXTRA_LOOP_DECLS	, mbstate_t *statep
> +#define EXTRA_LOOP_DECLS	, mbstate_t *statep, enum variant var
>  #include <iconv/loop.c>
>  
>  
> @@ -516,7 +625,7 @@ base64 (unsigned int i)
>  	    {								      \
>  	      /* Write out the shift sequence.  */			      \
>  	      if ((state & 0x18) >= 0x10)				      \
> -		*outbuf++ = base64 ((state >> 3) & ~3);			      \
> +		*outbuf++ = base64 ((state >> 3) & ~3, var);		      \
>  	      *outbuf++ = '-';						      \
>  									      \
>  	      data->__statep->__count = 0;				      \

Ok.
  
Max Gautier March 12, 2022, 11:07 a.m. UTC | #2
On Mon, Mar 07, 2022 at 09:34:47AM -0300, Adhemerval Zanella wrote:
> >  static int
> > -isxdirect (uint32_t ch)
> > +isxdirect (uint32_t ch, enum variant var)
> >  {
> > -    return (ch == '\t'
> > -            || ch == '\n'
> > -            || ch == '\r'
> > -            || (between(ch, ' ','}')
> > -                && ch != '+' && ch != '\\')
> > -           );
> > +    return(isdirect(ch, var)
> > +            || (var == UTF7 &&
> > +                (between(ch, '!', '&')
> > +                 || ch == '*'
> > +                 || between(ch, ';', '@')
> > +                 || (between(ch, '[', '`') && ch != '\\')
> > +                 || between(ch, '{', '}'))
> > +                )
> > +          );
> >  }
> >  
> 
> The change is ok, but maybe adding the variant out makes it more clear:
> 
>    if (var != UTF7)
>      return 0;
>    [...]
> 
Something like this?

if (isdirect(ch, var))
   return true;
if (var != UTF7)
   return false;
return (between(ch, '!', '&')
  || ch == '*'
  || between(ch, ';', '@')
  || (between(ch, '[', '`') && ch != '\\')
  || between(ch, '{', '}'))
 );

I'd prefer the single expression form, but that works too.

> Also I think since you change it, it would be better to use 'bool' as
> return type.

Ok. I was not sure whether it was ok to use the bool type or not.

Thanks for the review.
  
Adhemerval Zanella Netto March 14, 2022, 12:17 p.m. UTC | #3
On 12/03/2022 08:07, Max Gautier wrote:
> On Mon, Mar 07, 2022 at 09:34:47AM -0300, Adhemerval Zanella wrote:
>>>  static int
>>> -isxdirect (uint32_t ch)
>>> +isxdirect (uint32_t ch, enum variant var)
>>>  {
>>> -    return (ch == '\t'
>>> -            || ch == '\n'
>>> -            || ch == '\r'
>>> -            || (between(ch, ' ','}')
>>> -                && ch != '+' && ch != '\\')
>>> -           );
>>> +    return(isdirect(ch, var)
>>> +            || (var == UTF7 &&
>>> +                (between(ch, '!', '&')
>>> +                 || ch == '*'
>>> +                 || between(ch, ';', '@')
>>> +                 || (between(ch, '[', '`') && ch != '\\')
>>> +                 || between(ch, '{', '}'))
>>> +                )
>>> +          );
>>>  }
>>>  
>>
>> The change is ok, but maybe adding the variant out makes it more clear:
>>
>>    if (var != UTF7)
>>      return 0;
>>    [...]
>>
> something like this ? 
> 
> if (isdirect(ch, var))
>    return true;
> if (var != UTF7)
>    return false;
> return (between(ch, '!', '&')
>   || ch == '*'
>   || between(ch, ';', '@')
>   || (between(ch, '[', '`') && ch != '\\')
>   || between(ch, '{', '}'))
>  );
> 

Yes, it is slightly better for readability (don't forget the space before '(',
and the extra parentheses in the return statement are not required).

> I'd prefer the single expression form, but that works too.
> 
>> Also I think since you change it, it would be better to use 'bool' as
>> return type.
> 
> Ok. I was not sure whether it was ok to use bool type or not.

We build internally with gnu11, so we can use most C11 facilities (there
are some spots, like atomics, that we are still migrating and that require
extra care to avoid calling libatomic).

> 
> Thanks for the review.
>
  

Patch

diff --git a/iconvdata/utf-7.c b/iconvdata/utf-7.c
index ac7d78141a..965d4220f1 100644
--- a/iconvdata/utf-7.c
+++ b/iconvdata/utf-7.c
@@ -29,6 +29,24 @@ 
 #include <stdlib.h>
 
 
+enum variant
+{
+    UTF7,
+};
+
+/* Must be in the same order as enum variant above.  */
+static const char names[] =
+  "UTF-7//\0"
+  "\0";
+
+static uint32_t
+shift_character(enum variant const var)
+{
+    if (var == UTF7)
+        return '+';
+    else
+        abort();
+}
 
 static int
 between(uint32_t const ch,
@@ -38,37 +56,43 @@  between(uint32_t const ch,
 }
 
 /* The set of "direct characters":
+   FOR UTF-7
    A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
 */
 
 static int
-isdirect (uint32_t ch)
+isdirect (uint32_t ch, enum variant var)
 {
-    return (between(ch, 'A', 'Z')
-	    || between(ch, 'a', 'z')
-	    || between(ch, '0', '9')
-	    || ch == '\'' || ch == '(' || ch == ')'
-	    || between(ch, ',', '/')
-	    || ch == ':' || ch == '?'
-	    || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
+    if (var == UTF7)
+        return (between(ch, 'A', 'Z')
+                || between(ch, 'a', 'z')
+                || between(ch, '0', '9')
+                || ch == '\'' || ch == '(' || ch == ')'
+                || between(ch, ',', '/')
+                || ch == ':' || ch == '?'
+                || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
+    abort();
 }
 
 
 /* The set of "direct and optional direct characters":
+   (UTF-7 only)
    A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
    ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
 */
 
-
 static int
-isxdirect (uint32_t ch)
+isxdirect (uint32_t ch, enum variant var)
 {
-    return (ch == '\t'
-            || ch == '\n'
-            || ch == '\r'
-            || (between(ch, ' ','}')
-                && ch != '+' && ch != '\\')
-           );
+    return(isdirect(ch, var)
+            || (var == UTF7 &&
+                (between(ch, '!', '&')
+                 || ch == '*'
+                 || between(ch, ';', '@')
+                 || (between(ch, '[', '`') && ch != '\\')
+                 || between(ch, '{', '}'))
+                )
+          );
 }
 
 
@@ -91,7 +115,7 @@  needs_explicit_shift (uint32_t ch)
 
 /* Converts a value in the range 0..63 to a base64 encoded char.  */
 static unsigned char
-base64 (unsigned int i)
+base64 (unsigned int i, enum variant var)
 {
   if (i < 26)
     return i + 'A';
@@ -101,7 +125,7 @@  base64 (unsigned int i)
     return i - 52 + '0';
   else if (i == 62)
     return '+';
-  else if (i == 63)
+  else if (i == 63 && var == UTF7)
     return '/';
   else
     abort ();
@@ -109,9 +133,8 @@  base64 (unsigned int i)
 
 
 /* Definitions used in the body of the `gconv' function.  */
-#define CHARSET_NAME		"UTF-7//"
-#define DEFINE_INIT		1
-#define DEFINE_FINI		1
+#define DEFINE_INIT		0
+#define DEFINE_FINI		0
 #define FROM_LOOP		from_utf7_loop
 #define TO_LOOP			to_utf7_loop
 #define MIN_NEEDED_FROM		1
@@ -119,11 +142,27 @@  base64 (unsigned int i)
 #define MIN_NEEDED_TO		4
 #define MAX_NEEDED_TO		4
 #define ONE_DIRECTION		0
+#define FROM_DIRECTION      (dir == from_utf7)
 #define PREPARE_LOOP \
   mbstate_t saved_state;						      \
-  mbstate_t *statep = data->__statep;
-#define EXTRA_LOOP_ARGS		, statep
+  mbstate_t *statep = data->__statep;					      \
+  enum direction dir = ((struct utf7_data *) step->__data)->dir;	      \
+  enum direction var = ((struct utf7_data *) step->__data)->var;
+#define EXTRA_LOOP_ARGS		, statep, var
+
+
+enum direction
+{
+   illegal_dir,
+   from_utf7,
+   to_utf7
+};
 
+struct utf7_data
+{
+  enum direction dir;
+  enum variant var;
+};
 
 /* Since we might have to reset input pointer we must be able to save
    and restore the state.  */
@@ -133,6 +172,72 @@  base64 (unsigned int i)
   else									      \
     *statep = saved_state
 
+extern int gconv_init (struct __gconv_step *step);
+int
+gconv_init (struct __gconv_step *step)
+{
+  /* Determine which direction.  */
+  struct utf7_data *new_data;
+  enum direction dir = illegal_dir;
+
+  enum variant var = 0;
+  for (const char *name = names; *name != '\0';
+       name = __rawmemchr (name, '\0') + 1)
+    {
+      if (__strcasecmp (step->__from_name, name) == 0)
+	{
+	  dir = from_utf7;
+	  break;
+	}
+      else if (__strcasecmp (step->__to_name, name) == 0)
+	{
+	  dir = to_utf7;
+	  break;
+	}
+      ++var;
+    }
+
+  if (__builtin_expect (dir, from_utf7) != illegal_dir)
+  {
+      new_data = malloc (sizeof (*new_data));
+      if (new_data == NULL)
+          return __GCONV_NOMEM;
+
+      new_data->dir = dir;
+      new_data->var = var;
+      step->__data = new_data;
+
+      if (dir == from_utf7)
+      {
+          step->__min_needed_from = MIN_NEEDED_FROM;
+          step->__max_needed_from = MAX_NEEDED_FROM;
+          step->__min_needed_to = MIN_NEEDED_TO;
+          step->__max_needed_to = MAX_NEEDED_TO;
+      }
+      else
+      {
+          step->__min_needed_from = MIN_NEEDED_TO;
+          step->__max_needed_from = MAX_NEEDED_TO;
+          step->__min_needed_to = MIN_NEEDED_FROM;
+          step->__max_needed_to = MAX_NEEDED_FROM;
+      }
+  }
+  else
+    return __GCONV_NOCONV;
+
+  step->__stateful = 1;
+
+  return __GCONV_OK;
+}
+
+extern void gconv_end (struct __gconv_step *data);
+void
+gconv_end (struct __gconv_step *data)
+{
+  free (data->__data);
+}
+
+
 
 /* First define the conversion function from UTF-7 to UCS4.
    The state is structured as follows:
@@ -160,13 +265,13 @@  base64 (unsigned int i)
     if ((statep->__count >> 3) == 0)					      \
       {									      \
 	/* base64 encoding inactive.  */				      \
-	if (isxdirect (ch))						      \
+	if (isxdirect (ch, var))					      \
 	  {								      \
 	    inptr++;							      \
 	    put32 (outptr, ch);						      \
 	    outptr += 4;						      \
 	  }								      \
-	else if (__glibc_likely (ch == '+'))				      \
+	else if (__glibc_likely (ch == shift_character(var)))		      \
 	  {								      \
 	    if (__glibc_unlikely (inptr + 2 > inend))			      \
 	      {								      \
@@ -291,7 +396,7 @@  base64 (unsigned int i)
       }									      \
   }
 #define LOOP_NEED_FLAGS
-#define EXTRA_LOOP_DECLS	, mbstate_t *statep
+#define EXTRA_LOOP_DECLS	, mbstate_t *statep, enum variant var
 #include <iconv/loop.c>
 
 
@@ -322,7 +427,7 @@  base64 (unsigned int i)
     if ((statep->__count & 0x18) == 0)					      \
       {									      \
 	/* base64 encoding inactive */					      \
-	if (isdirect (ch))   						      \
+	if (isdirect (ch, var))						      \
 	  {								      \
 	    *outptr++ = (unsigned char) ch;				      \
 	  }								      \
@@ -330,7 +435,7 @@  base64 (unsigned int i)
 	  {								      \
 	    size_t count;						      \
 									      \
-	    if (ch == '+')						      \
+	    if (ch == shift_character(var))				      \
 	      count = 2;						      \
 	    else if (ch < 0x10000)					      \
 	      count = 3;						      \
@@ -345,13 +450,13 @@  base64 (unsigned int i)
 		break;							      \
 	      }								      \
 									      \
-	    *outptr++ = '+';						      \
-	    if (ch == '+')						      \
+	    *outptr++ = shift_character(var);				      \
+	    if (ch == shift_character(var))				      \
 	      *outptr++ = '-';						      \
 	    else if (ch < 0x10000)					      \
 	      {								      \
-		*outptr++ = base64 (ch >> 10);				      \
-		*outptr++ = base64 ((ch >> 4) & 0x3f);			      \
+		*outptr++ = base64 (ch >> 10, var);			      \
+		*outptr++ = base64 ((ch >> 4) & 0x3f, var);		      \
 		statep->__count = ((ch & 15) << 5) | (3 << 3);		      \
 	      }								      \
 	    else if (ch < 0x110000)					      \
@@ -360,11 +465,11 @@  base64 (unsigned int i)
 		uint32_t ch2 = 0xdc00 + ((ch - 0x10000) & 0x3ff);	      \
 									      \
 		ch = (ch1 << 16) | ch2;					      \
-		*outptr++ = base64 (ch >> 26);				      \
-		*outptr++ = base64 ((ch >> 20) & 0x3f);			      \
-		*outptr++ = base64 ((ch >> 14) & 0x3f);			      \
-		*outptr++ = base64 ((ch >> 8) & 0x3f);			      \
-		*outptr++ = base64 ((ch >> 2) & 0x3f);			      \
+		*outptr++ = base64 (ch >> 26, var);			      \
+		*outptr++ = base64 ((ch >> 20) & 0x3f, var);		      \
+		*outptr++ = base64 ((ch >> 14) & 0x3f, var);		      \
+		*outptr++ = base64 ((ch >> 8) & 0x3f, var);		      \
+		*outptr++ = base64 ((ch >> 2) & 0x3f, var);		      \
 		statep->__count = ((ch & 3) << 7) | (2 << 3);		      \
 	      }								      \
 	    else							      \
@@ -374,7 +479,7 @@  base64 (unsigned int i)
     else								      \
       {									      \
 	/* base64 encoding active */					      \
-	if (isdirect (ch))						      \
+	if (isdirect (ch, var))						      \
 	  {								      \
 	    /* deactivate base64 encoding */				      \
 	    size_t count;						      \
@@ -388,7 +493,7 @@  base64 (unsigned int i)
 	      }								      \
 									      \
 	    if ((statep->__count & 0x18) >= 0x10)			      \
-	      *outptr++ = base64 ((statep->__count >> 3) & ~3);		      \
+	      *outptr++ = base64 ((statep->__count >> 3) & ~3, var);	      \
 	    if (needs_explicit_shift (ch))				      \
 	      *outptr++ = '-';						      \
 	    *outptr++ = (unsigned char) ch;				      \
@@ -416,22 +521,24 @@  base64 (unsigned int i)
 		switch ((statep->__count >> 3) & 3)			      \
 		  {							      \
 		  case 1:						      \
-		    *outptr++ = base64 (ch >> 10);			      \
-		    *outptr++ = base64 ((ch >> 4) & 0x3f);		      \
+		    *outptr++ = base64 (ch >> 10, var);			      \
+		    *outptr++ = base64 ((ch >> 4) & 0x3f, var);		      \
 		    statep->__count = ((ch & 15) << 5) | (3 << 3);	      \
 		    break;						      \
 		  case 2:						      \
 		    *outptr++ =						      \
-		      base64 (((statep->__count >> 3) & ~3) | (ch >> 12));    \
-		    *outptr++ = base64 ((ch >> 6) & 0x3f);		      \
-		    *outptr++ = base64 (ch & 0x3f);			      \
+		      base64 (((statep->__count >> 3) & ~3) | (ch >> 12),     \
+			      var);					      \
+		    *outptr++ = base64 ((ch >> 6) & 0x3f, var);		      \
+		    *outptr++ = base64 (ch & 0x3f, var);		      \
 		    statep->__count = (1 << 3);				      \
 		    break;						      \
 		  case 3:						      \
 		    *outptr++ =						      \
-		      base64 (((statep->__count >> 3) & ~3) | (ch >> 14));    \
-		    *outptr++ = base64 ((ch >> 8) & 0x3f);		      \
-		    *outptr++ = base64 ((ch >> 2) & 0x3f);		      \
+		      base64 (((statep->__count >> 3) & ~3) | (ch >> 14),     \
+			      var);					      \
+		    *outptr++ = base64 ((ch >> 8) & 0x3f, var);		      \
+		    *outptr++ = base64 ((ch >> 2) & 0x3f, var);		      \
 		    statep->__count = ((ch & 3) << 7) | (2 << 3);	      \
 		    break;						      \
 		  default:						      \
@@ -447,30 +554,32 @@  base64 (unsigned int i)
 		switch ((statep->__count >> 3) & 3)			      \
 		  {							      \
 		  case 1:						      \
-		    *outptr++ = base64 (ch >> 26);			      \
-		    *outptr++ = base64 ((ch >> 20) & 0x3f);		      \
-		    *outptr++ = base64 ((ch >> 14) & 0x3f);		      \
-		    *outptr++ = base64 ((ch >> 8) & 0x3f);		      \
-		    *outptr++ = base64 ((ch >> 2) & 0x3f);		      \
+		    *outptr++ = base64 (ch >> 26, var);			      \
+		    *outptr++ = base64 ((ch >> 20) & 0x3f, var);	      \
+		    *outptr++ = base64 ((ch >> 14) & 0x3f, var);	      \
+		    *outptr++ = base64 ((ch >> 8) & 0x3f, var);		      \
+		    *outptr++ = base64 ((ch >> 2) & 0x3f, var);		      \
 		    statep->__count = ((ch & 3) << 7) | (2 << 3);	      \
 		    break;						      \
 		  case 2:						      \
 		    *outptr++ =						      \
-		      base64 (((statep->__count >> 3) & ~3) | (ch >> 28));    \
-		    *outptr++ = base64 ((ch >> 22) & 0x3f);		      \
-		    *outptr++ = base64 ((ch >> 16) & 0x3f);		      \
-		    *outptr++ = base64 ((ch >> 10) & 0x3f);		      \
-		    *outptr++ = base64 ((ch >> 4) & 0x3f);		      \
+		      base64 (((statep->__count >> 3) & ~3) | (ch >> 28),     \
+			      var);					      \
+		    *outptr++ = base64 ((ch >> 22) & 0x3f, var);	      \
+		    *outptr++ = base64 ((ch >> 16) & 0x3f, var);	      \
+		    *outptr++ = base64 ((ch >> 10) & 0x3f, var);	      \
+		    *outptr++ = base64 ((ch >> 4) & 0x3f, var);		      \
 		    statep->__count = ((ch & 15) << 5) | (3 << 3);	      \
 		    break;						      \
 		  case 3:						      \
 		    *outptr++ =						      \
-		      base64 (((statep->__count >> 3) & ~3) | (ch >> 30));    \
-		    *outptr++ = base64 ((ch >> 24) & 0x3f);		      \
-		    *outptr++ = base64 ((ch >> 18) & 0x3f);		      \
-		    *outptr++ = base64 ((ch >> 12) & 0x3f);		      \
-		    *outptr++ = base64 ((ch >> 6) & 0x3f);		      \
-		    *outptr++ = base64 (ch & 0x3f);			      \
+		      base64 (((statep->__count >> 3) & ~3) | (ch >> 30),     \
+			      var);					      \
+		    *outptr++ = base64 ((ch >> 24) & 0x3f, var);	      \
+		    *outptr++ = base64 ((ch >> 18) & 0x3f, var);	      \
+		    *outptr++ = base64 ((ch >> 12) & 0x3f, var);	      \
+		    *outptr++ = base64 ((ch >> 6) & 0x3f, var);		      \
+		    *outptr++ = base64 (ch & 0x3f, var);		      \
 		    statep->__count = (1 << 3);				      \
 		    break;						      \
 		  default:						      \
@@ -486,7 +595,7 @@  base64 (unsigned int i)
     inptr += 4;								      \
   }
 #define LOOP_NEED_FLAGS
-#define EXTRA_LOOP_DECLS	, mbstate_t *statep
+#define EXTRA_LOOP_DECLS	, mbstate_t *statep, enum variant var
 #include <iconv/loop.c>
 
 
@@ -516,7 +625,7 @@  base64 (unsigned int i)
 	    {								      \
 	      /* Write out the shift sequence.  */			      \
 	      if ((state & 0x18) >= 0x10)				      \
-		*outbuf++ = base64 ((state >> 3) & ~3);			      \
+		*outbuf++ = base64 ((state >> 3) & ~3, var);		      \
 	      *outbuf++ = '-';						      \
 									      \
 	      data->__statep->__count = 0;				      \