[v4,2/4] iconv: Better mapping to RFC for UTF-7
Checks
| Context | Check | Description |
|---|---|---|
| dj/TryBot-apply_patch | success | Patch applied to master at the time it was sent |
Commit Message
- Direct use of characters instead of arcane arrays
- isxbase64 is not the Modified BASE64 alphabet, but the set of characters
that need to trigger an explicit shift back to US-ASCII. Make that clearer.
Signed-off-by: Max Gautier <mg@max.gautier.name>
---
iconvdata/utf-7.c | 56 +++++++++++++++++++++++++++--------------------
1 file changed, 32 insertions(+), 24 deletions(-)
Comments
On 09/12/2021 06:31, Max Gautier via Libc-alpha wrote:
> - Direct use of characters instead of arcane arrays
> - isxbase64 is not the Modified BASE64 alphabet, but the set of characters
> that need to trigger an explicit shift back to US-ASCII. Make that clearer.
>
> Signed-off-by: Max Gautier <mg@max.gautier.name>
LGTM with style fixes below.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
> ---
> iconvdata/utf-7.c | 56 +++++++++++++++++++++++++++--------------------
> 1 file changed, 32 insertions(+), 24 deletions(-)
>
> diff --git a/iconvdata/utf-7.c b/iconvdata/utf-7.c
> index 9ba0974959..ac7d78141a 100644
> --- a/iconvdata/utf-7.c
> +++ b/iconvdata/utf-7.c
> @@ -30,20 +30,27 @@
>
>
>
> +static int
> +between(uint32_t const ch,
Space before '(' here, and in the other usages below. Also, 'const' on these
by-value parameters does not change much here.
> + uint32_t const lower_bound, uint32_t const upper_bound)
> +{
> + return (ch >= lower_bound && ch <= upper_bound);
> +}
> +
> /* The set of "direct characters":
> A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
> */
>
> -static const unsigned char direct_tab[128 / 8] =
> - {
> - 0x00, 0x26, 0x00, 0x00, 0x81, 0xf3, 0xff, 0x87,
> - 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07
> - };
> -
> static int
> isdirect (uint32_t ch)
> {
> - return (ch < 128 && ((direct_tab[ch >> 3] >> (ch & 7)) & 1));
> + return (between(ch, 'A', 'Z')
Ok, it is indeed clear.
> + || between(ch, 'a', 'z')
> + || between(ch, '0', '9')
> + || ch == '\'' || ch == '(' || ch == ')'
> + || between(ch, ',', '/')
> + || ch == ':' || ch == '?'
> + || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
> }
>
>
> @@ -52,33 +59,33 @@ isdirect (uint32_t ch)
> ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
> */
>
> -static const unsigned char xdirect_tab[128 / 8] =
> - {
> - 0x00, 0x26, 0x00, 0x00, 0xff, 0xf7, 0xff, 0xff,
> - 0xff, 0xff, 0xff, 0xef, 0xff, 0xff, 0xff, 0x3f
> - };
>
> static int
> isxdirect (uint32_t ch)
> {
> - return (ch < 128 && ((xdirect_tab[ch >> 3] >> (ch & 7)) & 1));
> + return (ch == '\t'
> + || ch == '\n'
> + || ch == '\r'
> + || (between(ch, ' ','}')
> + && ch != '+' && ch != '\\')
> + );
> }
>
>
Ok.
> -/* The set of "extended base64 characters":
> +/* Characters which need to trigger an explicit shift back to US-ASCII (UTF-7
> + only): Modified base64 + '-' (shift back character)
> A-Z a-z 0-9 + / -
> */
>
> -static const unsigned char xbase64_tab[128 / 8] =
> - {
> - 0x00, 0x00, 0x00, 0x00, 0x00, 0xa8, 0xff, 0x03,
> - 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07
> - };
> -
> static int
> -isxbase64 (uint32_t ch)
> +needs_explicit_shift (uint32_t ch)
> {
> - return (ch < 128 && ((xbase64_tab[ch >> 3] >> (ch & 7)) & 1));
> + return (between(ch, 'A', 'Z')
> + || between(ch, 'a', 'z')
> + || between(ch, '/', '9')
> + || ch == '+'
> + || ch == '-'
> + );
> }
>
>
Ok.
> @@ -372,7 +379,8 @@ base64 (unsigned int i)
> /* deactivate base64 encoding */ \
> size_t count; \
> \
> - count = ((statep->__count & 0x18) >= 0x10) + isxbase64 (ch) + 1; \
> + count = ((statep->__count & 0x18) >= 0x10) \
> + + needs_explicit_shift (ch) + 1; \
> if (__glibc_unlikely (outptr + count > outend)) \
> { \
> result = __GCONV_FULL_OUTPUT; \
> @@ -381,7 +389,7 @@ base64 (unsigned int i)
> \
> if ((statep->__count & 0x18) >= 0x10) \
> *outptr++ = base64 ((statep->__count >> 3) & ~3); \
> - if (isxbase64 (ch)) \
> + if (needs_explicit_shift (ch)) \
> *outptr++ = '-'; \
> *outptr++ = (unsigned char) ch; \
> statep->__count = 0; \
Ok, it just changes the function name.
@@ -30,20 +30,27 @@
+static int
+between(uint32_t const ch,
+ uint32_t const lower_bound, uint32_t const upper_bound)
+{
+ return (ch >= lower_bound && ch <= upper_bound);
+}
+
/* The set of "direct characters":
A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
*/
-static const unsigned char direct_tab[128 / 8] =
- {
- 0x00, 0x26, 0x00, 0x00, 0x81, 0xf3, 0xff, 0x87,
- 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07
- };
-
static int
isdirect (uint32_t ch)
{
- return (ch < 128 && ((direct_tab[ch >> 3] >> (ch & 7)) & 1));
+ return (between(ch, 'A', 'Z')
+ || between(ch, 'a', 'z')
+ || between(ch, '0', '9')
+ || ch == '\'' || ch == '(' || ch == ')'
+ || between(ch, ',', '/')
+ || ch == ':' || ch == '?'
+ || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
}
@@ -52,33 +59,33 @@ isdirect (uint32_t ch)
! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
*/
-static const unsigned char xdirect_tab[128 / 8] =
- {
- 0x00, 0x26, 0x00, 0x00, 0xff, 0xf7, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xef, 0xff, 0xff, 0xff, 0x3f
- };
static int
isxdirect (uint32_t ch)
{
- return (ch < 128 && ((xdirect_tab[ch >> 3] >> (ch & 7)) & 1));
+ return (ch == '\t'
+ || ch == '\n'
+ || ch == '\r'
+ || (between(ch, ' ','}')
+ && ch != '+' && ch != '\\')
+ );
}
-/* The set of "extended base64 characters":
+/* Characters which need to trigger an explicit shift back to US-ASCII (UTF-7
+ only): Modified base64 + '-' (shift back character)
A-Z a-z 0-9 + / -
*/
-static const unsigned char xbase64_tab[128 / 8] =
- {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xa8, 0xff, 0x03,
- 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07
- };
-
static int
-isxbase64 (uint32_t ch)
+needs_explicit_shift (uint32_t ch)
{
- return (ch < 128 && ((xbase64_tab[ch >> 3] >> (ch & 7)) & 1));
+ return (between(ch, 'A', 'Z')
+ || between(ch, 'a', 'z')
+ || between(ch, '/', '9')
+ || ch == '+'
+ || ch == '-'
+ );
}
@@ -372,7 +379,8 @@ base64 (unsigned int i)
/* deactivate base64 encoding */ \
size_t count; \
\
- count = ((statep->__count & 0x18) >= 0x10) + isxbase64 (ch) + 1; \
+ count = ((statep->__count & 0x18) >= 0x10) \
+ + needs_explicit_shift (ch) + 1; \
if (__glibc_unlikely (outptr + count > outend)) \
{ \
result = __GCONV_FULL_OUTPUT; \
@@ -381,7 +389,7 @@ base64 (unsigned int i)
\
if ((statep->__count & 0x18) >= 0x10) \
*outptr++ = base64 ((statep->__count >> 3) & ~3); \
- if (isxbase64 (ch)) \
+ if (needs_explicit_shift (ch)) \
*outptr++ = '-'; \
*outptr++ = (unsigned char) ch; \
statep->__count = 0; \