[v1] x86: Fix bug in strchrnul-evex512 [BZ #32078]

Message ID 20240813152914.4183543-1-goldstein.w.n@gmail.com
State Committed
Commit 7da08862471dfec6fdae731c2a5f351ad485c71f
Headers
Series [v1] x86: Fix bug in strchrnul-evex512 [BZ #32078] |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Test passed

Commit Message

Noah Goldstein Aug. 13, 2024, 3:29 p.m. UTC
  Issue was we were expecting not matches with CHAR before the start of
the string in the page cross case.

The check code in the page cross case:
```
    and    $0xffffffffffffffc0,%rax
    vmovdqa64 (%rax),%zmm17
    vpcmpneqb %zmm17,%zmm16,%k1
    vptestmb %zmm17,%zmm17,%k0{%k1}
    kmovq  %k0,%rax
    inc    %rax
    shr    %cl,%rax
    je     L(continue)
```

expects that all characters that neither match null nor CHAR will be
1s in `rax` prior to the `inc`. Then the `inc` will overflow all of
the 1s where no relevant match was found.

This is incorrect in the page-cross case, as the
`vmovdqa64 (%rax),%zmm17` loads from before the start of the input
string.

If there are matches with CHAR before the start of the string, `rax`
won't properly overflow.

The fix is quite simple. Just replace:

```
    inc    %rax
    shr    %cl,%rax
```
With:
```
    sar    %cl,%rax
    inc    %rax
```

The arithmetic shift will clear any matches prior to the start of the
string while maintaining the signbit so the 1s can properly overflow
to zero in the case of no matches.
---
 string/test-strchr.c                        | 65 ++++++++++++++++++++-
 sysdeps/x86_64/multiarch/strchr-evex-base.S |  8 +--
 2 files changed, 68 insertions(+), 5 deletions(-)
  

Comments

H.J. Lu Aug. 14, 2024, 5:35 p.m. UTC | #1
On Tue, Aug 13, 2024 at 8:29 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Issue was we were expecting not matches with CHAR before the start of
> the string in the page cross case.
>
> The check code in the page cross case:
> ```
>     and    $0xffffffffffffffc0,%rax
>     vmovdqa64 (%rax),%zmm17
>     vpcmpneqb %zmm17,%zmm16,%k1
>     vptestmb %zmm17,%zmm17,%k0{%k1}
>     kmovq  %k0,%rax
>     inc    %rax
>     shr    %cl,%rax
>     je     L(continue)
> ```
>
> expects that all characters that neither match null nor CHAR will be
> 1s in `rax` prior to the `inc`. Then the `inc` will overflow all of
> the 1s where no relevant match was found.
>
> This is incorrect in the page-cross case, as the
> `vmovdqa64 (%rax),%zmm17` loads from before the start of the input
> string.
>
> If there are matches with CHAR before the start of the string, `rax`
> won't properly overflow.
>
> The fix is quite simple. Just replace:
>
> ```
>     inc    %rax
>     shr    %cl,%rax
> ```
> With:
> ```
>     sar    %cl,%rax
>     inc    %rax
> ```
>
> The arithmetic shift will clear any matches prior to the start of the
> string while maintaining the signbit so the 1s can properly overflow
> to zero in the case of no matches.
> ---
>  string/test-strchr.c                        | 65 ++++++++++++++++++++-
>  sysdeps/x86_64/multiarch/strchr-evex-base.S |  8 +--
>  2 files changed, 68 insertions(+), 5 deletions(-)
>
> diff --git a/string/test-strchr.c b/string/test-strchr.c
> index c795eac6fa..72b17af687 100644
> --- a/string/test-strchr.c
> +++ b/string/test-strchr.c
> @@ -255,6 +255,69 @@ check1 (void)
>      check_result (impl, s, c, exp_result);
>  }
>
> +static void
> +check2 (void)
> +{
> +  CHAR *s = (CHAR *) (buf1 + getpagesize () - 4 * sizeof (CHAR));
> +  CHAR *s_begin = (CHAR *) (buf1 + getpagesize () - 64);
> +#ifndef USE_FOR_STRCHRNUL
> +  CHAR *exp_result = NULL;
> +#else
> +  CHAR *exp_result = s + 1;
> +#endif
> +  CHAR val = 0x12;
> +  for (; s_begin != s; ++s_begin)
> +    *s_begin = val;
> +
> +  s[0] = val + 1;
> +  s[1] = 0;
> +  s[2] = val + 1;
> +  s[3] = val + 1;
> +
> +  {
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, s, val, exp_result);
> +  }
> +  s[3] = val;
> +  {
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, s, val, exp_result);
> +  }
> +  exp_result = s;
> +  s[0] = val;
> +  {
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, s, val, exp_result);
> +  }
> +
> +  s[3] = val + 1;
> +  {
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, s, val, exp_result);
> +  }
> +
> +  s[0] = val + 1;
> +  s[1] = val + 1;
> +  s[2] = val + 1;
> +  s[3] = val + 1;
> +  s[4] = val;
> +  exp_result = s + 4;
> +  {
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, s, val, exp_result);
> +  }
> +  s[4] = 0;
> +#ifndef USE_FOR_STRCHRNUL
> +  exp_result = NULL;
> +#else
> +  exp_result = s + 4;
> +#endif
> +  {
> +    FOR_EACH_IMPL (impl, 0)
> +      check_result (impl, s, val, exp_result);
> +  }
> +}
> +
>  int
>  test_main (void)
>  {
> @@ -263,7 +326,7 @@ test_main (void)
>    test_init ();
>
>    check1 ();
> -
> +  check2 ();
>    printf ("%20s", "");
>    FOR_EACH_IMPL (impl, 0)
>      printf ("\t%s", impl->name);
> diff --git a/sysdeps/x86_64/multiarch/strchr-evex-base.S b/sysdeps/x86_64/multiarch/strchr-evex-base.S
> index 04e2c0e79e..3a0b7c9d64 100644
> --- a/sysdeps/x86_64/multiarch/strchr-evex-base.S
> +++ b/sysdeps/x86_64/multiarch/strchr-evex-base.S
> @@ -124,13 +124,13 @@ L(page_cross):
>         VPCMPNE %VMM(1), %VMM(0), %k1
>         VPTEST  %VMM(1), %VMM(1), %k0{%k1}
>         KMOV    %k0, %VRAX
> -# ifdef USE_AS_WCSCHR
> +       sar     %cl, %VRAX
> +#ifdef USE_AS_WCSCHR
>         sub     $VEC_MATCH_MASK, %VRAX
> -# else
> +#else
>         inc     %VRAX
> -# endif
> +#endif
>         /* Ignore number of character for alignment adjustment.  */
> -       shr     %cl, %VRAX
>         jz      L(align_more)
>
>         bsf     %VRAX, %VRAX
> --
> 2.34.1
>

Without the fix,  wcsmbs/test-wcschr string/test-strchr string/test-strchrnul
failed with the new test.

LGTM.

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>

Thanks.
  

Patch

diff --git a/string/test-strchr.c b/string/test-strchr.c
index c795eac6fa..72b17af687 100644
--- a/string/test-strchr.c
+++ b/string/test-strchr.c
@@ -255,6 +255,69 @@  check1 (void)
     check_result (impl, s, c, exp_result);
 }
 
+static void
+check2 (void)
+{
+  CHAR *s = (CHAR *) (buf1 + getpagesize () - 4 * sizeof (CHAR));
+  CHAR *s_begin = (CHAR *) (buf1 + getpagesize () - 64);
+#ifndef USE_FOR_STRCHRNUL
+  CHAR *exp_result = NULL;
+#else
+  CHAR *exp_result = s + 1;
+#endif
+  CHAR val = 0x12;
+  for (; s_begin != s; ++s_begin)
+    *s_begin = val;
+
+  s[0] = val + 1;
+  s[1] = 0;
+  s[2] = val + 1;
+  s[3] = val + 1;
+
+  {
+    FOR_EACH_IMPL (impl, 0)
+      check_result (impl, s, val, exp_result);
+  }
+  s[3] = val;
+  {
+    FOR_EACH_IMPL (impl, 0)
+      check_result (impl, s, val, exp_result);
+  }
+  exp_result = s;
+  s[0] = val;
+  {
+    FOR_EACH_IMPL (impl, 0)
+      check_result (impl, s, val, exp_result);
+  }
+
+  s[3] = val + 1;
+  {
+    FOR_EACH_IMPL (impl, 0)
+      check_result (impl, s, val, exp_result);
+  }
+
+  s[0] = val + 1;
+  s[1] = val + 1;
+  s[2] = val + 1;
+  s[3] = val + 1;
+  s[4] = val;
+  exp_result = s + 4;
+  {
+    FOR_EACH_IMPL (impl, 0)
+      check_result (impl, s, val, exp_result);
+  }
+  s[4] = 0;
+#ifndef USE_FOR_STRCHRNUL
+  exp_result = NULL;
+#else
+  exp_result = s + 4;
+#endif
+  {
+    FOR_EACH_IMPL (impl, 0)
+      check_result (impl, s, val, exp_result);
+  }
+}
+
 int
 test_main (void)
 {
@@ -263,7 +326,7 @@  test_main (void)
   test_init ();
 
   check1 ();
-
+  check2 ();
   printf ("%20s", "");
   FOR_EACH_IMPL (impl, 0)
     printf ("\t%s", impl->name);
diff --git a/sysdeps/x86_64/multiarch/strchr-evex-base.S b/sysdeps/x86_64/multiarch/strchr-evex-base.S
index 04e2c0e79e..3a0b7c9d64 100644
--- a/sysdeps/x86_64/multiarch/strchr-evex-base.S
+++ b/sysdeps/x86_64/multiarch/strchr-evex-base.S
@@ -124,13 +124,13 @@  L(page_cross):
 	VPCMPNE	%VMM(1), %VMM(0), %k1
 	VPTEST	%VMM(1), %VMM(1), %k0{%k1}
 	KMOV	%k0, %VRAX
-# ifdef USE_AS_WCSCHR
+	sar	%cl, %VRAX
+#ifdef USE_AS_WCSCHR
 	sub	$VEC_MATCH_MASK, %VRAX
-# else
+#else
 	inc	%VRAX
-# endif
+#endif
 	/* Ignore number of character for alignment adjustment.  */
-	shr	%cl, %VRAX
 	jz	L(align_more)
 
 	bsf	%VRAX, %VRAX