[v3] x86: Fallback {str|wcs}cmp RTM in the ncmp overflow case [BZ #28896]

Message ID 20220216080935.3284536-1-goldstein.w.n@gmail.com
State Superseded
Headers
Series [v3] x86: Fallback {str|wcs}cmp RTM in the ncmp overflow case [BZ #28896] |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit fail Patch caused testsuite regressions

Commit Message

Noah Goldstein Feb. 16, 2022, 8:09 a.m. UTC
  In the overflow fallback strncmp-avx2-rtm and wcsncmp-avx2-rtm would
call strcmp-avx2 and wcsncmp-avx2 respectively. This would have
not checks around vzeroupper and would trigger spurious
aborts. This commit fixes that.

test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass on
AVX2 machines with and without RTM.

Co-authored-by: H.J. Lu <hjl.tools@gmail.com>
---
 sysdeps/x86/Makefile                        |  2 +-
 sysdeps/x86/tst-strncmp-rtm.c               | 14 +++++++++++++-
 sysdeps/x86_64/multiarch/strcmp-avx2.S      |  8 ++------
 sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S |  1 +
 sysdeps/x86_64/multiarch/strncmp-avx2.S     |  1 +
 sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S |  2 +-
 sysdeps/x86_64/multiarch/wcsncmp-avx2.S     |  2 +-
 7 files changed, 20 insertions(+), 10 deletions(-)
  

Comments

H.J. Lu Feb. 16, 2022, 1:17 p.m. UTC | #1
On Wed, Feb 16, 2022 at 12:09 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> In the overflow fallback strncmp-avx2-rtm and wcsncmp-avx2-rtm would
> call strcmp-avx2 and wcsncmp-avx2 respectively. This would have
> not checks around vzeroupper and would trigger spurious
> aborts. This commit fixes that.
>
> test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass on
> AVX2 machines with and without RTM.
>
> Co-authored-by: H.J. Lu <hjl.tools@gmail.com>
> ---
>  sysdeps/x86/Makefile                        |  2 +-
>  sysdeps/x86/tst-strncmp-rtm.c               | 14 +++++++++++++-
>  sysdeps/x86_64/multiarch/strcmp-avx2.S      |  8 ++------
>  sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S |  1 +
>  sysdeps/x86_64/multiarch/strncmp-avx2.S     |  1 +
>  sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S |  2 +-
>  sysdeps/x86_64/multiarch/wcsncmp-avx2.S     |  2 +-
>  7 files changed, 20 insertions(+), 10 deletions(-)
>
> diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
> index 6cf708335c..d110f7b7f2 100644
> --- a/sysdeps/x86/Makefile
> +++ b/sysdeps/x86/Makefile
> @@ -109,7 +109,7 @@ CFLAGS-tst-memset-rtm.c += -mrtm
>  CFLAGS-tst-strchr-rtm.c += -mrtm
>  CFLAGS-tst-strcpy-rtm.c += -mrtm
>  CFLAGS-tst-strlen-rtm.c += -mrtm
> -CFLAGS-tst-strncmp-rtm.c += -mrtm
> +CFLAGS-tst-strncmp-rtm.c += -mrtm -Wno-error
>  CFLAGS-tst-strrchr-rtm.c += -mrtm
>  endif
>
> diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c
> index 09ed6fa0d6..ebc94a3a6d 100644
> --- a/sysdeps/x86/tst-strncmp-rtm.c
> +++ b/sysdeps/x86/tst-strncmp-rtm.c
> @@ -16,6 +16,7 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> +#include <stdint.h>
>  #include <tst-string-rtm.h>
>
>  #define LOOP 3000
> @@ -45,8 +46,19 @@ function (void)
>      return 1;
>  }
>
> +__attribute__ ((noinline, noclone))
> +static int
> +function_overflow (void)
> +{
> +  if (strncmp (string1, string2, SIZE_MAX) == 0)
> +    return 0;
> +  else
> +    return 1;
> +}
> +
>  static int
>  do_test (void)
>  {
> -  return do_test_1 ("strncmp", LOOP, prepare, function);
> +  return (do_test_1 ("strncmp", LOOP, prepare, function)
> +         || do_test_1 ("strncmp", LOOP, prepare, function_overflow));
>  }
> diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
> index 99e5349be8..6da0e1a248 100644
> --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
> +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
> @@ -193,10 +193,10 @@ L(ret_zero):
>         .p2align 4,, 5
>  L(one_or_less):
>         jb      L(ret_zero)
> -#  ifdef USE_AS_WCSCMP
>         /* 'nbe' covers the case where length is negative (large
>            unsigned).  */
> -       jnbe    __wcscmp_avx2
> +       jnbe    OVERFLOW_STRCMP
> +#  ifdef USE_AS_WCSCMP
>         movl    (%rdi), %edx
>         xorl    %eax, %eax
>         cmpl    (%rsi), %edx
> @@ -205,10 +205,6 @@ L(one_or_less):
>         negl    %eax
>         orl     $1, %eax
>  #  else
> -       /* 'nbe' covers the case where length is negative (large
> -          unsigned).  */
> -
> -       jnbe    __strcmp_avx2
>         movzbl  (%rdi), %eax
>         movzbl  (%rsi), %ecx
>         subl    %ecx, %eax
> diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
> index 37d1224bb9..68bad365ba 100644
> --- a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
> +++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
> @@ -1,3 +1,4 @@
>  #define STRCMP __strncmp_avx2_rtm
>  #define USE_AS_STRNCMP 1
> +#define OVERFLOW_STRCMP        __strcmp_avx2_rtm
>  #include "strcmp-avx2-rtm.S"
> diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S
> index 1678bcc235..f138e9f1fd 100644
> --- a/sysdeps/x86_64/multiarch/strncmp-avx2.S
> +++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S
> @@ -1,3 +1,4 @@
>  #define STRCMP __strncmp_avx2
>  #define USE_AS_STRNCMP 1
> +#define OVERFLOW_STRCMP __strcmp_avx2
>  #include "strcmp-avx2.S"
> diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
> index 4e88c70cc6..f467582cbe 100644
> --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
> +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
> @@ -1,5 +1,5 @@
>  #define STRCMP __wcsncmp_avx2_rtm
>  #define USE_AS_STRNCMP 1
>  #define USE_AS_WCSCMP 1
> -
> +#define OVERFLOW_STRCMP        __wcscmp_avx2_rtm
>  #include "strcmp-avx2-rtm.S"
> diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
> index 4fa1de4d3f..e9ede522b8 100644
> --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
> +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
> @@ -1,5 +1,5 @@
>  #define STRCMP __wcsncmp_avx2
>  #define USE_AS_STRNCMP 1
>  #define USE_AS_WCSCMP 1
> -
> +#define OVERFLOW_STRCMP        __wcscmp_avx2
>  #include "strcmp-avx2.S"
> --
> 2.25.1
>

LGTM.

Reviewed-by: H.J. Lu <hjl.tools@gmail.com>

Thanks.
  
Carlos O'Donell Feb. 16, 2022, 2:08 p.m. UTC | #2
On 2/16/22 08:17, H.J. Lu via Libc-alpha wrote:
> On Wed, Feb 16, 2022 at 12:09 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>>
>> In the overflow fallback strncmp-avx2-rtm and wcsncmp-avx2-rtm would
>> call strcmp-avx2 and wcsncmp-avx2 respectively. This would have
>> not checks around vzeroupper and would trigger spurious
>> aborts. This commit fixes that.
>>
>> test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass on
>> AVX2 machines with and without RTM.
>>
>> Co-authored-by: H.J. Lu <hjl.tools@gmail.com>
>> ---
>>  sysdeps/x86/Makefile                        |  2 +-
>>  sysdeps/x86/tst-strncmp-rtm.c               | 14 +++++++++++++-
>>  sysdeps/x86_64/multiarch/strcmp-avx2.S      |  8 ++------
>>  sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S |  1 +
>>  sysdeps/x86_64/multiarch/strncmp-avx2.S     |  1 +
>>  sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S |  2 +-
>>  sysdeps/x86_64/multiarch/wcsncmp-avx2.S     |  2 +-
>>  7 files changed, 20 insertions(+), 10 deletions(-)
>>
>> diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
>> index 6cf708335c..d110f7b7f2 100644
>> --- a/sysdeps/x86/Makefile
>> +++ b/sysdeps/x86/Makefile
>> @@ -109,7 +109,7 @@ CFLAGS-tst-memset-rtm.c += -mrtm
>>  CFLAGS-tst-strchr-rtm.c += -mrtm
>>  CFLAGS-tst-strcpy-rtm.c += -mrtm
>>  CFLAGS-tst-strlen-rtm.c += -mrtm
>> -CFLAGS-tst-strncmp-rtm.c += -mrtm
>> +CFLAGS-tst-strncmp-rtm.c += -mrtm -Wno-error
>>  CFLAGS-tst-strrchr-rtm.c += -mrtm
>>  endif
>>
>> diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c
>> index 09ed6fa0d6..ebc94a3a6d 100644
>> --- a/sysdeps/x86/tst-strncmp-rtm.c
>> +++ b/sysdeps/x86/tst-strncmp-rtm.c
>> @@ -16,6 +16,7 @@
>>     License along with the GNU C Library; if not, see
>>     <https://www.gnu.org/licenses/>.  */
>>
>> +#include <stdint.h>
>>  #include <tst-string-rtm.h>
>>
>>  #define LOOP 3000
>> @@ -45,8 +46,19 @@ function (void)
>>      return 1;
>>  }
>>
>> +__attribute__ ((noinline, noclone))
>> +static int
>> +function_overflow (void)
>> +{
>> +  if (strncmp (string1, string2, SIZE_MAX) == 0)
>> +    return 0;
>> +  else
>> +    return 1;
>> +}
>> +
>>  static int
>>  do_test (void)
>>  {
>> -  return do_test_1 ("strncmp", LOOP, prepare, function);
>> +  return (do_test_1 ("strncmp", LOOP, prepare, function)
>> +         || do_test_1 ("strncmp", LOOP, prepare, function_overflow));
>>  }
>> diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
>> index 99e5349be8..6da0e1a248 100644
>> --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
>> +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
>> @@ -193,10 +193,10 @@ L(ret_zero):
>>         .p2align 4,, 5
>>  L(one_or_less):
>>         jb      L(ret_zero)
>> -#  ifdef USE_AS_WCSCMP
>>         /* 'nbe' covers the case where length is negative (large
>>            unsigned).  */
>> -       jnbe    __wcscmp_avx2
>> +       jnbe    OVERFLOW_STRCMP
>> +#  ifdef USE_AS_WCSCMP
>>         movl    (%rdi), %edx
>>         xorl    %eax, %eax
>>         cmpl    (%rsi), %edx
>> @@ -205,10 +205,6 @@ L(one_or_less):
>>         negl    %eax
>>         orl     $1, %eax
>>  #  else
>> -       /* 'nbe' covers the case where length is negative (large
>> -          unsigned).  */
>> -
>> -       jnbe    __strcmp_avx2
>>         movzbl  (%rdi), %eax
>>         movzbl  (%rsi), %ecx
>>         subl    %ecx, %eax
>> diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
>> index 37d1224bb9..68bad365ba 100644
>> --- a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
>> +++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
>> @@ -1,3 +1,4 @@
>>  #define STRCMP __strncmp_avx2_rtm
>>  #define USE_AS_STRNCMP 1
>> +#define OVERFLOW_STRCMP        __strcmp_avx2_rtm
>>  #include "strcmp-avx2-rtm.S"
>> diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S
>> index 1678bcc235..f138e9f1fd 100644
>> --- a/sysdeps/x86_64/multiarch/strncmp-avx2.S
>> +++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S
>> @@ -1,3 +1,4 @@
>>  #define STRCMP __strncmp_avx2
>>  #define USE_AS_STRNCMP 1
>> +#define OVERFLOW_STRCMP __strcmp_avx2
>>  #include "strcmp-avx2.S"
>> diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
>> index 4e88c70cc6..f467582cbe 100644
>> --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
>> +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
>> @@ -1,5 +1,5 @@
>>  #define STRCMP __wcsncmp_avx2_rtm
>>  #define USE_AS_STRNCMP 1
>>  #define USE_AS_WCSCMP 1
>> -
>> +#define OVERFLOW_STRCMP        __wcscmp_avx2_rtm
>>  #include "strcmp-avx2-rtm.S"
>> diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
>> index 4fa1de4d3f..e9ede522b8 100644
>> --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
>> +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
>> @@ -1,5 +1,5 @@
>>  #define STRCMP __wcsncmp_avx2
>>  #define USE_AS_STRNCMP 1
>>  #define USE_AS_WCSCMP 1
>> -
>> +#define OVERFLOW_STRCMP        __wcscmp_avx2
>>  #include "strcmp-avx2.S"
>> --
>> 2.25.1
>>
> 
> LGTM.
> 
> Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
> 
> Thanks.
> 

This fails CI for i686.
https://patchwork.sourceware.org/project/glibc/patch/20220216080935.3284536-1-goldstein.w.n@gmail.com/
  
H.J. Lu Feb. 16, 2022, 2:26 p.m. UTC | #3
On Wed, Feb 16, 2022 at 6:08 AM Carlos O'Donell <carlos@redhat.com> wrote:
>
> On 2/16/22 08:17, H.J. Lu via Libc-alpha wrote:
> > On Wed, Feb 16, 2022 at 12:09 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >>
> >> In the overflow fallback strncmp-avx2-rtm and wcsncmp-avx2-rtm would
> >> call strcmp-avx2 and wcsncmp-avx2 respectively. This would have
> >> not checks around vzeroupper and would trigger spurious
> >> aborts. This commit fixes that.
> >>
> >> test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass on
> >> AVX2 machines with and without RTM.
> >>
> >> Co-authored-by: H.J. Lu <hjl.tools@gmail.com>
> >> ---
> >>  sysdeps/x86/Makefile                        |  2 +-
> >>  sysdeps/x86/tst-strncmp-rtm.c               | 14 +++++++++++++-
> >>  sysdeps/x86_64/multiarch/strcmp-avx2.S      |  8 ++------
> >>  sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S |  1 +
> >>  sysdeps/x86_64/multiarch/strncmp-avx2.S     |  1 +
> >>  sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S |  2 +-
> >>  sysdeps/x86_64/multiarch/wcsncmp-avx2.S     |  2 +-
> >>  7 files changed, 20 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
> >> index 6cf708335c..d110f7b7f2 100644
> >> --- a/sysdeps/x86/Makefile
> >> +++ b/sysdeps/x86/Makefile
> >> @@ -109,7 +109,7 @@ CFLAGS-tst-memset-rtm.c += -mrtm
> >>  CFLAGS-tst-strchr-rtm.c += -mrtm
> >>  CFLAGS-tst-strcpy-rtm.c += -mrtm
> >>  CFLAGS-tst-strlen-rtm.c += -mrtm
> >> -CFLAGS-tst-strncmp-rtm.c += -mrtm
> >> +CFLAGS-tst-strncmp-rtm.c += -mrtm -Wno-error
> >>  CFLAGS-tst-strrchr-rtm.c += -mrtm
> >>  endif
> >>
> >> diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c
> >> index 09ed6fa0d6..ebc94a3a6d 100644
> >> --- a/sysdeps/x86/tst-strncmp-rtm.c
> >> +++ b/sysdeps/x86/tst-strncmp-rtm.c
> >> @@ -16,6 +16,7 @@
> >>     License along with the GNU C Library; if not, see
> >>     <https://www.gnu.org/licenses/>.  */
> >>
> >> +#include <stdint.h>
> >>  #include <tst-string-rtm.h>
> >>
> >>  #define LOOP 3000
> >> @@ -45,8 +46,19 @@ function (void)
> >>      return 1;
> >>  }
> >>
> >> +__attribute__ ((noinline, noclone))
> >> +static int
> >> +function_overflow (void)
> >> +{
> >> +  if (strncmp (string1, string2, SIZE_MAX) == 0)
> >> +    return 0;
> >> +  else
> >> +    return 1;
> >> +}
> >> +
> >>  static int
> >>  do_test (void)
> >>  {
> >> -  return do_test_1 ("strncmp", LOOP, prepare, function);
> >> +  return (do_test_1 ("strncmp", LOOP, prepare, function)
> >> +         || do_test_1 ("strncmp", LOOP, prepare, function_overflow));
> >>  }
> >> diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
> >> index 99e5349be8..6da0e1a248 100644
> >> --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
> >> +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
> >> @@ -193,10 +193,10 @@ L(ret_zero):
> >>         .p2align 4,, 5
> >>  L(one_or_less):
> >>         jb      L(ret_zero)
> >> -#  ifdef USE_AS_WCSCMP
> >>         /* 'nbe' covers the case where length is negative (large
> >>            unsigned).  */
> >> -       jnbe    __wcscmp_avx2
> >> +       jnbe    OVERFLOW_STRCMP
> >> +#  ifdef USE_AS_WCSCMP
> >>         movl    (%rdi), %edx
> >>         xorl    %eax, %eax
> >>         cmpl    (%rsi), %edx
> >> @@ -205,10 +205,6 @@ L(one_or_less):
> >>         negl    %eax
> >>         orl     $1, %eax
> >>  #  else
> >> -       /* 'nbe' covers the case where length is negative (large
> >> -          unsigned).  */
> >> -
> >> -       jnbe    __strcmp_avx2
> >>         movzbl  (%rdi), %eax
> >>         movzbl  (%rsi), %ecx
> >>         subl    %ecx, %eax
> >> diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
> >> index 37d1224bb9..68bad365ba 100644
> >> --- a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
> >> +++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
> >> @@ -1,3 +1,4 @@
> >>  #define STRCMP __strncmp_avx2_rtm
> >>  #define USE_AS_STRNCMP 1
> >> +#define OVERFLOW_STRCMP        __strcmp_avx2_rtm
> >>  #include "strcmp-avx2-rtm.S"
> >> diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S
> >> index 1678bcc235..f138e9f1fd 100644
> >> --- a/sysdeps/x86_64/multiarch/strncmp-avx2.S
> >> +++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S
> >> @@ -1,3 +1,4 @@
> >>  #define STRCMP __strncmp_avx2
> >>  #define USE_AS_STRNCMP 1
> >> +#define OVERFLOW_STRCMP __strcmp_avx2
> >>  #include "strcmp-avx2.S"
> >> diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
> >> index 4e88c70cc6..f467582cbe 100644
> >> --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
> >> +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
> >> @@ -1,5 +1,5 @@
> >>  #define STRCMP __wcsncmp_avx2_rtm
> >>  #define USE_AS_STRNCMP 1
> >>  #define USE_AS_WCSCMP 1
> >> -
> >> +#define OVERFLOW_STRCMP        __wcscmp_avx2_rtm
> >>  #include "strcmp-avx2-rtm.S"
> >> diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
> >> index 4fa1de4d3f..e9ede522b8 100644
> >> --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
> >> +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
> >> @@ -1,5 +1,5 @@
> >>  #define STRCMP __wcsncmp_avx2
> >>  #define USE_AS_STRNCMP 1
> >>  #define USE_AS_WCSCMP 1
> >> -
> >> +#define OVERFLOW_STRCMP        __wcscmp_avx2
> >>  #include "strcmp-avx2.S"
> >> --
> >> 2.25.1
> >>
> >
> > LGTM.
> >
> > Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
> >
> > Thanks.
> >
>
> This fails CI for i686.
> https://patchwork.sourceware.org/project/glibc/patch/20220216080935.3284536-1-goldstein.w.n@gmail.com/
>
> --
> Cheers,
> Carlos.
>

Hi Noah,

We need this change on top of your patch:

diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c
index ebc94a3a6d..9e20abaacc 100644
--- a/sysdeps/x86/tst-strncmp-rtm.c
+++ b/sysdeps/x86/tst-strncmp-rtm.c
@@ -59,6 +59,9 @@ function_overflow (void)
 static int
 do_test (void)
 {
-  return (do_test_1 ("strncmp", LOOP, prepare, function)
-    || do_test_1 ("strncmp", LOOP, prepare, function_overflow));
+  int status = do_test_1 ("strncmp", LOOP, prepare, function);
+  if (status != EXIT_SUCCESS)
+    return status;
+  status = do_test_1 ("strncmp", LOOP, prepare, function_overflow);
+  return status;
 }
  
Noah Goldstein Feb. 17, 2022, 7:15 p.m. UTC | #4
On Wed, Feb 16, 2022 at 8:27 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, Feb 16, 2022 at 6:08 AM Carlos O'Donell <carlos@redhat.com> wrote:
> >
> > On 2/16/22 08:17, H.J. Lu via Libc-alpha wrote:
> > > On Wed, Feb 16, 2022 at 12:09 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >>
> > >> In the overflow fallback strncmp-avx2-rtm and wcsncmp-avx2-rtm would
> > >> call strcmp-avx2 and wcsncmp-avx2 respectively. This would have
> > >> not checks around vzeroupper and would trigger spurious
> > >> aborts. This commit fixes that.
> > >>
> > >> test-strcmp, test-strncmp, test-wcscmp, and test-wcsncmp all pass on
> > >> AVX2 machines with and without RTM.
> > >>
> > >> Co-authored-by: H.J. Lu <hjl.tools@gmail.com>
> > >> ---
> > >>  sysdeps/x86/Makefile                        |  2 +-
> > >>  sysdeps/x86/tst-strncmp-rtm.c               | 14 +++++++++++++-
> > >>  sysdeps/x86_64/multiarch/strcmp-avx2.S      |  8 ++------
> > >>  sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S |  1 +
> > >>  sysdeps/x86_64/multiarch/strncmp-avx2.S     |  1 +
> > >>  sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S |  2 +-
> > >>  sysdeps/x86_64/multiarch/wcsncmp-avx2.S     |  2 +-
> > >>  7 files changed, 20 insertions(+), 10 deletions(-)
> > >>
> > >> diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
> > >> index 6cf708335c..d110f7b7f2 100644
> > >> --- a/sysdeps/x86/Makefile
> > >> +++ b/sysdeps/x86/Makefile
> > >> @@ -109,7 +109,7 @@ CFLAGS-tst-memset-rtm.c += -mrtm
> > >>  CFLAGS-tst-strchr-rtm.c += -mrtm
> > >>  CFLAGS-tst-strcpy-rtm.c += -mrtm
> > >>  CFLAGS-tst-strlen-rtm.c += -mrtm
> > >> -CFLAGS-tst-strncmp-rtm.c += -mrtm
> > >> +CFLAGS-tst-strncmp-rtm.c += -mrtm -Wno-error
> > >>  CFLAGS-tst-strrchr-rtm.c += -mrtm
> > >>  endif
> > >>
> > >> diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c
> > >> index 09ed6fa0d6..ebc94a3a6d 100644
> > >> --- a/sysdeps/x86/tst-strncmp-rtm.c
> > >> +++ b/sysdeps/x86/tst-strncmp-rtm.c
> > >> @@ -16,6 +16,7 @@
> > >>     License along with the GNU C Library; if not, see
> > >>     <https://www.gnu.org/licenses/>.  */
> > >>
> > >> +#include <stdint.h>
> > >>  #include <tst-string-rtm.h>
> > >>
> > >>  #define LOOP 3000
> > >> @@ -45,8 +46,19 @@ function (void)
> > >>      return 1;
> > >>  }
> > >>
> > >> +__attribute__ ((noinline, noclone))
> > >> +static int
> > >> +function_overflow (void)
> > >> +{
> > >> +  if (strncmp (string1, string2, SIZE_MAX) == 0)
> > >> +    return 0;
> > >> +  else
> > >> +    return 1;
> > >> +}
> > >> +
> > >>  static int
> > >>  do_test (void)
> > >>  {
> > >> -  return do_test_1 ("strncmp", LOOP, prepare, function);
> > >> +  return (do_test_1 ("strncmp", LOOP, prepare, function)
> > >> +         || do_test_1 ("strncmp", LOOP, prepare, function_overflow));
> > >>  }
> > >> diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
> > >> index 99e5349be8..6da0e1a248 100644
> > >> --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
> > >> +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
> > >> @@ -193,10 +193,10 @@ L(ret_zero):
> > >>         .p2align 4,, 5
> > >>  L(one_or_less):
> > >>         jb      L(ret_zero)
> > >> -#  ifdef USE_AS_WCSCMP
> > >>         /* 'nbe' covers the case where length is negative (large
> > >>            unsigned).  */
> > >> -       jnbe    __wcscmp_avx2
> > >> +       jnbe    OVERFLOW_STRCMP
> > >> +#  ifdef USE_AS_WCSCMP
> > >>         movl    (%rdi), %edx
> > >>         xorl    %eax, %eax
> > >>         cmpl    (%rsi), %edx
> > >> @@ -205,10 +205,6 @@ L(one_or_less):
> > >>         negl    %eax
> > >>         orl     $1, %eax
> > >>  #  else
> > >> -       /* 'nbe' covers the case where length is negative (large
> > >> -          unsigned).  */
> > >> -
> > >> -       jnbe    __strcmp_avx2
> > >>         movzbl  (%rdi), %eax
> > >>         movzbl  (%rsi), %ecx
> > >>         subl    %ecx, %eax
> > >> diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
> > >> index 37d1224bb9..68bad365ba 100644
> > >> --- a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
> > >> +++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
> > >> @@ -1,3 +1,4 @@
> > >>  #define STRCMP __strncmp_avx2_rtm
> > >>  #define USE_AS_STRNCMP 1
> > >> +#define OVERFLOW_STRCMP        __strcmp_avx2_rtm
> > >>  #include "strcmp-avx2-rtm.S"
> > >> diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S
> > >> index 1678bcc235..f138e9f1fd 100644
> > >> --- a/sysdeps/x86_64/multiarch/strncmp-avx2.S
> > >> +++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S
> > >> @@ -1,3 +1,4 @@
> > >>  #define STRCMP __strncmp_avx2
> > >>  #define USE_AS_STRNCMP 1
> > >> +#define OVERFLOW_STRCMP __strcmp_avx2
> > >>  #include "strcmp-avx2.S"
> > >> diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
> > >> index 4e88c70cc6..f467582cbe 100644
> > >> --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
> > >> +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
> > >> @@ -1,5 +1,5 @@
> > >>  #define STRCMP __wcsncmp_avx2_rtm
> > >>  #define USE_AS_STRNCMP 1
> > >>  #define USE_AS_WCSCMP 1
> > >> -
> > >> +#define OVERFLOW_STRCMP        __wcscmp_avx2_rtm
> > >>  #include "strcmp-avx2-rtm.S"
> > >> diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
> > >> index 4fa1de4d3f..e9ede522b8 100644
> > >> --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
> > >> +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
> > >> @@ -1,5 +1,5 @@
> > >>  #define STRCMP __wcsncmp_avx2
> > >>  #define USE_AS_STRNCMP 1
> > >>  #define USE_AS_WCSCMP 1
> > >> -
> > >> +#define OVERFLOW_STRCMP        __wcscmp_avx2
> > >>  #include "strcmp-avx2.S"
> > >> --
> > >> 2.25.1
> > >>
> > >
> > > LGTM.
> > >
> > > Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
> > >
> > > Thanks.
> > >
> >
> > This fails CI for i686.
> > https://patchwork.sourceware.org/project/glibc/patch/20220216080935.3284536-1-goldstein.w.n@gmail.com/
> >
> > --
> > Cheers,
> > Carlos.
> >
>
> Hi Noah,
>
> We need this change on top of your patch:
>
> diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c
> index ebc94a3a6d..9e20abaacc 100644
> --- a/sysdeps/x86/tst-strncmp-rtm.c
> +++ b/sysdeps/x86/tst-strncmp-rtm.c
> @@ -59,6 +59,9 @@ function_overflow (void)
>  static int
>  do_test (void)
>  {
> -  return (do_test_1 ("strncmp", LOOP, prepare, function)
> -    || do_test_1 ("strncmp", LOOP, prepare, function_overflow));
> +  int status = do_test_1 ("strncmp", LOOP, prepare, function);
> +  if (status != EXIT_SUCCESS)
> +    return status;
> +  status = do_test_1 ("strncmp", LOOP, prepare, function_overflow);
> +  return status;
>  }

Added your fix in V5.
>
> --
> H.J.
  

Patch

diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
index 6cf708335c..d110f7b7f2 100644
--- a/sysdeps/x86/Makefile
+++ b/sysdeps/x86/Makefile
@@ -109,7 +109,7 @@  CFLAGS-tst-memset-rtm.c += -mrtm
 CFLAGS-tst-strchr-rtm.c += -mrtm
 CFLAGS-tst-strcpy-rtm.c += -mrtm
 CFLAGS-tst-strlen-rtm.c += -mrtm
-CFLAGS-tst-strncmp-rtm.c += -mrtm
+CFLAGS-tst-strncmp-rtm.c += -mrtm -Wno-error
 CFLAGS-tst-strrchr-rtm.c += -mrtm
 endif
 
diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c
index 09ed6fa0d6..ebc94a3a6d 100644
--- a/sysdeps/x86/tst-strncmp-rtm.c
+++ b/sysdeps/x86/tst-strncmp-rtm.c
@@ -16,6 +16,7 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <stdint.h>
 #include <tst-string-rtm.h>
 
 #define LOOP 3000
@@ -45,8 +46,19 @@  function (void)
     return 1;
 }
 
+__attribute__ ((noinline, noclone))
+static int
+function_overflow (void)
+{
+  if (strncmp (string1, string2, SIZE_MAX) == 0)
+    return 0;
+  else
+    return 1;
+}
+
 static int
 do_test (void)
 {
-  return do_test_1 ("strncmp", LOOP, prepare, function);
+  return (do_test_1 ("strncmp", LOOP, prepare, function)
+	  || do_test_1 ("strncmp", LOOP, prepare, function_overflow));
 }
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
index 99e5349be8..6da0e1a248 100644
--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
@@ -193,10 +193,10 @@  L(ret_zero):
 	.p2align 4,, 5
 L(one_or_less):
 	jb	L(ret_zero)
-#  ifdef USE_AS_WCSCMP
 	/* 'nbe' covers the case where length is negative (large
 	   unsigned).  */
-	jnbe	__wcscmp_avx2
+	jnbe	OVERFLOW_STRCMP
+#  ifdef USE_AS_WCSCMP
 	movl	(%rdi), %edx
 	xorl	%eax, %eax
 	cmpl	(%rsi), %edx
@@ -205,10 +205,6 @@  L(one_or_less):
 	negl	%eax
 	orl	$1, %eax
 #  else
-	/* 'nbe' covers the case where length is negative (large
-	   unsigned).  */
-
-	jnbe	__strcmp_avx2
 	movzbl	(%rdi), %eax
 	movzbl	(%rsi), %ecx
 	subl	%ecx, %eax
diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
index 37d1224bb9..68bad365ba 100644
--- a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
+++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
@@ -1,3 +1,4 @@ 
 #define STRCMP	__strncmp_avx2_rtm
 #define USE_AS_STRNCMP 1
+#define OVERFLOW_STRCMP	__strcmp_avx2_rtm
 #include "strcmp-avx2-rtm.S"
diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S
index 1678bcc235..f138e9f1fd 100644
--- a/sysdeps/x86_64/multiarch/strncmp-avx2.S
+++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S
@@ -1,3 +1,4 @@ 
 #define STRCMP	__strncmp_avx2
 #define USE_AS_STRNCMP 1
+#define OVERFLOW_STRCMP __strcmp_avx2
 #include "strcmp-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
index 4e88c70cc6..f467582cbe 100644
--- a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
+++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
@@ -1,5 +1,5 @@ 
 #define STRCMP __wcsncmp_avx2_rtm
 #define USE_AS_STRNCMP 1
 #define USE_AS_WCSCMP 1
-
+#define OVERFLOW_STRCMP	__wcscmp_avx2_rtm
 #include "strcmp-avx2-rtm.S"
diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
index 4fa1de4d3f..e9ede522b8 100644
--- a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
+++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
@@ -1,5 +1,5 @@ 
 #define STRCMP __wcsncmp_avx2
 #define USE_AS_STRNCMP 1
 #define USE_AS_WCSCMP 1
-
+#define OVERFLOW_STRCMP	__wcscmp_avx2
 #include "strcmp-avx2.S"