[2/4] x86-64: Require BMI2 for AVX2 strn(case)cmp and wcsncmp implementations
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
Commit Message
The AVX2 strncmp, strncasecmp and wcsncmp implementations use the bzhil
instruction, which belongs to the BMI2 CPU feature.
Fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
Partially resolves: BZ #29611
---
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 25 +++++++++++++++------
sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 1 +
sysdeps/x86_64/multiarch/strncmp.c | 4 ++--
3 files changed, 21 insertions(+), 9 deletions(-)
Comments
On Sat, Oct 1, 2022 at 12:09 PM Aurelien Jarno <aurelien@aurel32.net> wrote:
>
> The AVX2 strncmp, strncasecmp and wcsncmp implementations use the bzhil
> instruction, which belongs to the BMI2 CPU feature.
>
> Fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
> Partially resolves: BZ #29611
> ---
> sysdeps/x86_64/multiarch/ifunc-impl-list.c | 25 +++++++++++++++------
> sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 1 +
> sysdeps/x86_64/multiarch/strncmp.c | 4 ++--
The ifunc changes in strncmp.c and ifunc-strcasecmp.h need to be backported
to 2.33, 2.34, 2.35.
Also, separate ifunc changes need to be backported to strncmp.c in
2.32, 2.31, 2.30, 2.29, 2.28 for a `tzcnt` usage that needs
BMI1.
Finally, a corresponding fix is needed for strcmp.c as well (there is
a missing BMI2 check in the strcmp.c ifunc selection, as well as missing
checks in the impl list).
> 3 files changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index a71444eccb..ec1a8bff5e 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -638,13 +638,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> IFUNC_IMPL (i, name, strncasecmp,
> X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
> (CPU_FEATURE_USABLE (AVX512VL)
> - && CPU_FEATURE_USABLE (AVX512BW)),
> + && CPU_FEATURE_USABLE (AVX512BW)
> + && CPU_FEATURE_USABLE (BMI2)),
> __strncasecmp_evex)
> X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> - CPU_FEATURE_USABLE (AVX2),
> + (CPU_FEATURE_USABLE (AVX2)
> + && CPU_FEATURE_USABLE (BMI2)),
> __strncasecmp_avx2)
> X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> (CPU_FEATURE_USABLE (AVX2)
> + && CPU_FEATURE_USABLE (BMI2)
> && CPU_FEATURE_USABLE (RTM)),
> __strncasecmp_avx2_rtm)
> X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
> @@ -660,13 +663,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> IFUNC_IMPL (i, name, strncasecmp_l,
> X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
> (CPU_FEATURE_USABLE (AVX512VL)
> - && CPU_FEATURE_USABLE (AVX512BW)),
> + & CPU_FEATURE_USABLE (AVX512BW)
> + && CPU_FEATURE_USABLE (BMI2)),
> __strncasecmp_l_evex)
> X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> - CPU_FEATURE_USABLE (AVX2),
> + (CPU_FEATURE_USABLE (AVX2)
> + && CPU_FEATURE_USABLE (BMI2),
> __strncasecmp_l_avx2)
> X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> (CPU_FEATURE_USABLE (AVX2)
> + && CPU_FEATURE_USABLE (BMI2)
> && CPU_FEATURE_USABLE (RTM)),
> __strncasecmp_l_avx2_rtm)
> X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
> @@ -816,10 +822,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> && CPU_FEATURE_USABLE (BMI2)),
> __wcsncmp_evex)
> X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
> - CPU_FEATURE_USABLE (AVX2),
> + (CPU_FEATURE_USABLE (AVX2)
> + && CPU_FEATURE_USABLE (BMI2)),
> __wcsncmp_avx2)
> X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
> (CPU_FEATURE_USABLE (AVX2)
> + && CPU_FEATURE_USABLE (BMI2)
> && CPU_FEATURE_USABLE (RTM)),
> __wcsncmp_avx2_rtm)
> /* ISA V2 wrapper for GENERIC implementation because the
> @@ -1162,13 +1170,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> IFUNC_IMPL (i, name, strncmp,
> X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp,
> (CPU_FEATURE_USABLE (AVX512VL)
> - && CPU_FEATURE_USABLE (AVX512BW)),
> + && CPU_FEATURE_USABLE (AVX512BW)
> + && CPU_FEATURE_USABLE (BMI2)),
> __strncmp_evex)
> X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
> - CPU_FEATURE_USABLE (AVX2),
> + (CPU_FEATURE_USABLE (AVX2)
> + && CPU_FEATURE_USABLE (BMI2)),
> __strncmp_avx2)
> X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
> (CPU_FEATURE_USABLE (AVX2)
> + && CPU_FEATURE_USABLE (BMI2)
> && CPU_FEATURE_USABLE (RTM)),
> __strncmp_avx2_rtm)
> X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
> diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> index 68646ef199..7622af259c 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> @@ -34,6 +34,7 @@ IFUNC_SELECTOR (void)
> const struct cpu_features *cpu_features = __get_cpu_features ();
>
> if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> AVX_Fast_Unaligned_Load, ))
> {
> diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
> index 4ebe4bde30..c4f8b6bbb5 100644
> --- a/sysdeps/x86_64/multiarch/strncmp.c
> +++ b/sysdeps/x86_64/multiarch/strncmp.c
> @@ -41,12 +41,12 @@ IFUNC_SELECTOR (void)
> const struct cpu_features *cpu_features = __get_cpu_features ();
>
> if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> AVX_Fast_Unaligned_Load, ))
> {
> if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> - && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> - && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> return OPTIMIZE (evex);
>
> if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
> --
> 2.35.1
>
On Sat, Oct 1, 2022 at 3:15 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Sat, Oct 1, 2022 at 12:09 PM Aurelien Jarno <aurelien@aurel32.net> wrote:
> >
> > The AVX2 strncmp, strncasecmp and wcsncmp implementations use the bzhil
> > instruction, which belongs to the BMI2 CPU feature.
> >
> > Fixes: b77b06e0e296 ("x86: Optimize strcmp-avx2.S")
> > Partially resolves: BZ #29611
> > ---
> > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 25 +++++++++++++++------
> > sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 1 +
> > sysdeps/x86_64/multiarch/strncmp.c | 4 ++--
>
> The ifunc changes in strncmp.c and ifunc-strcasecmp.h need to be backported
> to 2.33, 2.34, 2.35.
>
> Also, separate ifunc changes need to be backported to strncmp.c in
> 2.32, 2.31, 2.30, 2.29, 2.28 for a `tzcnt` usage that needs
> BMI1.
>
> Finally, a corresponding fix is needed for strcmp.c as well (there is
> a missing BMI2 check in the strcmp.c ifunc selection, as well as missing
> checks in the impl list).
Don't reply here. Reply (if needed) in the main [0/4] patch thread, just
to keep the conversation contained.
>
> > 3 files changed, 21 insertions(+), 9 deletions(-)
> >
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > index a71444eccb..ec1a8bff5e 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > @@ -638,13 +638,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > IFUNC_IMPL (i, name, strncasecmp,
> > X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
> > (CPU_FEATURE_USABLE (AVX512VL)
> > - && CPU_FEATURE_USABLE (AVX512BW)),
> > + && CPU_FEATURE_USABLE (AVX512BW)
> > + && CPU_FEATURE_USABLE (BMI2)),
> > __strncasecmp_evex)
> > X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> > - CPU_FEATURE_USABLE (AVX2),
> > + (CPU_FEATURE_USABLE (AVX2)
> > + && CPU_FEATURE_USABLE (BMI2)),
> > __strncasecmp_avx2)
> > X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> > (CPU_FEATURE_USABLE (AVX2)
> > + && CPU_FEATURE_USABLE (BMI2)
> > && CPU_FEATURE_USABLE (RTM)),
> > __strncasecmp_avx2_rtm)
> > X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
> > @@ -660,13 +663,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > IFUNC_IMPL (i, name, strncasecmp_l,
> > X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
> > (CPU_FEATURE_USABLE (AVX512VL)
> > - && CPU_FEATURE_USABLE (AVX512BW)),
> > + & CPU_FEATURE_USABLE (AVX512BW)
> > + && CPU_FEATURE_USABLE (BMI2)),
> > __strncasecmp_l_evex)
> > X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> > - CPU_FEATURE_USABLE (AVX2),
> > + (CPU_FEATURE_USABLE (AVX2)
> > + && CPU_FEATURE_USABLE (BMI2),
> > __strncasecmp_l_avx2)
> > X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
> > (CPU_FEATURE_USABLE (AVX2)
> > + && CPU_FEATURE_USABLE (BMI2)
> > && CPU_FEATURE_USABLE (RTM)),
> > __strncasecmp_l_avx2_rtm)
> > X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
> > @@ -816,10 +822,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > && CPU_FEATURE_USABLE (BMI2)),
> > __wcsncmp_evex)
> > X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
> > - CPU_FEATURE_USABLE (AVX2),
> > + (CPU_FEATURE_USABLE (AVX2)
> > + && CPU_FEATURE_USABLE (BMI2)),
> > __wcsncmp_avx2)
> > X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
> > (CPU_FEATURE_USABLE (AVX2)
> > + && CPU_FEATURE_USABLE (BMI2)
> > && CPU_FEATURE_USABLE (RTM)),
> > __wcsncmp_avx2_rtm)
> > /* ISA V2 wrapper for GENERIC implementation because the
> > @@ -1162,13 +1170,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > IFUNC_IMPL (i, name, strncmp,
> > X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp,
> > (CPU_FEATURE_USABLE (AVX512VL)
> > - && CPU_FEATURE_USABLE (AVX512BW)),
> > + && CPU_FEATURE_USABLE (AVX512BW)
> > + && CPU_FEATURE_USABLE (BMI2)),
> > __strncmp_evex)
> > X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
> > - CPU_FEATURE_USABLE (AVX2),
> > + (CPU_FEATURE_USABLE (AVX2)
> > + && CPU_FEATURE_USABLE (BMI2)),
> > __strncmp_avx2)
> > X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
> > (CPU_FEATURE_USABLE (AVX2)
> > + && CPU_FEATURE_USABLE (BMI2)
> > && CPU_FEATURE_USABLE (RTM)),
> > __strncmp_avx2_rtm)
> > X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> > index 68646ef199..7622af259c 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
> > @@ -34,6 +34,7 @@ IFUNC_SELECTOR (void)
> > const struct cpu_features *cpu_features = __get_cpu_features ();
> >
> > if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> > && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > AVX_Fast_Unaligned_Load, ))
> > {
> > diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
> > index 4ebe4bde30..c4f8b6bbb5 100644
> > --- a/sysdeps/x86_64/multiarch/strncmp.c
> > +++ b/sysdeps/x86_64/multiarch/strncmp.c
> > @@ -41,12 +41,12 @@ IFUNC_SELECTOR (void)
> > const struct cpu_features *cpu_features = __get_cpu_features ();
> >
> > if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> > && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > AVX_Fast_Unaligned_Load, ))
> > {
> > if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > - && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > - && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
> > return OPTIMIZE (evex);
> >
> > if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
> > --
> > 2.35.1
> >
@@ -638,13 +638,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strncasecmp,
X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
__strncasecmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__strncasecmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strncasecmp_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp,
@@ -660,13 +663,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strncasecmp_l,
X86_IFUNC_IMPL_ADD_V4 (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ & CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
__strncasecmp_l_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2),
__strncasecmp_l_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncasecmp,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strncasecmp_l_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strncasecmp_l,
@@ -816,10 +822,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
&& CPU_FEATURE_USABLE (BMI2)),
__wcsncmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__wcsncmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, wcsncmp,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcsncmp_avx2_rtm)
/* ISA V2 wrapper for GENERIC implementation because the
@@ -1162,13 +1170,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, strncmp,
X86_IFUNC_IMPL_ADD_V4 (array, i, strncmp,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
__strncmp_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__strncmp_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, strncmp,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strncmp_avx2_rtm)
X86_IFUNC_IMPL_ADD_V2 (array, i, strncmp,
@@ -34,6 +34,7 @@ IFUNC_SELECTOR (void)
const struct cpu_features *cpu_features = __get_cpu_features ();
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
@@ -41,12 +41,12 @@ IFUNC_SELECTOR (void)
const struct cpu_features *cpu_features = __get_cpu_features ();
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))