[4/4] x86-64: Require LZCNT for AVX2 memrchr implementation
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
dj/TryBot-32bit |
success
|
Build for i686
|
Commit Message
The AVX2 memrchr implementation uses the lzcntl and lzcntq instructions,
which belong to the LZCNT CPU feature.
Fixes: af5306a735eb ("x86: Optimize memrchr-avx2.S")
Partially resolves: BZ #29611
---
sysdeps/x86_64/multiarch/ifunc-avx2.h | 1 +
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 7 +++++--
2 files changed, 6 insertions(+), 2 deletions(-)
Comments
On Sat, Oct 1, 2022 at 12:09 PM Aurelien Jarno <aurelien@aurel32.net> wrote:
>
> The AVX2 memrchr implementation uses the lzcntl and lzcntq instructions,
> which belongs to the LZCNT CPU feature.
>
> Fixes: af5306a735eb ("x86: Optimize memrchr-avx2.S")
> Partially resolves: BZ #29611
> ---
> sysdeps/x86_64/multiarch/ifunc-avx2.h | 1 +
> sysdeps/x86_64/multiarch/ifunc-impl-list.c | 7 +++++--
> 2 files changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
> index a57a9952f3..f1741083fd 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
> @@ -37,6 +37,7 @@ IFUNC_SELECTOR (void)
>
> if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
> && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> AVX_Fast_Unaligned_Load, ))
> {
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index c628462d47..db5a2032d6 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -209,13 +209,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> IFUNC_IMPL (i, name, memrchr,
> X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr,
> (CPU_FEATURE_USABLE (AVX512VL)
> - && CPU_FEATURE_USABLE (AVX512BW)),
> + && CPU_FEATURE_USABLE (AVX512BW)
> + && CPU_FEATURE_USABLE (LZCNT)),
Also needs BMI2 for the `shlx`. Likewise for avx2 versions.
> __memrchr_evex)
> X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
> - CPU_FEATURE_USABLE (AVX2),
> + (CPU_FEATURE_USABLE (AVX2)
> + && CPU_FEATURE_USABLE (LZCNT)),
> __memrchr_avx2)
> X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
> (CPU_FEATURE_USABLE (AVX2)
> + && CPU_FEATURE_USABLE (LZCNT)
> && CPU_FEATURE_USABLE (RTM)),
> __memrchr_avx2_rtm)
> /* ISA V2 wrapper for SSE2 implementation because the SSE2
> --
> 2.35.1
>
On 2022-10-01 15:06, Noah Goldstein wrote:
> On Sat, Oct 1, 2022 at 12:09 PM Aurelien Jarno <aurelien@aurel32.net> wrote:
> >
> > The AVX2 memrchr implementation uses the lzcntl and lzcntq instructions,
> > which belongs to the LZCNT CPU feature.
> >
> > Fixes: af5306a735eb ("x86: Optimize memrchr-avx2.S")
> > Partially resolves: BZ #29611
> > ---
> > sysdeps/x86_64/multiarch/ifunc-avx2.h | 1 +
> > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 7 +++++--
> > 2 files changed, 6 insertions(+), 2 deletions(-)
> >
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
> > index a57a9952f3..f1741083fd 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
> > +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
> > @@ -37,6 +37,7 @@ IFUNC_SELECTOR (void)
> >
> > if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> > && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
> > && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> > AVX_Fast_Unaligned_Load, ))
> > {
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > index c628462d47..db5a2032d6 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > @@ -209,13 +209,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > IFUNC_IMPL (i, name, memrchr,
> > X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr,
> > (CPU_FEATURE_USABLE (AVX512VL)
> > - && CPU_FEATURE_USABLE (AVX512BW)),
> > + && CPU_FEATURE_USABLE (AVX512BW)
> > + && CPU_FEATURE_USABLE (LZCNT)),
>
> Also needs BMI2 for the `shlx`. Likewise for avx2 versions.
Good catch, I haven't looked for that one, so I haven't encountered the
issue. Similarly there is 'shrx'.
On Sat, Oct 1, 2022 at 12:09 PM Aurelien Jarno <aurelien@aurel32.net> wrote:
>
> The AVX2 memrchr implementation uses the lzcntl and lzcntq instructions,
> which belongs to the LZCNT CPU feature.
>
> Fixes: af5306a735eb ("x86: Optimize memrchr-avx2.S")
> Partially resolves: BZ #29611
> ---
> sysdeps/x86_64/multiarch/ifunc-avx2.h | 1 +
> sysdeps/x86_64/multiarch/ifunc-impl-list.c | 7 +++++--
> 2 files changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
> index a57a9952f3..f1741083fd 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
> +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
> @@ -37,6 +37,7 @@ IFUNC_SELECTOR (void)
>
> if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
> && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
> + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
This causes a build failure. It needs a corresponding macro in
sysdeps/x86/isa-level.h, something like:
#define LZCNT_X86_ISA_LEVEL 3
placed after the BMI2 one.
> && X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
> AVX_Fast_Unaligned_Load, ))
> {
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index c628462d47..db5a2032d6 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -209,13 +209,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> IFUNC_IMPL (i, name, memrchr,
> X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr,
> (CPU_FEATURE_USABLE (AVX512VL)
> - && CPU_FEATURE_USABLE (AVX512BW)),
> + && CPU_FEATURE_USABLE (AVX512BW)
> + && CPU_FEATURE_USABLE (LZCNT)),
> __memrchr_evex)
> X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
> - CPU_FEATURE_USABLE (AVX2),
> + (CPU_FEATURE_USABLE (AVX2)
> + && CPU_FEATURE_USABLE (LZCNT)),
> __memrchr_avx2)
> X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
> (CPU_FEATURE_USABLE (AVX2)
> + && CPU_FEATURE_USABLE (LZCNT)
> && CPU_FEATURE_USABLE (RTM)),
> __memrchr_avx2_rtm)
> /* ISA V2 wrapper for SSE2 implementation because the SSE2
> --
> 2.35.1
>
@@ -37,6 +37,7 @@ IFUNC_SELECTOR (void)
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX2)
&& X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, LZCNT)
&& X86_ISA_CPU_FEATURES_ARCH_P (cpu_features,
AVX_Fast_Unaligned_Load, ))
{
@@ -209,13 +209,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL (i, name, memrchr,
X86_IFUNC_IMPL_ADD_V4 (array, i, memrchr,
(CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)),
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (LZCNT)),
__memrchr_evex)
X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (LZCNT)),
__memrchr_avx2)
X86_IFUNC_IMPL_ADD_V3 (array, i, memrchr,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (LZCNT)
&& CPU_FEATURE_USABLE (RTM)),
__memrchr_avx2_rtm)
/* ISA V2 wrapper for SSE2 implementation because the SSE2