[v1] x86: Add support for building strstr with explicit ISA level
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
dj/TryBot-32bit |
success
|
Build for i686
|
Commit Message
Small changes for this function as the generic implementation remains
the same for all ISA levels.
The only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
macros so that at least some of the checks can be constant evaluated,
and adding some comments explaining the ISA constraints on the function.
---
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
sysdeps/x86_64/multiarch/strstr.c | 10 +++++-----
2 files changed, 12 insertions(+), 11 deletions(-)
Comments
On Tue, Jun 28, 2022 at 8:26 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Small changes for this function as the generic implementation remains
> the same for all ISA levels.
>
> Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
> macros so that some of the checks at least can constant evaluate
> and some comments explaining the ISA constraints on the function.
> ---
> sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
> sysdeps/x86_64/multiarch/strstr.c | 10 +++++-----
> 2 files changed, 12 insertions(+), 11 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index 0d28319905..a1bff560bc 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>
> /* Support sysdeps/x86_64/multiarch/strstr.c. */
> IFUNC_IMPL (i, name, strstr,
> - IFUNC_IMPL_ADD (array, i, strstr,
> - (CPU_FEATURE_USABLE (AVX512VL)
> - && CPU_FEATURE_USABLE (AVX512BW)
> - && CPU_FEATURE_USABLE (AVX512DQ)
> - && CPU_FEATURE_USABLE (BMI2)),
> - __strstr_avx512)
> + /* All implementations of strstr are built at all ISA levels. */
> + IFUNC_IMPL_ADD (array, i, strstr,
> + (CPU_FEATURE_USABLE (AVX512VL)
> + && CPU_FEATURE_USABLE (AVX512BW)
> + && CPU_FEATURE_USABLE (AVX512DQ)
> + && CPU_FEATURE_USABLE (BMI2)),
> + __strstr_avx512)
> IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
> IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
>
> diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> index 2b83199245..3f86bfa5f2 100644
> --- a/sysdeps/x86_64/multiarch/strstr.c
> +++ b/sysdeps/x86_64/multiarch/strstr.c
> @@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
> const struct cpu_features *cpu_features = __get_cpu_features ();
>
> if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
> - && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> - && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> - && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> return __strstr_avx512;
>
> - if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
Is Fast_Unaligned_Load set on all processors before? If not, we should
revert
/* Feature(s) enabled when ISA level >= 2. */
#define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> return __strstr_sse2_unaligned;
>
> return __strstr_generic;
> --
> 2.34.1
>
On Tue, Jun 28, 2022 at 11:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Jun 28, 2022 at 8:26 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > Small changes for this function as the generic implementation remains
> > the same for all ISA levels.
> >
> > Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
> > macros so that some of the checks at least can constant evaluate
> > and some comments explaining the ISA constraints on the function.
> > ---
> > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
> > sysdeps/x86_64/multiarch/strstr.c | 10 +++++-----
> > 2 files changed, 12 insertions(+), 11 deletions(-)
> >
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > index 0d28319905..a1bff560bc 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > @@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >
> > /* Support sysdeps/x86_64/multiarch/strstr.c. */
> > IFUNC_IMPL (i, name, strstr,
> > - IFUNC_IMPL_ADD (array, i, strstr,
> > - (CPU_FEATURE_USABLE (AVX512VL)
> > - && CPU_FEATURE_USABLE (AVX512BW)
> > - && CPU_FEATURE_USABLE (AVX512DQ)
> > - && CPU_FEATURE_USABLE (BMI2)),
> > - __strstr_avx512)
> > + /* All implementations of strstr are built at all ISA levels. */
> > + IFUNC_IMPL_ADD (array, i, strstr,
> > + (CPU_FEATURE_USABLE (AVX512VL)
> > + && CPU_FEATURE_USABLE (AVX512BW)
> > + && CPU_FEATURE_USABLE (AVX512DQ)
> > + && CPU_FEATURE_USABLE (BMI2)),
> > + __strstr_avx512)
> > IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
> > IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
> >
> > diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> > index 2b83199245..3f86bfa5f2 100644
> > --- a/sysdeps/x86_64/multiarch/strstr.c
> > +++ b/sysdeps/x86_64/multiarch/strstr.c
> > @@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
> > const struct cpu_features *cpu_features = __get_cpu_features ();
> >
> > if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
> > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > return __strstr_avx512;
> >
> > - if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
>
> Is Fast_Unaligned_Load set on all processors before? If not, we should
> revert
It's set at ISA level >= 2. AFAICT the reason the bit exists is so that
CPUs with slow sse42 can fall back on an unaligned sse2 implementation
if it's available, as opposed to the generic / often quite expensive aligned
sse2 impl.
Example in strcmp
>
> /* Feature(s) enabled when ISA level >= 2. */
> #define Fast_Unaligned_Load_X86_ISA_LEVEL 2
>
> > return __strstr_sse2_unaligned;
> >
> > return __strstr_generic;
> > --
> > 2.34.1
> >
>
>
> --
> H.J.
On Tue, Jun 28, 2022 at 11:24 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Tue, Jun 28, 2022 at 11:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Tue, Jun 28, 2022 at 8:26 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > Small changes for this function as the generic implementation remains
> > > the same for all ISA levels.
> > >
> > > Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
> > > macros so that some of the checks at least can constant evaluate
> > > and some comments explaining the ISA constraints on the function.
> > > ---
> > > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
> > > sysdeps/x86_64/multiarch/strstr.c | 10 +++++-----
> > > 2 files changed, 12 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > index 0d28319905..a1bff560bc 100644
> > > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > @@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > >
> > > /* Support sysdeps/x86_64/multiarch/strstr.c. */
> > > IFUNC_IMPL (i, name, strstr,
> > > - IFUNC_IMPL_ADD (array, i, strstr,
> > > - (CPU_FEATURE_USABLE (AVX512VL)
> > > - && CPU_FEATURE_USABLE (AVX512BW)
> > > - && CPU_FEATURE_USABLE (AVX512DQ)
> > > - && CPU_FEATURE_USABLE (BMI2)),
> > > - __strstr_avx512)
> > > + /* All implementations of strstr are built at all ISA levels. */
> > > + IFUNC_IMPL_ADD (array, i, strstr,
> > > + (CPU_FEATURE_USABLE (AVX512VL)
> > > + && CPU_FEATURE_USABLE (AVX512BW)
> > > + && CPU_FEATURE_USABLE (AVX512DQ)
> > > + && CPU_FEATURE_USABLE (BMI2)),
> > > + __strstr_avx512)
> > > IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
> > > IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
> > >
> > > diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> > > index 2b83199245..3f86bfa5f2 100644
> > > --- a/sysdeps/x86_64/multiarch/strstr.c
> > > +++ b/sysdeps/x86_64/multiarch/strstr.c
> > > @@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
> > > const struct cpu_features *cpu_features = __get_cpu_features ();
> > >
> > > if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
> > > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > > return __strstr_avx512;
> > >
> > > - if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> > > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
> >
> > Is Fast_Unaligned_Load set on all processors before? If not, we should
> > revert
>
> It's set at ISA level >= 2. AFAICT the reason the bit exists is so that
> CPUs with slow sse42 can fall back on an unaligned sse2 implementation
> if it's available, as opposed to the generic / often quite expensive aligned
> sse2 impl.
Is Fast_Unaligned_Load set on Zhaoxin processors?
> Example in strcmp
>
> >
> > /* Feature(s) enabled when ISA level >= 2. */
> > #define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> >
> > > return __strstr_sse2_unaligned;
> > >
> > > return __strstr_generic;
> > > --
> > > 2.34.1
> > >
> >
> >
> > --
> > H.J.
On Tue, Jun 28, 2022 at 11:35 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Jun 28, 2022 at 11:24 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Tue, Jun 28, 2022 at 11:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Tue, Jun 28, 2022 at 8:26 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > >
> > > > Small changes for this function as the generic implementation remains
> > > > the same for all ISA levels.
> > > >
> > > > Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
> > > > macros so that some of the checks at least can constant evaluate
> > > > and some comments explaining the ISA constraints on the function.
> > > > ---
> > > > sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
> > > > sysdeps/x86_64/multiarch/strstr.c | 10 +++++-----
> > > > 2 files changed, 12 insertions(+), 11 deletions(-)
> > > >
> > > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > > index 0d28319905..a1bff560bc 100644
> > > > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > > @@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > > >
> > > > /* Support sysdeps/x86_64/multiarch/strstr.c. */
> > > > IFUNC_IMPL (i, name, strstr,
> > > > - IFUNC_IMPL_ADD (array, i, strstr,
> > > > - (CPU_FEATURE_USABLE (AVX512VL)
> > > > - && CPU_FEATURE_USABLE (AVX512BW)
> > > > - && CPU_FEATURE_USABLE (AVX512DQ)
> > > > - && CPU_FEATURE_USABLE (BMI2)),
> > > > - __strstr_avx512)
> > > > + /* All implementations of strstr are built at all ISA levels. */
> > > > + IFUNC_IMPL_ADD (array, i, strstr,
> > > > + (CPU_FEATURE_USABLE (AVX512VL)
> > > > + && CPU_FEATURE_USABLE (AVX512BW)
> > > > + && CPU_FEATURE_USABLE (AVX512DQ)
> > > > + && CPU_FEATURE_USABLE (BMI2)),
> > > > + __strstr_avx512)
> > > > IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
> > > > IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
> > > >
> > > > diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> > > > index 2b83199245..3f86bfa5f2 100644
> > > > --- a/sysdeps/x86_64/multiarch/strstr.c
> > > > +++ b/sysdeps/x86_64/multiarch/strstr.c
> > > > @@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
> > > > const struct cpu_features *cpu_features = __get_cpu_features ();
> > > >
> > > > if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
> > > > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > > > - && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > > > - && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > > > + && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > > > return __strstr_avx512;
> > > >
> > > > - if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> > > > + if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
> > >
> > > Is Fast_Unaligned_Load set on all processors before? If not, we should
> > > revert
> >
> > It's set at ISA level >= 2. AFAICT the reason the bit exists is so that
> > CPUs with slow sse42 can fall back on an unaligned sse2 implementation
> > if it's available, as opposed to the generic / often quite expensive aligned
> > sse2 impl.
>
> Is Fast_Unaligned_Load set on Zhaoxin processors?
No, but I believe we agreed to treat that specially only for
the functions where it really mattered.
I.e., for memcpy, where this is a meaningful distinction, we won't
use the X86_ISA_CPU_FEATURE... checks; we will force
a runtime check.
>
> > Example in strcmp
> >
> > >
> > > /* Feature(s) enabled when ISA level >= 2. */
> > > #define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> > >
> > > > return __strstr_sse2_unaligned;
> > > >
> > > > return __strstr_generic;
> > > > --
> > > > 2.34.1
> > > >
> > >
> > >
> > > --
> > > H.J.
>
>
>
> --
> H.J.
@@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strstr.c. */
IFUNC_IMPL (i, name, strstr,
- IFUNC_IMPL_ADD (array, i, strstr,
- (CPU_FEATURE_USABLE (AVX512VL)
- && CPU_FEATURE_USABLE (AVX512BW)
- && CPU_FEATURE_USABLE (AVX512DQ)
- && CPU_FEATURE_USABLE (BMI2)),
- __strstr_avx512)
+ /* All implementations of strstr are built at all ISA levels. */
+ IFUNC_IMPL_ADD (array, i, strstr,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (AVX512DQ)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strstr_avx512)
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
@@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
const struct cpu_features *cpu_features = __get_cpu_features ();
if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
+ && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
return __strstr_avx512;
- if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+ if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
return __strstr_sse2_unaligned;
return __strstr_generic;