[v1] x86: Add support for building strstr with explicit ISA level

Message ID 20220628152628.17802-2-goldstein.w.n@gmail.com
State Dropped
Headers
Series [v1] x86: Add support for building strstr with explicit ISA level |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Noah Goldstein June 28, 2022, 3:26 p.m. UTC
  Small changes for this function as the generic implementation remains
the same for all ISA levels.

Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
macros so that some of the checks at least can constant evaluate
and some comments explaining the ISA constraints on the function.
---
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
 sysdeps/x86_64/multiarch/strstr.c          | 10 +++++-----
 2 files changed, 12 insertions(+), 11 deletions(-)
  

Comments

H.J. Lu June 28, 2022, 6:20 p.m. UTC | #1
On Tue, Jun 28, 2022 at 8:26 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> Small changes for this function as the generic implementation remains
> the same for all ISA levels.
>
> Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
> macros so that some of the checks at least can constant evaluate
> and some comments explaining the ISA constraints on the function.
> ---
>  sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
>  sysdeps/x86_64/multiarch/strstr.c          | 10 +++++-----
>  2 files changed, 12 insertions(+), 11 deletions(-)
>
> diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> index 0d28319905..a1bff560bc 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> @@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>
>    /* Support sysdeps/x86_64/multiarch/strstr.c.  */
>    IFUNC_IMPL (i, name, strstr,
> -              IFUNC_IMPL_ADD (array, i, strstr,
> -                              (CPU_FEATURE_USABLE (AVX512VL)
> -                               && CPU_FEATURE_USABLE (AVX512BW)
> -                               && CPU_FEATURE_USABLE (AVX512DQ)
> -                               && CPU_FEATURE_USABLE (BMI2)),
> -                              __strstr_avx512)
> +             /* All implementations of strstr are built at all ISA levels.  */
> +             IFUNC_IMPL_ADD (array, i, strstr,
> +                             (CPU_FEATURE_USABLE (AVX512VL)
> +                              && CPU_FEATURE_USABLE (AVX512BW)
> +                              && CPU_FEATURE_USABLE (AVX512DQ)
> +                              && CPU_FEATURE_USABLE (BMI2)),
> +                              __strstr_avx512)
>               IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
>               IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
>
> diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> index 2b83199245..3f86bfa5f2 100644
> --- a/sysdeps/x86_64/multiarch/strstr.c
> +++ b/sysdeps/x86_64/multiarch/strstr.c
> @@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
>    const struct cpu_features *cpu_features = __get_cpu_features ();
>
>    if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
> -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> -      && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
>      return __strstr_avx512;
>
> -  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> +  if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))

Is Fast_Unaligned_Load set on all processors before? If not, we should
revert

/* Feature(s) enabled when ISA level >= 2.  */
#define Fast_Unaligned_Load_X86_ISA_LEVEL 2

>      return __strstr_sse2_unaligned;
>
>    return __strstr_generic;
> --
> 2.34.1
>
  
Noah Goldstein June 28, 2022, 6:24 p.m. UTC | #2
On Tue, Jun 28, 2022 at 11:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Jun 28, 2022 at 8:26 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > Small changes for this function as the generic implementation remains
> > the same for all ISA levels.
> >
> > Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
> > macros so that some of the checks at least can constant evaluate
> > and some comments explaining the ISA constraints on the function.
> > ---
> >  sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
> >  sysdeps/x86_64/multiarch/strstr.c          | 10 +++++-----
> >  2 files changed, 12 insertions(+), 11 deletions(-)
> >
> > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > index 0d28319905..a1bff560bc 100644
> > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > @@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> >
> >    /* Support sysdeps/x86_64/multiarch/strstr.c.  */
> >    IFUNC_IMPL (i, name, strstr,
> > -              IFUNC_IMPL_ADD (array, i, strstr,
> > -                              (CPU_FEATURE_USABLE (AVX512VL)
> > -                               && CPU_FEATURE_USABLE (AVX512BW)
> > -                               && CPU_FEATURE_USABLE (AVX512DQ)
> > -                               && CPU_FEATURE_USABLE (BMI2)),
> > -                              __strstr_avx512)
> > +             /* All implementations of strstr are built at all ISA levels.  */
> > +             IFUNC_IMPL_ADD (array, i, strstr,
> > +                             (CPU_FEATURE_USABLE (AVX512VL)
> > +                              && CPU_FEATURE_USABLE (AVX512BW)
> > +                              && CPU_FEATURE_USABLE (AVX512DQ)
> > +                              && CPU_FEATURE_USABLE (BMI2)),
> > +                              __strstr_avx512)
> >               IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
> >               IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
> >
> > diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> > index 2b83199245..3f86bfa5f2 100644
> > --- a/sysdeps/x86_64/multiarch/strstr.c
> > +++ b/sysdeps/x86_64/multiarch/strstr.c
> > @@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
> >    const struct cpu_features *cpu_features = __get_cpu_features ();
> >
> >    if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
> > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > -      && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> >      return __strstr_avx512;
> >
> > -  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> > +  if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
>
> Is Fast_Unaligned_Load set on all processors before? If not, we should
> revert

It's set at ISA level >= 2. AFAICT the reason the bit exists is so that that
CPUs with slow sse42 can fallback on an unaligned sse2 implementation
if it's available as opposed to the generic / often quite expensive aligned
sse2 impl.

Example in strcmp

>
> /* Feature(s) enabled when ISA level >= 2.  */
> #define Fast_Unaligned_Load_X86_ISA_LEVEL 2
>
> >      return __strstr_sse2_unaligned;
> >
> >    return __strstr_generic;
> > --
> > 2.34.1
> >
>
>
> --
> H.J.
  
H.J. Lu June 28, 2022, 6:34 p.m. UTC | #3
On Tue, Jun 28, 2022 at 11:24 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Tue, Jun 28, 2022 at 11:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Tue, Jun 28, 2022 at 8:26 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > Small changes for this function as the generic implementation remains
> > > the same for all ISA levels.
> > >
> > > Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
> > > macros so that some of the checks at least can constant evaluate
> > > and some comments explaining the ISA constraints on the function.
> > > ---
> > >  sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
> > >  sysdeps/x86_64/multiarch/strstr.c          | 10 +++++-----
> > >  2 files changed, 12 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > index 0d28319905..a1bff560bc 100644
> > > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > @@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > >
> > >    /* Support sysdeps/x86_64/multiarch/strstr.c.  */
> > >    IFUNC_IMPL (i, name, strstr,
> > > -              IFUNC_IMPL_ADD (array, i, strstr,
> > > -                              (CPU_FEATURE_USABLE (AVX512VL)
> > > -                               && CPU_FEATURE_USABLE (AVX512BW)
> > > -                               && CPU_FEATURE_USABLE (AVX512DQ)
> > > -                               && CPU_FEATURE_USABLE (BMI2)),
> > > -                              __strstr_avx512)
> > > +             /* All implementations of strstr are built at all ISA levels.  */
> > > +             IFUNC_IMPL_ADD (array, i, strstr,
> > > +                             (CPU_FEATURE_USABLE (AVX512VL)
> > > +                              && CPU_FEATURE_USABLE (AVX512BW)
> > > +                              && CPU_FEATURE_USABLE (AVX512DQ)
> > > +                              && CPU_FEATURE_USABLE (BMI2)),
> > > +                              __strstr_avx512)
> > >               IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
> > >               IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
> > >
> > > diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> > > index 2b83199245..3f86bfa5f2 100644
> > > --- a/sysdeps/x86_64/multiarch/strstr.c
> > > +++ b/sysdeps/x86_64/multiarch/strstr.c
> > > @@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
> > >    const struct cpu_features *cpu_features = __get_cpu_features ();
> > >
> > >    if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
> > > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > > -      && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > >      return __strstr_avx512;
> > >
> > > -  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> > > +  if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
> >
> > Is Fast_Unaligned_Load set on all processors before? If not, we should
> > revert
>
> It's set at ISA level >= 2. AFAICT the reason the bit exists is so that that
> CPUs with slow sse42 can fallback on an unaligned sse2 implementation
> if it's available as opposed to the generic / often quite expensive aligned
> sse2 impl.

Is Fast_Unaligned_Load set on Zhaoxin processors?

> Example in strcmp
>
> >
> > /* Feature(s) enabled when ISA level >= 2.  */
> > #define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> >
> > >      return __strstr_sse2_unaligned;
> > >
> > >    return __strstr_generic;
> > > --
> > > 2.34.1
> > >
> >
> >
> > --
> > H.J.
  
Noah Goldstein June 28, 2022, 6:38 p.m. UTC | #4
On Tue, Jun 28, 2022 at 11:35 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Jun 28, 2022 at 11:24 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Tue, Jun 28, 2022 at 11:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Tue, Jun 28, 2022 at 8:26 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > >
> > > > Small changes for this function as the generic implementation remains
> > > > the same for all ISA levels.
> > > >
> > > > Only changes are using the X86_ISA_CPU_FEATURE{S}_{USABLE|ARCH}_P
> > > > macros so that some of the checks at least can constant evaluate
> > > > and some comments explaining the ISA constraints on the function.
> > > > ---
> > > >  sysdeps/x86_64/multiarch/ifunc-impl-list.c | 13 +++++++------
> > > >  sysdeps/x86_64/multiarch/strstr.c          | 10 +++++-----
> > > >  2 files changed, 12 insertions(+), 11 deletions(-)
> > > >
> > > > diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > > index 0d28319905..a1bff560bc 100644
> > > > --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > > +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
> > > > @@ -620,12 +620,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> > > >
> > > >    /* Support sysdeps/x86_64/multiarch/strstr.c.  */
> > > >    IFUNC_IMPL (i, name, strstr,
> > > > -              IFUNC_IMPL_ADD (array, i, strstr,
> > > > -                              (CPU_FEATURE_USABLE (AVX512VL)
> > > > -                               && CPU_FEATURE_USABLE (AVX512BW)
> > > > -                               && CPU_FEATURE_USABLE (AVX512DQ)
> > > > -                               && CPU_FEATURE_USABLE (BMI2)),
> > > > -                              __strstr_avx512)
> > > > +             /* All implementations of strstr are built at all ISA levels.  */
> > > > +             IFUNC_IMPL_ADD (array, i, strstr,
> > > > +                             (CPU_FEATURE_USABLE (AVX512VL)
> > > > +                              && CPU_FEATURE_USABLE (AVX512BW)
> > > > +                              && CPU_FEATURE_USABLE (AVX512DQ)
> > > > +                              && CPU_FEATURE_USABLE (BMI2)),
> > > > +                              __strstr_avx512)
> > > >               IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
> > > >               IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
> > > >
> > > > diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
> > > > index 2b83199245..3f86bfa5f2 100644
> > > > --- a/sysdeps/x86_64/multiarch/strstr.c
> > > > +++ b/sysdeps/x86_64/multiarch/strstr.c
> > > > @@ -49,13 +49,13 @@ IFUNC_SELECTOR (void)
> > > >    const struct cpu_features *cpu_features = __get_cpu_features ();
> > > >
> > > >    if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
> > > > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > > > -      && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > > > -      && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
> > > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
> > > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
> > > > +      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
> > > >      return __strstr_avx512;
> > > >
> > > > -  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
> > > > +  if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
> > >
> > > Is Fast_Unaligned_Load set on all processors before? If not, we should
> > > revert
> >
> > It's set at ISA level >= 2. AFAICT the reason the bit exists is so that that
> > CPUs with slow sse42 can fallback on an unaligned sse2 implementation
> > if it's available as opposed to the generic / often quite expensive aligned
> > sse2 impl.
>
> Is Fast_Unaligned_Load set on Zhaoxin processors?

No, but I believe we agreed to treat that specially only for
the functions where it really mattered.

I.e for memcpy where this is a meaningful distinction we won't
use the X86_ISA_CPU_FEATURE... checks, we will force
a runtime check.
>
> > Example in strcmp
> >
> > >
> > > /* Feature(s) enabled when ISA level >= 2.  */
> > > #define Fast_Unaligned_Load_X86_ISA_LEVEL 2
> > >
> > > >      return __strstr_sse2_unaligned;
> > > >
> > > >    return __strstr_generic;
> > > > --
> > > > 2.34.1
> > > >
> > >
> > >
> > > --
> > > H.J.
>
>
>
> --
> H.J.
  

Patch

diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 0d28319905..a1bff560bc 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -620,12 +620,13 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   /* Support sysdeps/x86_64/multiarch/strstr.c.  */
   IFUNC_IMPL (i, name, strstr,
-              IFUNC_IMPL_ADD (array, i, strstr,
-                              (CPU_FEATURE_USABLE (AVX512VL)
-                               && CPU_FEATURE_USABLE (AVX512BW)
-                               && CPU_FEATURE_USABLE (AVX512DQ)
-                               && CPU_FEATURE_USABLE (BMI2)),
-                              __strstr_avx512)
+	      /* All implementations of strstr are built at all ISA levels.  */
+	      IFUNC_IMPL_ADD (array, i, strstr,
+			      (CPU_FEATURE_USABLE (AVX512VL)
+			       && CPU_FEATURE_USABLE (AVX512BW)
+			       && CPU_FEATURE_USABLE (AVX512DQ)
+			       && CPU_FEATURE_USABLE (BMI2)),
+			       __strstr_avx512)
 	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_generic))
 
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index 2b83199245..3f86bfa5f2 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -49,13 +49,13 @@  IFUNC_SELECTOR (void)
   const struct cpu_features *cpu_features = __get_cpu_features ();
 
   if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)
-      && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
-      && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
-      && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
-      && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
+      && X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, BMI2))
     return __strstr_avx512;
 
-  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+  if (X86_ISA_CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load, ))
     return __strstr_sse2_unaligned;
 
   return __strstr_generic;