x86-64: Require BMI2 for strchr-avx2.S
Commit Message
Since strchr-avx2.S updated by
commit 1f745ecc2109890886b161d4791e1406fdfc29b8
Author: noah <goldstein.w.n@gmail.com>
Date: Wed Feb 3 00:38:59 2021 -0500
x86-64: Refactor and improve performance of strchr-avx2.S
uses sarx:
c4 e2 72 f7 c0 sarx %ecx,%eax,%eax
which is a BMI2 instruction, require BMI2 for the strchr-avx2 family of
functions in ifunc-impl-list.c and ifunc-avx2.h.
---
sysdeps/x86_64/multiarch/ifunc-avx2.h | 4 ++--
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 12 +++++++++---
2 files changed, 11 insertions(+), 5 deletions(-)
Comments
* H. J. Lu via Libc-alpha:
> Since strchr-avx2.S updated by
>
> commit 1f745ecc2109890886b161d4791e1406fdfc29b8
> Author: noah <goldstein.w.n@gmail.com>
> Date: Wed Feb 3 00:38:59 2021 -0500
>
> x86-64: Refactor and improve performance of strchr-avx2.S
>
> uses sarx:
>
> c4 e2 72 f7 c0 sarx %ecx,%eax,%eax
>
> for strchr-avx2 family functions, require BMI2 in ifunc-impl-list.c and
> ifunc-avx2.h.
Seems reasonable, thanks.
Florian
On Mon, Apr 19, 2021 at 11:50 AM Florian Weimer via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> * H. J. Lu via Libc-alpha:
>
> > Since strchr-avx2.S updated by
> >
> > commit 1f745ecc2109890886b161d4791e1406fdfc29b8
> > Author: noah <goldstein.w.n@gmail.com>
> > Date: Wed Feb 3 00:38:59 2021 -0500
> >
> > x86-64: Refactor and improve performance of strchr-avx2.S
> >
> > uses sarx:
> >
> > c4 e2 72 f7 c0 sarx %ecx,%eax,%eax
> >
> > for strchr-avx2 family functions, require BMI2 in ifunc-impl-list.c and
> > ifunc-avx2.h.
>
> Seems reasonable, thanks.
>
> Florian
>
I would like to backport this patch to release branches.
Any comments or objections?
--Sunil
* Sunil Pandey:
> On Mon, Apr 19, 2021 at 11:50 AM Florian Weimer via Libc-alpha
> <libc-alpha@sourceware.org> wrote:
>>
>> * H. J. Lu via Libc-alpha:
>>
>> > Since strchr-avx2.S updated by
>> >
>> > commit 1f745ecc2109890886b161d4791e1406fdfc29b8
>> > Author: noah <goldstein.w.n@gmail.com>
>> > Date: Wed Feb 3 00:38:59 2021 -0500
>> >
>> > x86-64: Refactor and improve performance of strchr-avx2.S
>> >
>> > uses sarx:
>> >
>> > c4 e2 72 f7 c0 sarx %ecx,%eax,%eax
>> >
>> > for strchr-avx2 family functions, require BMI2 in ifunc-impl-list.c and
>> > ifunc-avx2.h.
>>
>> Seems reasonable, thanks.
>>
>> Florian
>>
>
> I would like to backport this patch to release branches.
> Any comments or objections?
Please backport.
Thanks,
Florian
@@ -30,11 +30,11 @@ IFUNC_SELECTOR (void)
const struct cpu_features* cpu_features = __get_cpu_features ();
if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
&& CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
{
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
@@ -396,10 +396,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strchr.c. */
IFUNC_IMPL (i, name, strchr,
IFUNC_IMPL_ADD (array, i, strchr,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__strchr_avx2)
IFUNC_IMPL_ADD (array, i, strchr,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strchr_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strchr,
@@ -413,10 +415,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strchrnul.c. */
IFUNC_IMPL (i, name, strchrnul,
IFUNC_IMPL_ADD (array, i, strchrnul,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__strchrnul_avx2)
IFUNC_IMPL_ADD (array, i, strchrnul,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__strchrnul_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strchrnul,
@@ -570,10 +574,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/wcschr.c. */
IFUNC_IMPL (i, name, wcschr,
IFUNC_IMPL_ADD (array, i, wcschr,
- CPU_FEATURE_USABLE (AVX2),
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)),
__wcschr_avx2)
IFUNC_IMPL_ADD (array, i, wcschr,
(CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (RTM)),
__wcschr_avx2_rtm)
IFUNC_IMPL_ADD (array, i, wcschr,