[v12,18/31] arm: Add string-fza.h
Checks
Context | Check | Description
dj/TryBot-apply_patch | success | Patch applied to master at the time it was sent
Commit Message
From: Richard Henderson <richard.henderson@linaro.org>
While arm has the more important string functions in assembly,
there are still a few generic routines used.
Use the UQSUB8 insn for testing of zeros.
Checked on armv7-linux-gnueabihf
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
---
sysdeps/arm/armv6t2/string-fza.h | 68 ++++++++++++++++++++++++++++++++
1 file changed, 68 insertions(+)
create mode 100644 sysdeps/arm/armv6t2/string-fza.h
Comments
The 02/02/2023 15:11, Adhemerval Zanella via Libc-alpha wrote:
> From: Richard Henderson <richard.henderson@linaro.org>
>
> While arm has the more important string functions in assembly,
> there are still a few generic routines used.
>
> Use the UQSUB8 insn for testing of zeros.
>
> Checked on armv7-linux-gnueabihf
> Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
> ---
> sysdeps/arm/armv6t2/string-fza.h | 68 ++++++++++++++++++++++++++++++++
...
> +static __always_inline find_t
> +find_zero_all (op_t x)
> +{
> + /* Use unsigned saturated subtraction from 1 in each byte.
> + That leaves 1 for every byte that was zero. */
> + op_t ones = repeat_bytes (0x01);
> + return __builtin_arm_uqsub8 (ones, x);
> +}
__builtin_arm_uqsub8 is only available >=gcc-10
so now the build fails with gcc-9
../sysdeps/arm/armv6t2/string-fza.h:36:10: error: implicit declaration of function ‘__builtin_arm_uqsub8’; did you mean ‘__builtin_arm_stc’? [-Werror=implicit-function-declaration]
36 | return __builtin_arm_uqsub8 (ones, x);
so this code should be conditional on gcc version.
The 02/20/2023 13:24, Szabolcs Nagy via Libc-alpha wrote:
> The 02/02/2023 15:11, Adhemerval Zanella via Libc-alpha wrote:
> > +static __always_inline find_t
> > +find_zero_all (op_t x)
> > +{
> > + /* Use unsigned saturated subtraction from 1 in each byte.
> > + That leaves 1 for every byte that was zero. */
> > + op_t ones = repeat_bytes (0x01);
> > + return __builtin_arm_uqsub8 (ones, x);
> > +}
>
> __builtin_arm_uqsub8 is only available >=gcc-10
>
> so now the build fails with gcc-9
>
> ../sysdeps/arm/armv6t2/string-fza.h:36:10: error: implicit declaration of function ‘__builtin_arm_uqsub8’; did you mean ‘__builtin_arm_stc’? [-Werror=implicit-function-declaration]
> 36 | return __builtin_arm_uqsub8 (ones, x);
>
> so this code should be conditional on gcc version.
>
i think
asm ("uqsub8 %0, %0, %1" : "+r" (ones) : "r" (x));
should be a good fallback (untested).
On 20/02/23 10:45, Szabolcs Nagy wrote:
> The 02/20/2023 13:24, Szabolcs Nagy via Libc-alpha wrote:
>> The 02/02/2023 15:11, Adhemerval Zanella via Libc-alpha wrote:
>>> +static __always_inline find_t
>>> +find_zero_all (op_t x)
>>> +{
>>> + /* Use unsigned saturated subtraction from 1 in each byte.
>>> + That leaves 1 for every byte that was zero. */
>>> + op_t ones = repeat_bytes (0x01);
>>> + return __builtin_arm_uqsub8 (ones, x);
>>> +}
>>
>> __builtin_arm_uqsub8 is only available >=gcc-10
>>
>> so now the build fails with gcc-9
>>
>> ../sysdeps/arm/armv6t2/string-fza.h:36:10: error: implicit declaration of function ‘__builtin_arm_uqsub8’; did you mean ‘__builtin_arm_stc’? [-Werror=implicit-function-declaration]
>> 36 | return __builtin_arm_uqsub8 (ones, x);
>>
>> so this code should be conditional on gcc version.
>>
>
> i think
>
> asm ("uqsub8 %0, %0, %1" : "+r" (ones) : "r" (x));
>
> should be a good fallback (untested).
This is what we have on v7:
static __always_inline op_t
find_zero_all (op_t x)
{
/* Use unsigned saturated subtraction from 1 in each byte.
That leaves 1 for every byte that was zero. */
op_t ret, ones = repeat_bytes (0x01);
asm ("uqsub8 %0,%1,%2" : "=r"(ret) : "r"(ones), "r"(x));
return ret;
}
Maybe extend with:
static __always_inline op_t
find_zero_all (op_t x)
{
op_t ones = repeat_bytes (0x01);
#if __GNUC_PREREQ (10, 0)
return __builtin_arm_uqsub8 (ones, x);
#else
op_t ret;
asm ("uqsub8 %0,%1,%2" : "=r"(ret) : "r"(ones), "r"(x));
return ret;
#endif
}
The 02/20/2023 11:01, Adhemerval Zanella Netto wrote:
>
>
> On 20/02/23 10:45, Szabolcs Nagy wrote:
> > The 02/20/2023 13:24, Szabolcs Nagy via Libc-alpha wrote:
> >> The 02/02/2023 15:11, Adhemerval Zanella via Libc-alpha wrote:
> >>> +static __always_inline find_t
> >>> +find_zero_all (op_t x)
> >>> +{
> >>> + /* Use unsigned saturated subtraction from 1 in each byte.
> >>> + That leaves 1 for every byte that was zero. */
> >>> + op_t ones = repeat_bytes (0x01);
> >>> + return __builtin_arm_uqsub8 (ones, x);
> >>> +}
> >>
> >> __builtin_arm_uqsub8 is only available >=gcc-10
> >>
> >> so now the build fails with gcc-9
> >>
> >> ../sysdeps/arm/armv6t2/string-fza.h:36:10: error: implicit declaration of function ‘__builtin_arm_uqsub8’; did you mean ‘__builtin_arm_stc’? [-Werror=implicit-function-declaration]
> >> 36 | return __builtin_arm_uqsub8 (ones, x);
> >>
> >> so this code should be conditional on gcc version.
> >>
> >
> > i think
> >
> > asm ("uqsub8 %0, %0, %1" : "+r" (ones) : "r" (x));
> >
> > should be a good fallback (untested).
>
> This is what we have on v7:
>
> static __always_inline op_t
> find_zero_all (op_t x)
> {
> /* Use unsigned saturated subtraction from 1 in each byte.
> That leaves 1 for every byte that was zero. */
> op_t ret, ones = repeat_bytes (0x01);
> asm ("uqsub8 %0,%1,%2" : "=r"(ret) : "r"(ones), "r"(x));
> return ret;
> }
>
> Maybe extend with:
>
> static __always_inline op_t
> find_zero_all (op_t x)
> {
> op_t ones = repeat_bytes (0x01);
> #if __GNUC_PREREQ (10, 0)
> return __builtin_arm_uqsub8 (ones, x);
> #else
> op_t ret;
> asm ("uqsub8 %0,%1,%2" : "=r"(ret) : "r"(ones), "r"(x));
> return ret;
> #endif
> }
yes this looks good to me.
new file mode 100644
@@ -0,0 +1,68 @@
+/* Zero byte detection; basics. ARM version.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _STRING_FZA_H
+#define _STRING_FZA_H 1
+
+#include <string-misc.h>
+#include <string-optype.h>
+
+/* Result type of the find_* functions below: they return a byte mask
+   that has the same width as the op_t word they inspect.  */
+typedef op_t find_t;
+
+/* This function returns at least one bit set within every byte
+ of X that is zero. */
+static __always_inline find_t
+find_zero_all (op_t x)
+{
+ /* Use unsigned saturated subtraction from 1 in each byte.
+ That leaves 1 for every byte that was zero. */
+ op_t ones = repeat_bytes (0x01);
+ return __builtin_arm_uqsub8 (ones, x);
+}
+
+/* Flag the byte positions at which X1 and X2 hold the same value.  */
+static __always_inline find_t
+find_eq_all (op_t x1, op_t x2)
+{
+  /* Equal bytes cancel to zero under XOR, so the matching positions
+     are exactly the zero bytes of the XOR word.  */
+  op_t diff = x1 ^ x2;
+  return find_zero_all (diff);
+}
+
+/* Flag the byte positions where X1 is zero or where X1 and X2 hold
+   the same value.  */
+static __always_inline find_t
+find_zero_eq_all (op_t x1, op_t x2)
+{
+  find_t zero_mask = find_zero_all (x1);
+  find_t eq_mask = find_zero_all (x1 ^ x2);
+  return zero_mask | eq_mask;
+}
+
+/* Flag the byte positions where X1 is zero or where X1 and X2 hold
+   different values.  */
+static __always_inline find_t
+find_zero_ne_all (op_t x1, op_t x2)
+{
+  /* ONES is the same constant find_zero_all builds, so reusing it
+     here should not cost an extra register.  */
+  op_t ones = repeat_bytes (0x01);
+  find_t zero_mask = find_zero_all (x1);
+  find_t eq_mask = find_zero_all (x1 ^ x2);
+  /* Flipping the low bit of each byte turns "equal" into "differs".  */
+  find_t ne_mask = eq_mask ^ ones;
+  return zero_mask | ne_mask;
+}
+
+/* Define the "inexact" (_low) versions in terms of the exact (_all)
+   versions: each _low name is a plain alias of its _all counterpart.  */
+#define find_zero_low find_zero_all
+#define find_eq_low find_eq_all
+#define find_zero_eq_low find_zero_eq_all
+#define find_zero_ne_low find_zero_ne_all
+
+#endif /* _STRING_FZA_H */