[10/11] hppa: Add haszero.h and whichzero.h
Commit Message
Use UXOR,SBZ to test for a zero byte within a word. While we can
get semi-decent code out of asm-goto, we would do slightly better
with a compiler builtin.
For whichzero, sequential testing of bytes is less expensive than any
tricks that involve a count-leading-zeros insn that we don't have.
* sysdeps/hppa/haszero.h: New file.
* sysdeps/hppa/whichzero.h: New file.
---
sysdeps/hppa/haszero.h | 82 ++++++++++++++++++++++++++++++++++++++++++++++++
sysdeps/hppa/whichzero.h | 70 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 152 insertions(+)
create mode 100644 sysdeps/hppa/haszero.h
create mode 100644 sysdeps/hppa/whichzero.h
Comments
On 17/12/2016 04:57, Richard Henderson wrote:
> +static inline unsigned long int
> +haszero(unsigned long int x)
> +{
> +#if __GNUC_PREREQ(4, 5)
> + /* It's more useful to expose a control transfer to the compiler
> + than to expose a proper boolean result. */
> + if (sizeof(x) == 8)
> + asm goto ("uxor,*sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz);
> + else
> + asm goto ("uxor,sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz);
> + return 1;
> + nbz:
> + return 0;
> +#else
Since current GLIBC requires GCC 4.7 as minimum compiler I think we
can get rid of snippets for old compilers. Same for the other
override functios.
> + unsigned long int ret;
> + if (sizeof(x) == 8)
> + asm ("uxor,*sbz %%r0,%1,%%r0\n\tcopy %%r0,%0"
> + : "=r"(ret) : "r"(x), "0"(1));
> + else
> + asm ("uxor,sbz %%r0,%1,%%r0\n\tcopy %%r0,%0"
> + : "=r"(ret) : "r"(x), "0"(1));
> + return ret;
> +#endif
> +}
> +
> +/* Likewise, but for two words simultaneously. */
> +
> +static inline unsigned long int
> +haszero2(unsigned long int x1, unsigned long int x2)
> +{
> +#if __GNUC_PREREQ(4, 5)
> + /* It's more useful to expose a control transfer to the compiler
> + than to expose a proper boolean result. */
> + if (sizeof(x1) == 8)
> + asm goto ("uxor,*sbz %%r0,%0,%%r0\n\t"
> + "uxor,*nbz %%r0,%1,%%r0\n\t"
> + "b,n %l2" : : "r"(x1), "r"(x2) : : sbz);
> + else
> + asm goto ("uxor,sbz %%r0,%0,%%r0\n\t"
> + "uxor,nbz %%r0,%1,%%r0\n\t"
> + "b,n %l2" : : "r"(x1), "r"(x2) : : sbz);
> + return 0;
> + sbz:
> + return 1;
> +#else
> + unsigned long int ret;
> + if (sizeof(x1) == 8)
> + asm ("uxor,*sbz %%r0,%1,%%r0\n\t"
> + "uxor,*nbz %%r0,%2,%%r0\n\t"
> + "ldi 1,%0"
> + : "=r"(ret) : "r"(x1), "r"(x2), "0"(0));
> + else
> + asm ("uxor,sbz %%r0,%1,%%r0\n\t"
> + "uxor,nbz %%r0,%2,%%r0\n\t"
> + "ldi 1,%0"
> + : "=r"(ret) : "r"(x1), "r"(x2), "0"(0));
> + return ret;
> +#endif
> +}
> +
> +#endif /* haszero.h */
> diff --git a/sysdeps/hppa/whichzero.h b/sysdeps/hppa/whichzero.h
> new file mode 100644
> index 0000000..ef18cc7
> --- /dev/null
> +++ b/sysdeps/hppa/whichzero.h
> @@ -0,0 +1,70 @@
> +/* whichzero.h -- functions for zero byte searching. HPPA version.
> + Copyright (C) 2016 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#ifndef HPPA_WHICHZERO_H
> +#define HPPA_WHICHZERO_H 1
> +
> +/* Given a long that is known to contain a zero byte, return the
> + index of the first such within the long in host memory order. */
> +
> +static inline unsigned int
> +whichzero(unsigned long int x)
> +{
> + unsigned int ret;
> +
> + _Static_assert (sizeof(x) == 4, "64-bit not supported");
> +
> + /* Since we have no clz insn, direct tests of the bytes is faster
> + than loading up the constants to do the masking. */
> + asm ("extrw,u,<> %1,23,8,%%r0\n\t"
> + "ldi 2,%0\n\t"
> + "extrw,u,<> %1,15,8,%%r0\n\t"
> + "ldi 1,%0\n\t"
> + "extrw,u,<> %1,7,8,%%r0\n\t"
> + "ldi 0,%0"
> + : "=r"(ret) : "r"(x), "0"(3));
> +
> + return ret;
> +}
> +
> +/* Similarly, but perform the test for two longs simultaneously. */
> +
> +static inline unsigned int
> +whichzero2(unsigned long int x1, unsigned long int x2)
> +{
> + unsigned int ret;
> +
> + _Static_assert (sizeof(x1) == 4, "64-bit not supported");
> +
> + /* Since we have no clz insn, direct tests of the bytes is faster
> + than loading up the constants to do the masking. */
> + asm ("extrw,u,= %1,23,8,%%r0\n\t"
> + "extrw,u,<> %2,23,8,%%r0\n\t"
> + "ldi 2,%0\n\t"
> + "extrw,u,= %1,15,8,%%r0\n\t"
> + "extrw,u,<> %2,15,8,%%r0\n\t"
> + "ldi 1,%0\n\t"
> + "extrw,u,= %1,7,8,%%r0\n\t"
> + "extrw,u,<> %2,7,8,%%r0\n\t"
> + "ldi 0,%0"
> + : "=r"(ret) : "r"(x1), "r"(x2), "0"(3));
> +
> + return ret;
> +}
> +
> +#endif /* whichzero.h */
I am far from a hppa expert, but can't we code the same snippet in C? How
bad would it be compared to this optimized asm?
On 12/19/2016 06:54 AM, Adhemerval Zanella wrote:
>> +static inline unsigned long int
>> +haszero(unsigned long int x)
>> +{
>> +#if __GNUC_PREREQ(4, 5)
>> + /* It's more useful to expose a control transfer to the compiler
>> + than to expose a proper boolean result. */
>> + if (sizeof(x) == 8)
>> + asm goto ("uxor,*sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz);
>> + else
>> + asm goto ("uxor,sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz);
>> + return 1;
>> + nbz:
>> + return 0;
>> +#else
>
> Since current GLIBC requires GCC 4.7 as minimum compiler I think we
> can get rid of snippets for old compilers. Same for the other
> override functios.
Ah good. I'd meant to go back and look for the minimum required gcc.
>> + /* Since we have no clz insn, direct tests of the bytes is faster
>> + than loading up the constants to do the masking. */
>> + asm ("extrw,u,= %1,23,8,%%r0\n\t"
>> + "extrw,u,<> %2,23,8,%%r0\n\t"
>> + "ldi 2,%0\n\t"
>> + "extrw,u,= %1,15,8,%%r0\n\t"
>> + "extrw,u,<> %2,15,8,%%r0\n\t"
>> + "ldi 1,%0\n\t"
>> + "extrw,u,= %1,7,8,%%r0\n\t"
>> + "extrw,u,<> %2,7,8,%%r0\n\t"
>> + "ldi 0,%0"
>> + : "=r"(ret) : "r"(x1), "r"(x2), "0"(3));
>> +
>> + return ret;
>> +}
>> +
>> +#endif /* whichzero.h */
>
> I am far from a hppa expert, but can't we code the same snippet in C? How
> bad would it be compared to this optimized asm?
The compiler is not great at this. It only attempts nullification on
comparisons (not directly as a result of an operation like extract), and it
never attempts double nullification as above.
So for whichzero gcc will use 10 insns instead of my 7; for whichzero2 gcc will
use 19 insns instead of my 10.
r~
new file mode 100644
@@ -0,0 +1,82 @@
+/* haszero.h -- function for zero byte detection. HPPA version.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef HASZERO_H
+#define HASZERO_H 1
+
+static inline unsigned long int
+haszero(unsigned long int x)
+{
+#if __GNUC_PREREQ(4, 5)
+ /* It's more useful to expose a control transfer to the compiler
+ than to expose a proper boolean result. */
+ if (sizeof(x) == 8)
+ asm goto ("uxor,*sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz);
+ else
+ asm goto ("uxor,sbz %%r0,%0,%%r0\n\tb,n %l1" : : "r"(x) : : nbz);
+ return 1;
+ nbz:
+ return 0;
+#else
+ unsigned long int ret;
+ if (sizeof(x) == 8)
+ asm ("uxor,*sbz %%r0,%1,%%r0\n\tcopy %%r0,%0"
+ : "=r"(ret) : "r"(x), "0"(1));
+ else
+ asm ("uxor,sbz %%r0,%1,%%r0\n\tcopy %%r0,%0"
+ : "=r"(ret) : "r"(x), "0"(1));
+ return ret;
+#endif
+}
+
+/* Likewise, but for two words simultaneously. */
+
+static inline unsigned long int
+haszero2(unsigned long int x1, unsigned long int x2)
+{
+#if __GNUC_PREREQ(4, 5)
+ /* It's more useful to expose a control transfer to the compiler
+ than to expose a proper boolean result. */
+ if (sizeof(x1) == 8)
+ asm goto ("uxor,*sbz %%r0,%0,%%r0\n\t"
+ "uxor,*nbz %%r0,%1,%%r0\n\t"
+ "b,n %l2" : : "r"(x1), "r"(x2) : : sbz);
+ else
+ asm goto ("uxor,sbz %%r0,%0,%%r0\n\t"
+ "uxor,nbz %%r0,%1,%%r0\n\t"
+ "b,n %l2" : : "r"(x1), "r"(x2) : : sbz);
+ return 0;
+ sbz:
+ return 1;
+#else
+ unsigned long int ret;
+ if (sizeof(x1) == 8)
+ asm ("uxor,*sbz %%r0,%1,%%r0\n\t"
+ "uxor,*nbz %%r0,%2,%%r0\n\t"
+ "ldi 1,%0"
+ : "=r"(ret) : "r"(x1), "r"(x2), "0"(0));
+ else
+ asm ("uxor,sbz %%r0,%1,%%r0\n\t"
+ "uxor,nbz %%r0,%2,%%r0\n\t"
+ "ldi 1,%0"
+ : "=r"(ret) : "r"(x1), "r"(x2), "0"(0));
+ return ret;
+#endif
+}
+
+#endif /* haszero.h */
new file mode 100644
@@ -0,0 +1,70 @@
+/* whichzero.h -- functions for zero byte searching. HPPA version.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef HPPA_WHICHZERO_H
+#define HPPA_WHICHZERO_H 1
+
+/* Given a long that is known to contain a zero byte, return the
+ index of the first such within the long in host memory order. */
+
+static inline unsigned int
+whichzero(unsigned long int x)
+{
+ unsigned int ret;
+
+ _Static_assert (sizeof(x) == 4, "64-bit not supported");
+
+ /* Since we have no clz insn, direct tests of the bytes is faster
+ than loading up the constants to do the masking. */
+ asm ("extrw,u,<> %1,23,8,%%r0\n\t"
+ "ldi 2,%0\n\t"
+ "extrw,u,<> %1,15,8,%%r0\n\t"
+ "ldi 1,%0\n\t"
+ "extrw,u,<> %1,7,8,%%r0\n\t"
+ "ldi 0,%0"
+ : "=r"(ret) : "r"(x), "0"(3));
+
+ return ret;
+}
+
+/* Similarly, but perform the test for two longs simultaneously. */
+
+static inline unsigned int
+whichzero2(unsigned long int x1, unsigned long int x2)
+{
+ unsigned int ret;
+
+ _Static_assert (sizeof(x1) == 4, "64-bit not supported");
+
+ /* Since we have no clz insn, direct tests of the bytes is faster
+ than loading up the constants to do the masking. */
+ asm ("extrw,u,= %1,23,8,%%r0\n\t"
+ "extrw,u,<> %2,23,8,%%r0\n\t"
+ "ldi 2,%0\n\t"
+ "extrw,u,= %1,15,8,%%r0\n\t"
+ "extrw,u,<> %2,15,8,%%r0\n\t"
+ "ldi 1,%0\n\t"
+ "extrw,u,= %1,7,8,%%r0\n\t"
+ "extrw,u,<> %2,7,8,%%r0\n\t"
+ "ldi 0,%0"
+ : "=r"(ret) : "r"(x1), "r"(x2), "0"(3));
+
+ return ret;
+}
+
+#endif /* whichzero.h */