========================================================================================================================
length=16, char=65: 9.85 (-146.38%) 4.00 ( 0.06%) 4.01 ( -0.24%) 3.61 ( 9.71%) 4.00
length=17, char=65: 10.24 (-154.44%) 3.99 ( 0.79%) 3.99 ( 0.85%) 3.61 ( 10.40%) 4.02
length=17, char=65: 10.24 (-156.51%) 4.00 ( -0.09%) 3.99 ( 0.06%) 3.61 ( 9.57%) 3.99
length=18, char=65: 10.63 (-166.37%) 3.99 ( 0.00%) 3.99 ( 0.03%) 3.61 ( 9.55%) 3.99
length=18, char=65: 10.63 (-166.43%) 4.00 ( -0.15%) 3.99 ( -0.06%) 3.61 ( 9.55%) 3.99
length=19, char=65: 11.01 (-176.02%) 4.00 ( -0.18%) 3.99 ( 0.00%) 3.61 ( 9.61%) 3.99
length=19, char=65: 11.02 (-176.50%) 3.99 ( -0.24%) 3.99 ( -0.15%) 3.60 ( 9.59%) 3.98
length=20, char=65: 11.40 (-185.69%) 3.99 ( 0.00%) 3.99 ( 0.09%) 3.61 ( 9.51%) 3.99
length=20, char=65: 11.41 (-185.78%) 4.02 ( -0.73%) 3.99 ( 0.06%) 3.60 ( 9.79%) 3.99
length=21, char=65: 11.82 (-196.30%) 3.99 ( -0.12%) 3.99 ( -0.03%) 3.61 ( 9.58%) 3.99
length=21, char=65: 11.81 (-196.08%) 4.00 ( -0.24%) 3.99 ( -0.12%) 3.61 ( 9.52%) 3.99
length=22, char=65: 12.19 (-204.73%) 3.99 ( 0.12%) 3.99 ( 0.15%) 3.61 ( 9.80%) 4.00
length=22, char=65: 12.19 (-205.45%) 3.99 ( -0.06%) 3.99 ( 0.00%) 3.61 ( 9.52%) 3.99
length=23, char=65: 12.58 (-215.43%) 3.99 ( -0.12%) 3.99 ( -0.03%) 3.61 ( 9.40%) 3.99
length=23, char=65: 12.57 (-215.18%) 3.99 ( -0.03%) 3.99 ( -0.03%) 3.64 ( 8.84%) 3.99
length=24, char=65: 12.96 (-224.85%) 3.99 ( -0.12%) 3.99 ( 0.00%) 3.61 ( 9.49%) 3.99
length=24, char=65: 12.96 (-223.23%) 4.00 ( 0.24%) 4.00 ( 0.37%) 3.62 ( 9.77%) 4.01
length=25, char=65: 13.36 (-234.64%) 4.00 ( -0.15%) 3.99 ( 0.03%) 3.61 ( 9.60%) 3.99
length=25, char=65: 13.35 (-234.37%) 3.99 ( -0.03%) 3.99 ( 0.00%) 3.60 ( 9.72%) 3.99
length=26, char=65: 13.74 (-243.99%) 4.00 ( -0.12%) 3.99 ( 0.03%) 3.61 ( 9.60%) 3.99
length=26, char=65: 13.74 (-241.73%) 4.00 ( 0.49%) 3.99 ( 0.79%) 3.61 ( 10.26%) 4.02
length=27, char=65: 14.12 (-254.01%) 3.99 ( -0.12%) 3.99 ( -0.03%) 3.61 ( 9.61%) 3.99
length=27, char=65: 14.12 (-251.99%) 3.99 ( 0.52%) 3.99 ( 0.64%) 4.13 ( -2.83%) 4.01
length=28, char=65: 14.52 (-263.56%) 4.00 ( -0.06%) 4.00 ( -0.12%) 3.61 ( 9.69%) 3.99
length=28, char=65: 14.52 (-263.84%) 4.00 ( -0.12%) 3.99 ( -0.09%) 3.61 ( 9.57%) 3.99
length=29, char=65: 14.90 (-273.05%) 4.00 ( -0.03%) 3.99 ( 0.06%) 3.61 ( 9.62%) 4.00
length=29, char=65: 14.90 (-273.18%) 3.99 ( -0.03%) 3.99 ( 0.09%) 3.61 ( 9.66%) 3.99
length=30, char=65: 15.29 (-283.12%) 4.02 ( -0.80%) 3.99 ( 0.03%) 3.60 ( 9.76%) 3.99
length=30, char=65: 15.29 (-282.94%) 3.99 ( 0.09%) 3.99 ( 0.03%) 3.61 ( 9.57%) 3.99
length=31, char=65: 15.68 (-293.08%) 4.00 ( -0.21%) 3.99 ( -0.12%) 3.61 ( 9.55%) 3.99
length=31, char=65: 15.68 (-292.93%) 4.00 ( -0.15%) 4.00 ( -0.15%) 3.61 ( 9.55%) 3.99
length=32, char=65: 16.07 (-345.68%) 3.62 ( -0.34%) 3.63 ( -0.74%) 3.23 ( 10.29%) 3.60
length=32, char=65: 16.07 (-345.73%) 3.61 ( -0.07%) 3.61 ( 0.00%) 3.22 ( 10.66%) 3.61
length=64, char=65: 28.49 (-689.25%) 3.60 ( 0.17%) 3.61 ( 0.03%) 3.64 ( -0.74%) 3.61
length=64, char=65: 29.11 (-706.87%) 3.62 ( -0.34%) 3.61 ( -0.14%) 3.62 ( -0.27%) 3.61
length=96, char=65: 40.88 (-1032.78%) 3.61 ( -0.07%) 3.61 ( -0.10%) 3.23 ( 10.49%) 3.61
length=96, char=65: 40.87 (-1034.21%) 3.61 ( -0.07%) 3.61 ( -0.24%) 3.23 ( 10.33%) 3.60
length=128, char=65: 53.31 (-1234.78%) 4.00 ( -0.12%) 3.63 ( 9.23%) 4.00 ( -0.15%) 3.99
length=128, char=65: 53.32 (-1234.47%) 4.01 ( -0.40%) 3.61 ( 9.59%) 4.00 ( 0.00%) 4.00
length=160, char=65: 70.67 (-1253.63%) 5.22 ( 0.00%) 5.54 ( -6.08%) 4.39 ( 15.95%) 5.22
length=160, char=65: 71.30 (-1266.84%) 5.24 ( -0.40%) 5.64 ( -8.05%) 4.39 ( 15.91%) 5.22
length=192, char=65: 82.85 (-1487.67%) 5.21 ( 0.07%) 5.18 ( 0.82%) 5.34 ( -2.39%) 5.22
length=192, char=65: 82.85 (-1486.81%) 5.21 ( 0.16%) 5.16 ( 1.08%) 5.35 ( -2.43%) 5.22
length=224, char=65: 95.35 (-471.44%) 16.68 ( 0.01%) 6.45 ( 61.36%) 5.99 ( 64.11%) 16.69
length=224, char=65: 96.00 (-475.09%) 16.70 ( -0.04%) 6.44 ( 61.41%) 5.99 ( 64.14%) 16.69
length=256, char=65: 107.47 (-544.11%) 16.69 ( -0.01%) 6.83 ( 59.04%) 6.89 ( 58.70%) 16.68
length=256, char=65: 107.47 (-544.19%) 16.68 ( -0.01%) 6.85 ( 58.94%) 6.88 ( 58.76%) 16.68
length=288, char=65: 120.57 (-1349.57%) 8.33 ( -0.09%) 7.58 ( 8.85%) 7.54 ( 9.35%) 8.32
length=288, char=65: 120.03 (-1347.51%) 8.29 ( 0.01%) 7.56 ( 8.88%) 7.51 ( 9.39%) 8.29
length=320, char=65: 132.08 (-1492.79%) 8.29 ( -0.01%) 8.24 ( 0.62%) 8.42 ( -1.52%) 8.29
length=320, char=65: 132.53 (-1497.78%) 8.31 ( -0.19%) 8.22 ( 0.88%) 8.42 ( -1.49%) 8.29
length=352, char=65: 144.78 (-1372.65%) 9.84 ( -0.06%) 9.10 ( 7.44%) 9.23 ( 6.15%) 9.83
length=352, char=65: 144.77 (-1372.49%) 9.83 ( 0.00%) 9.09 ( 7.55%) 9.10 ( 7.49%) 9.83
length=384, char=65: 157.91 (-1505.36%) 9.85 ( -0.15%) 9.78 ( 0.58%) 9.95 ( -1.18%) 9.84
length=384, char=65: 156.82 (-1494.91%) 9.83 ( 0.01%) 9.76 ( 0.71%) 9.95 ( -1.19%) 9.83
length=416, char=65: 169.83 (-1392.95%) 11.38 ( -0.08%) 10.65 ( 6.35%) 10.81 ( 4.98%) 11.38
length=416, char=65: 169.00 (-1386.09%) 11.37 ( 0.02%) 10.64 ( 6.46%) 10.62 ( 6.61%) 11.37
length=448, char=65: 181.98 (-1500.22%) 11.38 ( -0.06%) 11.32 ( 0.42%) 11.51 ( -1.25%) 11.37
length=448, char=65: 181.51 (-1496.96%) 11.37 ( -0.06%) 11.30 ( 0.56%) 11.50 ( -1.17%) 11.37
length=480, char=65: 194.46 (-1394.52%) 13.01 ( -0.01%) 12.19 ( 6.34%) 12.24 ( 5.93%) 13.01
length=480, char=65: 194.00 (-1377.70%) 13.13 ( 0.00%) 12.18 ( 7.22%) 12.13 ( 7.57%) 13.13
length=16, char=0: 9.85 (-146.59%) 4.03 ( -0.95%) 4.00 ( -0.12%) 3.61 ( 9.72%) 4.00
length=17, char=0: 10.24 (-156.42%) 3.99 ( 0.12%) 4.00 ( -0.06%) 3.61 ( 9.69%) 3.99
length=17, char=0: 10.24 (-156.78%) 3.99 ( -0.15%) 3.99 ( -0.15%) 3.60 ( 9.61%) 3.99
length=18, char=0: 10.62 (-166.15%) 3.99 ( -0.03%) 3.99 ( 0.00%) 3.61 ( 9.60%) 3.99
length=18, char=0: 10.63 (-166.29%) 4.62 (-15.78%) 4.00 ( -0.31%) 3.61 ( 9.57%) 3.99
length=19, char=0: 11.02 (-176.16%) 3.99 ( -0.12%) 3.99 ( -0.06%) 3.61 ( 9.52%) 3.99
length=19, char=0: 11.01 (-175.51%) 3.99 ( 0.09%) 3.99 ( 0.21%) 3.64 ( 9.01%) 4.00
length=20, char=0: 11.40 (-185.45%) 3.99 ( 0.06%) 3.99 ( 0.06%) 3.62 ( 9.26%) 3.99
length=20, char=0: 11.40 (-185.72%) 3.99 ( 0.09%) 3.99 ( -0.03%) 3.60 ( 9.72%) 3.99
length=21, char=0: 11.79 (-195.41%) 3.99 ( -0.03%) 3.99 ( 0.03%) 3.61 ( 9.64%) 3.99
length=21, char=0: 11.79 (-195.23%) 3.99 ( 0.03%) 3.99 ( 0.12%) 3.60 ( 9.75%) 3.99
length=22, char=0: 12.18 (-205.05%) 4.00 ( -0.09%) 3.99 ( 0.03%) 3.61 ( 9.63%) 3.99
length=22, char=0: 12.18 (-205.14%) 4.00 ( -0.15%) 3.99 ( -0.03%) 3.61 ( 9.61%) 3.99
length=23, char=0: 12.60 (-215.60%) 4.00 ( -0.15%) 3.99 ( 0.12%) 3.60 ( 9.72%) 3.99
length=23, char=0: 12.56 (-214.71%) 3.99 ( -0.03%) 3.99 ( 0.03%) 3.61 ( 9.60%) 3.99
length=24, char=0: 12.95 (-224.30%) 3.99 ( 0.03%) 3.99 ( 0.06%) 3.61 ( 9.60%) 3.99
length=24, char=0: 12.95 (-224.27%) 3.99 ( 0.00%) 3.99 ( 0.18%) 3.61 ( 9.69%) 3.99
length=25, char=0: 13.34 (-233.74%) 3.99 ( 0.09%) 4.51 (-12.82%) 3.61 ( 9.68%) 4.00
length=25, char=0: 13.34 (-234.52%) 3.99 ( -0.12%) 3.99 ( -0.12%) 3.61 ( 9.58%) 3.99
length=26, char=0: 13.74 (-244.40%) 4.00 ( -0.18%) 4.02 ( -0.83%) 3.61 ( 9.49%) 3.99
length=26, char=0: 13.73 (-244.28%) 3.99 ( -0.03%) 3.99 ( -0.09%) 3.60 ( 9.64%) 3.99
length=27, char=0: 14.12 (-253.92%) 4.00 ( -0.18%) 3.99 ( 0.03%) 3.60 ( 9.67%) 3.99
length=27, char=0: 14.12 (-254.03%) 3.99 ( -0.03%) 3.99 ( 0.00%) 3.60 ( 9.64%) 3.99
length=28, char=0: 14.51 (-263.04%) 3.99 ( 0.15%) 3.99 ( 0.12%) 3.61 ( 9.77%) 4.00
length=28, char=0: 14.51 (-263.50%) 3.99 ( 0.03%) 3.99 ( 0.03%) 3.61 ( 9.69%) 3.99
length=29, char=0: 14.90 (-270.95%) 3.99 ( 0.58%) 3.99 ( 0.70%) 3.60 ( 10.30%) 4.02
length=29, char=0: 14.90 (-273.29%) 3.99 ( 0.00%) 3.99 ( -0.06%) 3.61 ( 9.57%) 3.99
length=30, char=0: 15.29 (-282.76%) 3.99 ( 0.09%) 4.00 ( -0.06%) 3.61 ( 9.69%) 3.99
length=30, char=0: 15.29 (-283.14%) 4.00 ( -0.15%) 3.99 ( 0.00%) 3.61 ( 9.48%) 3.99
length=31, char=0: 15.68 (-293.14%) 3.99 ( -0.12%) 3.99 ( -0.12%) 3.61 ( 9.58%) 3.99
length=31, char=0: 15.69 (-293.36%) 3.99 ( -0.15%) 3.99 ( 0.03%) 3.61 ( 9.52%) 3.99
length=32, char=0: 16.07 (-345.22%) 3.62 ( -0.20%) 3.61 ( 0.00%) 3.23 ( 10.62%) 3.61
length=32, char=0: 16.10 (-346.63%) 3.62 ( -0.30%) 3.61 ( -0.03%) 3.22 ( 10.74%) 3.60
length=64, char=0: 28.49 (-691.26%) 3.60 ( -0.10%) 3.61 ( -0.34%) 3.61 ( -0.20%) 3.60
length=64, char=0: 28.49 (-690.31%) 3.60 ( 0.00%) 3.61 ( -0.10%) 3.61 ( -0.03%) 3.60
length=96, char=0: 55.09 (-1427.24%) 3.61 ( -0.14%) 3.60 ( 0.07%) 3.24 ( 10.12%) 3.61
length=96, char=0: 51.76 (-1334.40%) 3.61 ( 0.00%) 3.60 ( 0.14%) 3.23 ( 10.52%) 3.61
length=128, char=0: 64.00 (-1501.44%) 4.00 ( 0.03%) 3.63 ( 9.25%) 4.00 ( -0.18%) 4.00
length=128, char=0: 64.72 (-1519.77%) 4.01 ( -0.24%) 3.65 ( 8.77%) 3.99 ( 0.03%) 4.00
length=160, char=0: 76.39 (-1365.53%) 5.21 ( 0.02%) 5.64 ( -8.22%) 4.39 ( 15.71%) 5.21
length=160, char=0: 76.39 (-1367.17%) 5.21 ( -0.14%) 5.64 ( -8.25%) 4.38 ( 15.87%) 5.21
length=192, char=0: 88.72 (-1603.26%) 5.22 ( -0.16%) 5.18 ( 0.47%) 5.34 ( -2.60%) 5.21
length=192, char=0: 89.05 (-1608.39%) 5.23 ( -0.30%) 5.18 ( 0.61%) 5.34 ( -2.51%) 5.21
length=224, char=0: 95.34 (-471.58%) 16.68 ( -0.01%) 6.44 ( 61.37%) 5.99 ( 64.08%) 16.68
length=224, char=0: 95.34 (-470.55%) 16.68 ( 0.17%) 6.44 ( 61.47%) 5.98 ( 64.21%) 16.71
length=256, char=0: 107.46 (-1175.45%) 8.42 ( 0.06%) 6.69 ( 20.62%) 6.88 ( 18.33%) 8.43
length=256, char=0: 107.99 (-1182.66%) 8.44 ( -0.28%) 6.69 ( 20.57%) 6.88 ( 18.27%) 8.42
length=288, char=0: 120.03 (-1325.00%) 8.42 ( -0.01%) 7.55 ( 10.30%) 7.53 ( 10.61%) 8.42
length=288, char=0: 120.03 (-1324.60%) 8.42 ( 0.07%) 7.55 ( 10.33%) 7.52 ( 10.75%) 8.43
length=320, char=0: 132.58 (-1471.50%) 8.43 ( 0.03%) 8.24 ( 2.37%) 8.42 ( 0.17%) 8.44
length=320, char=0: 132.09 (-1465.48%) 8.44 ( 0.01%) 8.22 ( 2.55%) 8.42 ( 0.25%) 8.44
length=352, char=0: 144.77 (-729.18%) 10.62 ( 39.17%) 9.10 ( 47.87%) 9.07 ( 48.04%) 17.46
length=352, char=0: 144.77 (-729.17%) 9.96 ( 42.95%) 9.09 ( 47.95%) 9.06 ( 48.10%) 17.46
length=384, char=0: 156.81 (-797.96%) 9.95 ( 43.00%) 9.77 ( 44.07%) 9.96 ( 42.97%) 17.46
length=384, char=0: 157.56 (-802.27%) 9.97 ( 42.89%) 9.77 ( 44.06%) 9.96 ( 42.96%) 17.46
length=416, char=0: 169.38 (-774.13%) 17.46 ( 9.91%) 10.64 ( 45.09%) 10.81 ( 44.20%) 19.38
length=416, char=0: 169.58 (-830.31%) 17.47 ( 4.17%) 10.64 ( 41.61%) 10.81 ( 40.70%) 18.23
length=448, char=0: 185.67 (-857.80%) 17.45 ( 9.97%) 11.31 ( 41.65%) 11.54 ( 40.45%) 19.38
length=448, char=0: 182.09 (-839.56%) 17.47 ( 9.86%) 11.30 ( 41.70%) 11.49 ( 40.69%) 19.38
length=480, char=0: 194.00 (-1456.58%) 13.42 ( -7.69%) 12.18 ( 2.25%) 12.23 ( 1.89%) 12.46
length=480, char=0: 194.25 (-1411.94%) 13.43 ( -4.53%) 12.19 ( 5.15%) 12.13 ( 5.56%) 12.85
---
sysdeps/aarch64/multiarch/Makefile | 2 +-
sysdeps/aarch64/multiarch/ifunc-impl-list.c | 1 +
sysdeps/aarch64/multiarch/memset.c | 5 +-
sysdeps/aarch64/multiarch/memset_kunpeng.S | 124 ++++++++++++++++++++++++++++
4 files changed, 130 insertions(+), 2 deletions(-)
create mode 100644 sysdeps/aarch64/multiarch/memset_kunpeng.S
@@ -1,7 +1,7 @@
ifeq ($(subdir),string)
sysdep_routines += memcpy_generic memcpy_thunderx memcpy_thunderx2 \
memcpy_falkor memmove_falkor \
- memset_generic memset_falkor memset_emag \
+ memset_generic memset_falkor memset_emag memset_kunpeng \
memchr_generic memchr_nosimd \
strlen_generic strlen_asimd
endif
@@ -53,6 +53,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
can do a comparative analysis with __memset_generic. */
IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor)
IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_emag)
+ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng)
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic))
IFUNC_IMPL (i, name, memchr,
IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_nosimd)
@@ -30,10 +30,13 @@ extern __typeof (__redirect_memset) __libc_memset;
extern __typeof (__redirect_memset) __memset_falkor attribute_hidden;
extern __typeof (__redirect_memset) __memset_emag attribute_hidden;
+extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden;
extern __typeof (__redirect_memset) __memset_generic attribute_hidden;
libc_ifunc (__libc_memset,
- ((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64
+ IS_KUNPENG (midr)
+ ?__memset_kunpeng
+ : ((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64
? __memset_falkor
: (IS_EMAG (midr) && zva_size == 64
? __memset_emag
new file mode 100644
@@ -0,0 +1,124 @@
+/* Optimized memset for Huawei Kunpeng processor.
+ Copyright (C) 2012-2019 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sysdeps/aarch64/memset-reg.h>
+
+#if IS_IN (libc)
+# define MEMSET __memset_kunpeng
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses
+ *
+ */
+
+/* __memset_kunpeng: fill COUNT bytes starting at DSTIN with the low
+   byte of VALW.  The register aliases used below (dstin, valw, count,
+   dst, dstend) are provided by memset-reg.h.
+
+   The fill byte is replicated into all 16 bytes of q0 and every path
+   stores from q0 or one of its narrower views (d0/s0/h0/b0).  Instead
+   of byte loops, each size class issues a fixed set of possibly
+   overlapping stores anchored at the start and at the end of the
+   buffer.  DSTIN is never written, so it holds the original
+   destination pointer at every ret.
+
+   Dispatch on COUNT:
+       0..15    L(less16)
+      16..63    inline, directly after the dispatch
+      64..127   L(set112)
+     128..      L(set_long)  */
+
+ENTRY_ALIGN (MEMSET, 6)
+
+ /* NOTE(review): DELOUSE comes from sysdep.h; presumably it sanitizes
+    argument registers 0 and 2 (pointer and size) - confirm.  */
+ DELOUSE (0)
+ DELOUSE (2)
+
+ dup v0.16B, valw /* q0 = fill byte in all 16 lanes. */
+ add dstend, dstin, count /* dstend = one past the last byte. */
+
+ cmp count, 128
+ b.hs L(set_long)
+
+ cmp count, 16
+ b.lo L(less16)
+
+ /* Set 16..127 bytes. The first 16 bytes are always stored here;
+    bit 6 of count separates 64..127 from 16..63. */
+ str q0, [dstin]
+ tbnz count, 6, L(set112)
+ /* 16..63 bytes: first 16 + last 16, and when count >= 32 (bit 5)
+    also the second 16 from the start and the second-to-last 16. */
+ str q0, [dstend, -16]
+ tbz count, 5, 1f
+ str q0, [dstin, 16]
+ str q0, [dstend, -32]
+1: ret
+
+ .p2align 4
+ /* Set 64..127 bytes. The first 16 bytes were already stored by the
+    caller path. Write 64 bytes from the start (96 when count >= 96)
+    and the last 32 bytes (48 on the unaligned path). */
+L(set112):
+ tst dstin, 15 /* Is dstin 16-byte aligned? */
+ b.ne 2f
+ /* Aligned destination: all stores below are 16-byte aligned except
+    possibly the final pair anchored at dstend. */
+ str q0, [dstin, 16]
+ stp q0, q0, [dstin, 32]
+ tbz count, 5, 1f /* count < 96: skip bytes 64..95. */
+ stp q0, q0, [dstin, 64]
+1: stp q0, q0, [dstend, -32]
+ ret
+ .p2align 4
+ /* Unaligned destination: dst = dstin rounded down to 16. The stores
+    at dst + 16 .. dst + 63 start at most 15 bytes after dstin + 1, so
+    they are contiguous with the initial 16-byte store at dstin.
+    Because they end up to 15 bytes short of dstin + 64 (or + 96), the
+    tail is covered with 48 bytes from the end instead of 32. */
+2: bic dst, dstin, 15
+ stp q0, q0, [dst, 16]
+ str q0, [dst, 48]
+ tbz count, 5, 3f /* count < 96: skip dst + 64 .. dst + 95. */
+ stp q0, q0, [dst, 64]
+3: stp q0, q0, [dstend, -48]
+ str q0, [dstend, -16]
+ ret
+
+ .p2align 4
+ /* Set 0..15 bytes. */
+L(less16):
+ tbz count, 3, L(less8)
+ /* 8..15 bytes: two overlapping 8-byte stores. */
+ str d0, [dstin]
+ str d0, [dstend, -8]
+ ret
+L(less8):
+ tbz count, 2, 2f
+ /* 4..7 bytes: two overlapping 4-byte stores. */
+ str s0, [dstin]
+ str s0, [dstend, -4]
+ ret
+ /* 0..3 bytes: nothing for 0; otherwise the first byte, plus the
+    last two bytes when count is 2 or 3 (bit 1). */
+2: cbz count, 3f
+ str b0, [dstin]
+ tbz count, 1, 3f
+ str h0, [dstend, -2]
+3: ret
+
+ .p2align 4
+ /* Set 128 bytes or more.
+    Setup: the first 16 bytes are stored unaligned at dstin; every
+    loop store is then 16-byte aligned. With A = dstin rounded down
+    to 16, dst starts at A - 16 and each loop step stores the 64
+    bytes at [dst + 32, dst + 96) and advances dst by 64, so the
+    first step begins at A + 16, contiguous with the initial store.
+    count starts at (dstend - A) - (64 + 16 + 1). The loop exits on
+    the first subs that borrows; at that point between 1 and 64 bytes
+    remain past dst + 32, and bit 5 of count is set exactly when more
+    than 32 remain. The loop body is unrolled three times. */
+L(set_long):
+ bic dst, dstin, 15
+ str q0, [dstin]
+ sub count, dstend, dst /* Count is 16 too large. */
+ sub dst, dst, 16 /* Dst is biased by -32. */
+ sub count, count, 64 + 16 + 1 /* Adjust count and bias for loop. */
+1: stp q0, q0, [dst, 32]
+ stp q0, q0, [dst, 64]!
+ subs count, count, 64
+ b.lo 1f
+ stp q0, q0, [dst, 32]
+ stp q0, q0, [dst, 64]!
+ subs count, count, 64
+ b.lo 1f
+ stp q0, q0, [dst, 32]
+ stp q0, q0, [dst, 64]!
+ subs count, count, 64
+ b.hs 1b
+
+ /* Tail: 1..64 bytes left. If more than 32 remain, store 32 more
+    aligned bytes; the last 32 bytes are always stored from dstend
+    and may overlap what was already written. */
+1: tbz count, 5, 2f
+ str q0, [dst, 32]
+ str q0, [dst, 48]
+2: stp q0, q0, [dstend, -32]
+ ret
+
+END (MEMSET)
+libc_hidden_builtin_def (MEMSET)
+#endif