aarch64: revert memcpy optimization for kunpeng to avoid performance degradation
Commit Message
In commit 863d775c481704baaa41855fc93e5a1ca2dc6bf6, kunpeng920 was added to the CPUs that select __memcpy_falkor.
However, this degrades performance when the copy size is large, e.g. 100k bytes.
This is the result, tested in glibc-2.28:
before backport after backport Performance improvement
memcpy_1k 2 2 0.00%
memcpy_10k 26 26 0.00%
memcpy_100k 343 423 -18.91%
memcpy_1m 9563 11058 -13.52%
This is the demo
#include "stdio.h"
#include "string.h"
#include "stdlib.h"
/* Source and destination buffers.  Their size (1 MiB each) is the largest
   copy the demo can time without running off the end of either array.  */
char a[1024*1024] = {12};
char b[1024*1024] = {13};

/* Time memcpy using the cntvct_el0 counter.

   argv[1] is the number of memcpy calls to issue and argv[2] is the number
   of bytes copied by each call.  Prints the elapsed counter ticks divided
   by the number of calls.  Returns 0 on success, 1 when the arguments are
   missing or out of range.  */
int main(int argc, char *argv[])
{
    if (argc < 3) {
        fprintf(stderr, "usage: %s <iterations> <size>\n",
                argc > 0 ? argv[0] : "memcpy");
        return 1;
    }

    int i = atoi(argv[1]);
    int j;
    int size = atoi(argv[2]);
    /* Unsigned so that the values match the %llu conversion below.  */
    unsigned long long begin, end;

    /* The result is divided by i, so zero (or a negative count, for which
       the loop would never run) cannot produce a meaningful average.  */
    if (i <= 0) {
        fprintf(stderr, "iterations must be a positive integer\n");
        return 1;
    }

    /* A negative size would convert to a huge size_t, and anything larger
       than the buffers would overflow them.  */
    if (size < 0 || (size_t) size > sizeof a) {
        fprintf(stderr, "size must be between 0 and %zu\n", sizeof a);
        return 1;
    }

    asm volatile("mrs %0, cntvct_el0" : "=r" (begin));
    for (j = 0; j < i; j++)
        memcpy(b, a, (size_t) size);
    asm volatile("mrs %0, cntvct_el0" : "=r" (end));

    printf("%llu\n", (end - begin) / i);
    return 0;
}
# gcc -g -O0 memcpy.c -o memcpy
# taskset -c 10 ./memcpy 100000 1024
Co-authored-by: liqingqing <liqingqing3@huawei.com>
---
sysdeps/aarch64/multiarch/memcpy.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
@@ -37,7 +37,7 @@ extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
libc_ifunc (__libc_memcpy,
(IS_THUNDERX (midr)
? __memcpy_thunderx
- : (IS_FALKOR (midr) || IS_PHECDA (midr) || IS_KUNPENG920 (midr)
+ : (IS_FALKOR (midr) || IS_PHECDA (midr)
? __memcpy_falkor
: (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
? __memcpy_thunderx2