diff --git a/sysdeps/powerpc/powerpc64/le/power10/memmove.S b/sysdeps/powerpc/powerpc64/le/power10/memmove.S
new file mode 100644
index 0000000000..7cff5ef2ac
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/power10/memmove.S
@@ -0,0 +1,313 @@
+/* Optimized memmove implementation for POWER10.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+
+/* void* [r3] memmove (void *dest [r3], const void *src [r4], size_t len [r5])
+
+   This optimization checks if 'src' and 'dst' overlap.  If they do not
+   or 'src' is ahead of 'dest' then it copies forward.
+   Otherwise, an optimized backward copy is used.  */
+
+#ifndef MEMMOVE
+# define MEMMOVE memmove
+#endif
+	.machine power9
+ENTRY_TOCLESS (MEMMOVE, 5)
+	CALL_MCOUNT 3
+
+	.p2align 5
+	/* Check if there is overlap, if so it will branch to backward copy.  */
+	subf	r9,r4,r3
+	cmpld	cr7,r9,r5
+	blt	cr7,L(memmove_bwd)
+
+	/* Fast path for length shorter than 16 bytes.  */
+	sldi	r7,r5,56
+	lxvl	32+v2,r4,r7
+	stxvl	32+v2,r3,r7
+	subic.	r8,r5,16
+	blelr
+
+	cmpldi	cr6,r5,256
+	bge	cr6,L(ge_256)
+	/* Account for the first 16-byte copy.  For shorter lengths the
+	   alignment either slows down or is irrelevant, so this comparison
+	   is also used to skip the alignment code.  */
+	addi	r4,r4,16
+	addi	r11,r3,16 /* use r11 to keep dest address on r3.  */
+	subi	r5,r5,16
+	b	L(loop_head)
+
+	.p2align 5
+L(ge_256):
+	/* Account for the first copy <= 16 bytes.  This is necessary for
+	   memmove because at this point the src address can be in front of the
+	   dest address.  */
+	clrldi	r9,r5,56
+	li	r8,16
+	cmpldi	r9,16
+	iselgt	r9,r8,r9
+	add	r4,r4,r9
+	add	r11,r3,r9 /* use r11 to keep dest address on r3.  */
+	sub	r5,r5,r9
+
+	/* Align dest to 16 bytes.  */
+	neg	r7,r3
+	clrldi.	r9,r7,60
+	beq	L(loop_head)
+
+	.p2align 5
+	sldi	r6,r9,56
+	lxvl	32+v0,r4,r6
+	stxvl	32+v0,r11,r6
+	sub	r5,r5,r9
+	add	r4,r4,r9
+	add	r11,r11,r9
+
+L(loop_head):
+	cmpldi	r5,63
+	ble	L(final_64)
+
+	srdi.	r7,r5,7
+	beq	L(loop_tail)
+
+	mtctr	r7
+
+/* Main loop that copies 128 bytes each iteration.  */
+	.p2align 5
+L(loop):
+	addi	r9,r4,64
+	addi	r10,r11,64
+
+	lxv	32+v0,0(r4)
+	lxv	32+v1,16(r4)
+	lxv	32+v2,32(r4)
+	lxv	32+v3,48(r4)
+
+	stxv	32+v0,0(r11)
+	stxv	32+v1,16(r11)
+	stxv	32+v2,32(r11)
+	stxv	32+v3,48(r11)
+
+	addi	r4,r4,128
+	addi	r11,r11,128
+
+	lxv	32+v4,0(r9)
+	lxv	32+v5,16(r9)
+	lxv	32+v6,32(r9)
+	lxv	32+v7,48(r9)
+
+	stxv	32+v4,0(r10)
+	stxv	32+v5,16(r10)
+	stxv	32+v6,32(r10)
+	stxv	32+v7,48(r10)
+
+	bdnz	L(loop)
+	clrldi.	r5,r5,57
+	beqlr
+
+/* Copy 64 bytes.  */
+	.p2align 5
+L(loop_tail):
+	cmpldi	cr5,r5,63
+	ble	cr5,L(final_64)
+
+	lxv	32+v0,0(r4)
+	lxv	32+v1,16(r4)
+	lxv	32+v2,32(r4)
+	lxv	32+v3,48(r4)
+
+	stxv	32+v0,0(r11)
+	stxv	32+v1,16(r11)
+	stxv	32+v2,32(r11)
+	stxv	32+v3,48(r11)
+
+	addi	r4,r4,64
+	addi	r11,r11,64
+	subi	r5,r5,64
+
+/* Copies the last 1-63 bytes.  */
+	.p2align 5
+L(final_64):
+	/* r8 holds the number of bytes that will be copied with lxv/stxv.  */
+	clrrdi.	r8,r5,4
+	beq	L(tail1)
+
+	cmpldi	cr5,r5,32
+	lxv	32+v0,0(r4)
+	blt	cr5,L(tail2)
+
+	cmpldi	cr6,r5,48
+	lxv	32+v1,16(r4)
+	blt	cr6,L(tail3)
+
+	lxv	32+v2,32(r4)
+
+	.p2align 5
+L(tail4):
+	stxv	32+v2,32(r11)
+L(tail3):
+	stxv	32+v1,16(r11)
+L(tail2):
+	stxv	32+v0,0(r11)
+	sub	r5,r5,r8
+	add	r4,r4,r8
+	add	r11,r11,r8
+	.p2align 5
+L(tail1):
+	sldi	r6,r5,56
+	lxvl	v4,r4,r6
+	stxvl	v4,r11,r6
+	blr
+
+/* If dest and src overlap, we should copy backwards.  */
+L(memmove_bwd):
+	add	r11,r3,r5
+	add	r4,r4,r5
+
+	/* Optimization for length smaller than 16 bytes.  */
+	cmpldi	cr5,r5,15
+	ble	cr5,L(tail1_bwd)
+
+	/* For shorter lengths the alignment either slows down or is
+	   irrelevant.  The forward copy already needs a comparison against
+	   256; here 128 is used as it reduces code and improves readability.  */
+	cmpldi	cr7,r5,128
+	blt	cr7,L(bwd_loop_tail)
+
+	.p2align 5
+	clrldi.	r9,r11,60
+	beq	L(bwd_loop_head)
+	sub	r4,r4,r9
+	sub	r11,r11,r9
+	lxv	32+v0,0(r4)
+	sldi	r6,r9,56
+	stxvl	32+v0,r11,r6
+	sub	r5,r5,r9
+
+L(bwd_loop_head):
+	srdi.	r7,r5,7
+	beq	L(bwd_loop_tail)
+
+	mtctr	r7
+
+/* Main loop that copies 128 bytes every iteration.  */
+	.p2align 5
+L(bwd_loop):
+	addi	r9,r4,-64
+	addi	r10,r11,-64
+
+	lxv	32+v0,-16(r4)
+	lxv	32+v1,-32(r4)
+	lxv	32+v2,-48(r4)
+	lxv	32+v3,-64(r4)
+
+	stxv	32+v0,-16(r11)
+	stxv	32+v1,-32(r11)
+	stxv	32+v2,-48(r11)
+	stxv	32+v3,-64(r11)
+
+	addi	r4,r4,-128
+	addi	r11,r11,-128
+
+	lxv	32+v0,-16(r9)
+	lxv	32+v1,-32(r9)
+	lxv	32+v2,-48(r9)
+	lxv	32+v3,-64(r9)
+
+	stxv	32+v0,-16(r10)
+	stxv	32+v1,-32(r10)
+	stxv	32+v2,-48(r10)
+	stxv	32+v3,-64(r10)
+
+	bdnz	L(bwd_loop)
+	clrldi.	r5,r5,57
+	beqlr
+
+/* Copy 64 bytes.  */
+	.p2align 5
+L(bwd_loop_tail):
+	cmpldi	cr5,r5,63
+	ble	cr5,L(bwd_final_64)
+
+	addi	r4,r4,-64
+	addi	r11,r11,-64
+
+	lxv	32+v0,0(r4)
+	lxv	32+v1,16(r4)
+	lxv	32+v2,32(r4)
+	lxv	32+v3,48(r4)
+
+	stxv	32+v0,0(r11)
+	stxv	32+v1,16(r11)
+	stxv	32+v2,32(r11)
+	stxv	32+v3,48(r11)
+
+	subi	r5,r5,64
+
+/* Copies the last 1-63 bytes.  */
+	.p2align 5
+L(bwd_final_64):
+	/* r8 holds the number of bytes that will be copied with lxv/stxv.  */
+	clrrdi.	r8,r5,4
+	beq	L(tail1_bwd)
+
+	cmpldi	cr5,r5,32
+	lxv	32+v2,-16(r4)
+	blt	cr5,L(tail2_bwd)
+
+	cmpldi	cr6,r5,48
+	lxv	32+v1,-32(r4)
+	blt	cr6,L(tail3_bwd)
+
+	lxv	32+v0,-48(r4)
+
+	.p2align 5
+L(tail4_bwd):
+	stxv	32+v0,-48(r11)
+L(tail3_bwd):
+	stxv	32+v1,-32(r11)
+L(tail2_bwd):
+	stxv	32+v2,-16(r11)
+	sub	r4,r4,r5
+	sub	r11,r11,r5
+	sub	r5,r5,r8
+	sldi	r6,r5,56
+	lxvl	v4,r4,r6
+	stxvl	v4,r11,r6
+	blr
+
+/* Copy last 16 bytes.  */
+	.p2align 5
+L(tail1_bwd):
+	sub	r4,r4,r5
+	sub	r11,r11,r5
+	sldi	r6,r5,56
+	lxvl	v4,r4,r6
+	stxvl	v4,r11,r6
+	blr
+
+
+END (MEMMOVE)
+
+/* The previous '#ifdef DEFINE_STRLEN_HIDDEN_DEF' guard was a copy/paste
+   from strlen and is never defined for memmove, and no __memmove symbol
+   exists here for a weak_alias; the hidden def must be unconditional.  */
+libc_hidden_builtin_def (memmove)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 8aa46a3702..16ad1ab439 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -24,7 +24,8 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
stpncpy-power8 stpncpy-power7 stpncpy-ppc64 \
strcmp-power8 strcmp-power7 strcmp-ppc64 \
strcat-power8 strcat-power7 strcat-ppc64 \
- memmove-power7 memmove-ppc64 wordcopy-ppc64 bcopy-ppc64 \
+ memmove-power10 memmove-power7 memmove-ppc64 \
+ wordcopy-ppc64 bcopy-ppc64 \
strncpy-power8 strstr-power7 strstr-ppc64 \
strspn-power8 strspn-ppc64 strcspn-power8 strcspn-ppc64 \
strlen-power8 strcasestr-power8 strcasestr-ppc64 \
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 1a6993616f..d1c159f2f7 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -67,6 +67,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   /* Support sysdeps/powerpc/powerpc64/multiarch/memmove.c.  */
   IFUNC_IMPL (i, name, memmove,
+#ifdef __LITTLE_ENDIAN__
+	      /* Require BOTH ISA 3.1 and ISEL.  A single mask test
+		 'hwcap2 & (ARCH_3_1 | HAS_ISEL)' matches either bit, which
+		 would wrongly select this variant on POWER8 (has ISEL and
+		 VSX but not the P9+ lxv/stxvl insns used by it).  */
+	      IFUNC_IMPL_ADD (array, i, memmove,
+			      hwcap2 & PPC_FEATURE2_ARCH_3_1
+			      && hwcap2 & PPC_FEATURE2_HAS_ISEL
+			      && hwcap & PPC_FEATURE_HAS_VSX,
+			      __memmove_power10)
+#endif
 	      IFUNC_IMPL_ADD (array, i, memmove, hwcap & PPC_FEATURE_HAS_VSX,
 			      __memmove_power7)
 	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ppc))
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memmove-power10.S b/sysdeps/powerpc/powerpc64/multiarch/memmove-power10.S
new file mode 100644
index 0000000000..d6d8ea83ec
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/memmove-power10.S
@@ -0,0 +1,24 @@
+/* Optimized memmove implementation for POWER10.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define MEMMOVE __memmove_power10
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/le/power10/memmove.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memmove.c b/sysdeps/powerpc/powerpc64/multiarch/memmove.c
index 9bec61a321..4704636f5d 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memmove.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/memmove.c
@@ -28,14 +28,22 @@
 # include "init-arch.h"
 
 extern __typeof (__redirect_memmove) __libc_memmove;
-
 extern __typeof (__redirect_memmove) __memmove_ppc attribute_hidden;
 extern __typeof (__redirect_memmove) __memmove_power7 attribute_hidden;
+#ifdef __LITTLE_ENDIAN__
+extern __typeof (__redirect_memmove) __memmove_power10 attribute_hidden;
+#endif
 
 libc_ifunc (__libc_memmove,
+#ifdef __LITTLE_ENDIAN__
+	    /* Both bits must be tested individually: a combined mask test
+	       would also match POWER8 (ISEL + VSX, no ISA 3.1).  */
+	    (hwcap2 & PPC_FEATURE2_ARCH_3_1
+	     && hwcap2 & PPC_FEATURE2_HAS_ISEL
+	     && hwcap & PPC_FEATURE_HAS_VSX)
+	    ? __memmove_power10 :
+#endif
             (hwcap & PPC_FEATURE_HAS_VSX)
             ? __memmove_power7
             : __memmove_ppc);
 
 #undef memmove
 strong_alias (__libc_memmove, memmove);