Add ifunc memcpy and memmove for aarch64

Message ID 1485365668.29977.16.camel@caviumnetworks.com
State New, archived
Headers

Commit Message

Steve Ellcey Jan. 25, 2017, 5:34 p.m. UTC
  Here is a new version of the aarch64 ifunc patch with the cpu-features
style of initialization on startup.  Adhemerval, since I took some code
from your branch I added your name to the ChangeLog.  In addition to
doing the mrs instruction on startup the main difference in this patch
from the last one is that it uses ifuncs in both the shared and archive
libc libraries.

Steve Ellcey
sellcey@cavium.com


2017-01-25  Steve Ellcey  <sellcey@caviumnetworks.com>
	    Adhemerval Zanella  <adhemerval.zanella@linaro.org>

	* sysdeps/aarch64/dl-machine.h: Include cpu-features.c.
	(DL_PLATFORM_INIT): New define.
	(dl_platform_init): New function.
	* sysdeps/aarch64/ldsodefs.h: Include cpu-features.h.
	* sysdeps/aarch64/memcpy.S (MEMMOVE, MEMCPY): New macros.
	(memmove): Use MEMMOVE for name.
	(memcpy): Use MEMCPY for name.  Add loop with prefetching
	under USE_THUNDERX macro.
	* sysdeps/aarch64/multiarch/Makefile: New file.
	* sysdeps/aarch64/multiarch/ifunc-impl-list.c: Ditto.
	* sysdeps/aarch64/multiarch/init-arch.h: Ditto.
	* sysdeps/aarch64/multiarch/memcpy.c: Ditto.
	* sysdeps/aarch64/multiarch/memcpy_generic.S: Ditto.
	* sysdeps/aarch64/multiarch/memcpy_thunderx.S: Ditto.
	* sysdeps/aarch64/multiarch/memmove.c: Ditto.
	* sysdeps/unix/sysv/linux/aarch64/cpu-features.c: Ditto.
	* sysdeps/unix/sysv/linux/aarch64/cpu-features.h: Ditto.
	* sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c: Ditto.
	* sysdeps/unix/sysv/linux/aarch64/libc-start.c: Ditto.
  

Comments

Adhemerval Zanella Feb. 6, 2017, 8:54 p.m. UTC | #1
On 25/01/2017 15:34, Steve Ellcey wrote:
> Here is a new version of the aarch64 ifunc patch with the cpu-features
> style of initialization on startup.  Adhemerval, since I took some code
> from your branch I added your name to the ChangeLog.  In addition to
> doing the mrs instruction on startup the main difference in this patch
> from the last one is that it uses ifuncs in both the shared and archive
> libc libraries.
> 
> Steve Ellcey
> sellcey@cavium.com

Hi Steve,

I think it is better to split this patchset in two, one for multiarch foundation
for aarch64 and another one for the thunderx memcpy implementation itself.

Besides that I think patch should be ok.

> diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
> index e69de29..eafbf77 100644
> --- a/sysdeps/aarch64/multiarch/init-arch.h
> +++ b/sysdeps/aarch64/multiarch/init-arch.h
> @@ -0,0 +1,22 @@
> +/* This file is part of the GNU C Library.

Missing one line description for this file.
  
Siddhesh Poyarekar Feb. 7, 2017, 6:47 a.m. UTC | #2
On Wednesday 25 January 2017 11:04 PM, Steve Ellcey wrote:
> Here is a new version of the aarch64 ifunc patch with the cpu-features
> style of initialization on startup.  Adhemerval, since I took some code
> from your branch I added your name to the ChangeLog.  In addition to
> doing the mrs instruction on startup the main difference in this patch
> from the last one is that it uses ifuncs in both the shared and archive
> libc libraries.
> 
> Steve Ellcey
> sellcey@cavium.com
> 
> 
> 2017-01-25  Steve Ellcey  <sellcey@caviumnetworks.com>
> 	    Adhemerval Zanella  <adhemerval.zanella@linaro.org>
> 
> 	* sysdeps/aarch64/dl-machine.h: Include cpu-features.c.
> 	(DL_PLATFORM_INIT): New define.
> 	(dl_platform_init): New function.
> 	* sysdeps/aarch64/ldsodefs.h: Include cpu-features.h.
> 	* sysdeps/aarch64/memcpy.S (MEMMOVE, MEMCPY): New macros.
> 	(memmove): Use MEMMOVE for name.
> 	(memcpy): Use MEMCPY for name.  Add loop with prefetching
> 	under USE_THUNDERX macro.
> 	* sysdeps/aarch64/multiarch/Makefile: New file.
> 	* sysdeps/aarch64/multiarch/ifunc-impl-list.c: Ditto.
> 	* sysdeps/aarch64/multiarch/init-arch.h: Ditto.
> 	* sysdeps/aarch64/multiarch/memcpy.c: Ditto.
> 	* sysdeps/aarch64/multiarch/memcpy_generic.S: Ditto.
> 	* sysdeps/aarch64/multiarch/memcpy_thunderx.S: Ditto.
> 	* sysdeps/aarch64/multiarch/memmove.c: Ditto.
> 	* sysdeps/unix/sysv/linux/aarch64/cpu-features.c: Ditto.
> 	* sysdeps/unix/sysv/linux/aarch64/cpu-features.h: Ditto.
> 	* sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c: Ditto.
> 	* sysdeps/unix/sysv/linux/aarch64/libc-start.c: Ditto.
> 
> 
> ifunc.patch
> 
> 
> diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
> index 84b8aec..15d79a6 100644
> --- a/sysdeps/aarch64/dl-machine.h
> +++ b/sysdeps/aarch64/dl-machine.h
> @@ -25,6 +25,7 @@
>  #include <tls.h>
>  #include <dl-tlsdesc.h>
>  #include <dl-irel.h>
> +#include <cpu-features.c>
>  
>  /* Return nonzero iff ELF header is compatible with the running host.  */
>  static inline int __attribute__ ((unused))
> @@ -225,6 +226,23 @@ _dl_start_user:								\n\
>  #define ELF_MACHINE_NO_REL 1
>  #define ELF_MACHINE_NO_RELA 0
>  
> +#define DL_PLATFORM_INIT dl_platform_init ()
> +
> +static inline void __attribute__ ((unused))
> +dl_platform_init (void)
> +{
> +  if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
> +    /* Avoid an empty string which would disturb us.  */
> +    GLRO(dl_platform) = NULL;
> +
> +#ifdef SHARED
> +  /* init_cpu_features has been called early from __libc_start_main in
> +     static executable.  */
> +  init_cpu_features (&GLRO(dl_aarch64_cpu_features));
> +#endif
> +}
> +
> +
>  static inline ElfW(Addr)
>  elf_machine_fixup_plt (struct link_map *map, lookup_t t,
>  		       const ElfW(Rela) *reloc,
> diff --git a/sysdeps/aarch64/ldsodefs.h b/sysdeps/aarch64/ldsodefs.h
> index f277074..ba4ada3 100644
> --- a/sysdeps/aarch64/ldsodefs.h
> +++ b/sysdeps/aarch64/ldsodefs.h
> @@ -20,6 +20,7 @@
>  #define _AARCH64_LDSODEFS_H 1
>  
>  #include <elf.h>
> +#include <cpu-features.h>
>  
>  struct La_aarch64_regs;
>  struct La_aarch64_retval;
> diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
> index 29af8b1..74444b4 100644
> --- a/sysdeps/aarch64/memcpy.S
> +++ b/sysdeps/aarch64/memcpy.S
> @@ -59,7 +59,14 @@
>     Overlapping large forward memmoves use a loop that copies backwards.
>  */
>  
> -ENTRY_ALIGN (memmove, 6)
> +#ifndef MEMMOVE
> +#  define MEMMOVE memmove
> +#endif
> +#ifndef MEMCPY
> +#  define MEMCPY memcpy
> +#endif
> +
> +ENTRY_ALIGN (MEMMOVE, 6)
>  
>  	DELOUSE (0)
>  	DELOUSE (1)
> @@ -71,9 +78,9 @@ ENTRY_ALIGN (memmove, 6)
>  	b.lo	L(move_long)
>  
>  	/* Common case falls through into memcpy.  */
> -END (memmove)
> -libc_hidden_builtin_def (memmove)
> -ENTRY (memcpy)
> +END (MEMMOVE)
> +libc_hidden_builtin_def (MEMMOVE)
> +ENTRY (MEMCPY)
>  
>  	DELOUSE (0)
>  	DELOUSE (1)
> @@ -158,10 +165,22 @@ L(copy96):
>  
>  	.p2align 4
>  L(copy_long):
> +
> +#ifdef USE_THUNDERX
> +
> +	/* On thunderx, large memcpy's are helped by software prefetching.
> +	   This loop is identical to the one below it but with prefetching
> +	   instructions included.  For loops that are less than 32768 bytes,
> +	   the prefetching does not help and slow the code down so we only
> +	   use the prefetching loop for the largest memcpys.  */

I think it would be cleaner to put the full generic and thunderx
implementations in separate files instead of trying to do this macro
dance because it keeps micro-architecture details separate.  Assembly
code is hard to maintain as it is without adding conditional compilation
using macros.

I also second Adhemerval's suggestion to separate the patch to add the
framework from the one to add the thunderx ifunc.  It makes for easier
cherry picking and git-blaming.

Siddhesh

> +
> +	cmp	count, #32768
> +	b.lo	L(copy_long_without_prefetch)
>  	and	tmp1, dstin, 15
>  	bic	dst, dstin, 15
>  	ldp	D_l, D_h, [src]
>  	sub	src, src, tmp1
> +	prfm	pldl1strm, [src, 384]
>  	add	count, count, tmp1	/* Count is now 16 too large.  */
>  	ldp	A_l, A_h, [src, 16]
>  	stp	D_l, D_h, [dstin]
> @@ -169,7 +188,10 @@ L(copy_long):
>  	ldp	C_l, C_h, [src, 48]
>  	ldp	D_l, D_h, [src, 64]!
>  	subs	count, count, 128 + 16	/* Test and readjust count.  */
> -	b.ls	2f
> +
> +L(prefetch_loop64):
> +	tbz	src, #6, 1f
> +	prfm	pldl1strm, [src, 512]
>  1:
>  	stp	A_l, A_h, [dst, 16]
>  	ldp	A_l, A_h, [src, 16]
> @@ -180,12 +202,40 @@ L(copy_long):
>  	stp	D_l, D_h, [dst, 64]!
>  	ldp	D_l, D_h, [src, 64]!
>  	subs	count, count, 64
> -	b.hi	1b
> +	b.hi	L(prefetch_loop64)
> +	b	L(last64)
> +
> +L(copy_long_without_prefetch):
> +#endif
> +
> +	and	tmp1, dstin, 15
> +	bic	dst, dstin, 15
> +	ldp	D_l, D_h, [src]
> +	sub	src, src, tmp1
> +	add	count, count, tmp1	/* Count is now 16 too large.  */
> +	ldp	A_l, A_h, [src, 16]
> +	stp	D_l, D_h, [dstin]
> +	ldp	B_l, B_h, [src, 32]
> +	ldp	C_l, C_h, [src, 48]
> +	ldp	D_l, D_h, [src, 64]!
> +	subs	count, count, 128 + 16	/* Test and readjust count.  */
> +	b.ls	L(last64)
> +L(loop64):
> +	stp	A_l, A_h, [dst, 16]
> +	ldp	A_l, A_h, [src, 16]
> +	stp	B_l, B_h, [dst, 32]
> +	ldp	B_l, B_h, [src, 32]
> +	stp	C_l, C_h, [dst, 48]
> +	ldp	C_l, C_h, [src, 48]
> +	stp	D_l, D_h, [dst, 64]!
> +	ldp	D_l, D_h, [src, 64]!
> +	subs	count, count, 64
> +	b.hi	L(loop64)
>  
>  	/* Write the last full set of 64 bytes.  The remainder is at most 64
>  	   bytes, so it is safe to always copy 64 bytes from the end even if
>  	   there is just 1 byte left.  */
> -2:
> +L(last64):
>  	ldp	E_l, E_h, [srcend, -64]
>  	stp	A_l, A_h, [dst, 16]
>  	ldp	A_l, A_h, [srcend, -48]
> @@ -256,5 +306,5 @@ L(move_long):
>  	stp	C_l, C_h, [dstin]
>  3:	ret
>  
> -END (memcpy)
> -libc_hidden_builtin_def (memcpy)
> +END (MEMCPY)
> +libc_hidden_builtin_def (MEMCPY)
> diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
> index e69de29..78d52c7 100644
> --- a/sysdeps/aarch64/multiarch/Makefile
> +++ b/sysdeps/aarch64/multiarch/Makefile
> @@ -0,0 +1,3 @@
> +ifeq ($(subdir),string)
> +sysdep_routines += memcpy_generic memcpy_thunderx
> +endif
> diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
> index e69de29..c4f23df 100644
> --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
> @@ -0,0 +1,51 @@
> +/* Enumerate available IFUNC implementations of a function.  AARCH64 version.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <assert.h>
> +#include <string.h>
> +#include <wchar.h>
> +#include <ldsodefs.h>
> +#include <ifunc-impl-list.h>
> +#include <init-arch.h>
> +#include <stdio.h>
> +
> +/* Maximum number of IFUNC implementations.  */
> +#define MAX_IFUNC	2
> +
> +size_t
> +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> +			size_t max)
> +{
> +  assert (max >= MAX_IFUNC);
> +
> +  size_t i = 0;
> +
> +  INIT_ARCH ();
> +
> +  /* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c.  */
> +  IFUNC_IMPL (i, name, memcpy,
> +	      IFUNC_IMPL_ADD (array, i, memcpy, IS_THUNDERX (midr),
> +			      __memcpy_thunderx)
> +	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
> +  IFUNC_IMPL (i, name, memmove,
> +	      IFUNC_IMPL_ADD (array, i, memmove, IS_THUNDERX (midr),
> +			      __memmove_thunderx)
> +	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
> +
> +  return i;
> +}
> diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
> index e69de29..eafbf77 100644
> --- a/sysdeps/aarch64/multiarch/init-arch.h
> +++ b/sysdeps/aarch64/multiarch/init-arch.h
> @@ -0,0 +1,22 @@
> +/* This file is part of the GNU C Library.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <ldsodefs.h>
> +
> +#define INIT_ARCH()						\
> +  uint64_t __attribute__((unused)) midr = 			\
> +    GLRO(dl_aarch64_cpu_features).midr_el1;
> diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
> index e69de29..4e3f251 100644
> --- a/sysdeps/aarch64/multiarch/memcpy.c
> +++ b/sysdeps/aarch64/multiarch/memcpy.c
> @@ -0,0 +1,39 @@
> +/* Multiple versions of memcpy. AARCH64 version.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* Define multiple versions only for the definition in libc.  */
> +
> +#if IS_IN (libc)
> +/* Redefine memcpy so that the compiler won't complain about the type
> +   mismatch with the IFUNC selector in strong_alias, below.  */
> +# undef memcpy
> +# define memcpy __redirect_memcpy
> +# include <string.h>
> +# include <init-arch.h>
> +
> +extern __typeof (__redirect_memcpy) __libc_memcpy;
> +
> +extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
> +extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
> +
> +libc_ifunc (__libc_memcpy,
> +            IS_THUNDERX (midr) ? __memcpy_thunderx : __memcpy_generic);
> +
> +#undef memcpy
> +strong_alias (__libc_memcpy, memcpy);
> +#endif
> diff --git a/sysdeps/aarch64/multiarch/memcpy_generic.S b/sysdeps/aarch64/multiarch/memcpy_generic.S
> index e69de29..50e1a1c 100644
> --- a/sysdeps/aarch64/multiarch/memcpy_generic.S
> +++ b/sysdeps/aarch64/multiarch/memcpy_generic.S
> @@ -0,0 +1,42 @@
> +/* A Generic Optimized memcpy implementation for AARCH64.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* The actual memcpy and memmove code is in ../memcpy.S.  If we are
> +   building libc this file defines __memcpy_generic and __memmove_generic.
> +   Otherwise the include of ../memcpy.S will define the normal __memcpy
> +   and__memmove entry points.  */
> +
> +#include <sysdep.h>
> +
> +#if IS_IN (libc)
> +
> +#define MEMCPY __memcpy_generic
> +#define MEMMOVE __memmove_generic
> +
> +/* Do not hide the generic versions of memcpy and memmove, we use them
> +   internally.  */
> +#undef libc_hidden_builtin_def
> +#define libc_hidden_builtin_def(name)
> +
> +/* It doesn't make sense to send libc-internal memcpy calls through a PLT. */
> +	.globl __GI_memcpy; __GI_memcpy = __memcpy_generic
> +	.globl __GI_memmove; __GI_memmove = __memmove_generic
> +
> +#endif
> +
> +#include "../memcpy.S"
> diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
> index e69de29..ee971c8 100644
> --- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
> +++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
> @@ -0,0 +1,32 @@
> +/* A Thunderx Optimized memcpy implementation for AARCH64.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* The actual thunderx optimized code is in ../memcpy.S under the USE_THUNDERX
> +   ifdef.  If we are not building libc then we do not build anything when
> +   compiling this file and __memcpy is defined by memcpy_generic.S.  */
> +
> +#include <sysdep.h>
> +
> +#if IS_IN (libc)
> +
> +#define MEMCPY __memcpy_thunderx
> +#define MEMMOVE __memmove_thunderx
> +#define USE_THUNDERX
> +#include "../memcpy.S"
> +
> +#endif
> diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
> index e69de29..8d7a146 100644
> --- a/sysdeps/aarch64/multiarch/memmove.c
> +++ b/sysdeps/aarch64/multiarch/memmove.c
> @@ -0,0 +1,39 @@
> +/* Multiple versions of memmove. AARCH64 version.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* Define multiple versions only for the definition in libc.  */
> +
> +#if IS_IN (libc)
> +/* Redefine memmove so that the compiler won't complain about the type
> +   mismatch with the IFUNC selector in strong_alias, below.  */
> +# undef memmove
> +# define memmove __redirect_memmove
> +# include <string.h>
> +# include <init-arch.h>
> +
> +extern __typeof (__redirect_memmove) __libc_memmove;
> +
> +extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden;
> +extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden;
> +
> +libc_ifunc (__libc_memmove,
> +            IS_THUNDERX (midr) ? __memmove_thunderx : __memmove_generic);
> +
> +#undef memmove
> +strong_alias (__libc_memmove, memmove);
> +#endif
> diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
> index e69de29..8e4b514 100644
> --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
> +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
> @@ -0,0 +1,38 @@
> +/* Initialize CPU feature data.  AArch64 version.
> +   This file is part of the GNU C Library.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <cpu-features.h>
> +
> +#ifndef HWCAP_CPUID
> +# define HWCAP_CPUID		(1 << 11)
> +#endif
> +
> +static inline void
> +init_cpu_features (struct cpu_features *cpu_features)
> +{
> +  if (GLRO(dl_hwcap) & HWCAP_CPUID)
> +    {
> +      register uint64_t id = 0;
> +      asm volatile ("mrs %0, midr_el1" : "=r"(id));
> +      cpu_features->midr_el1 = id;
> +    }
> +  else
> +    {
> +      cpu_features->midr_el1 = 0;
> +    }
> +}
> diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
> index e69de29..c92b650 100644
> --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
> +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
> @@ -0,0 +1,49 @@
> +/* Initialize CPU feature data.  AArch64 version.
> +   This file is part of the GNU C Library.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#ifndef _CPU_FEATURES_AARCH64_H
> +#define _CPU_FEATURES_AARCH64_H
> +
> +#include <stdint.h>
> +
> +#define MIDR_PARTNUM_SHIFT	4
> +#define MIDR_PARTNUM_MASK	(0xfff << MIDR_PARTNUM_SHIFT)
> +#define MIDR_PARTNUM(midr)	\
> +	(((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT)
> +#define MIDR_ARCHITECTURE_SHIFT	16
> +#define MIDR_ARCHITECTURE_MASK	(0xf << MIDR_ARCHITECTURE_SHIFT)
> +#define MIDR_ARCHITECTURE(midr)	\
> +	(((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT)
> +#define MIDR_VARIANT_SHIFT	20
> +#define MIDR_VARIANT_MASK	(0xf << MIDR_VARIANT_SHIFT)
> +#define MIDR_VARIANT(midr)	\
> +	(((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
> +#define MIDR_IMPLEMENTOR_SHIFT	24
> +#define MIDR_IMPLEMENTOR_MASK	(0xff << MIDR_IMPLEMENTOR_SHIFT)
> +#define MIDR_IMPLEMENTOR(midr)	\
> +	(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
> +
> +#define IS_THUNDERX(midr) (MIDR_IMPLEMENTOR(midr) == 'C'	\
> +			   && MIDR_PARTNUM(midr) == 0x0a1)
> +
> +struct cpu_features
> +{
> +  uint64_t midr_el1;
> +};
> +
> +#endif /* _CPU_FEATURES_AARCH64_H  */
> diff --git a/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c b/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c
> index e69de29..438046a 100644
> --- a/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c
> +++ b/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c
> @@ -0,0 +1,60 @@
> +/* Data for AArch64 version of processor capability information.
> +   Linux version.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* If anything should be added here check whether the size of each string
> +   is still ok with the given array size.
> +
> +   All the #ifdefs in the definitions are quite irritating but
> +   necessary if we want to avoid duplicating the information.  There
> +   are three different modes:
> +
> +   - PROCINFO_DECL is defined.  This means we are only interested in
> +     declarations.
> +
> +   - PROCINFO_DECL is not defined:
> +
> +     + if SHARED is defined the file is included in an array
> +       initializer.  The .element = { ... } syntax is needed.
> +
> +     + if SHARED is not defined a normal array initialization is
> +       needed.
> +  */
> +
> +#ifndef PROCINFO_CLASS
> +# define PROCINFO_CLASS
> +#endif
> +
> +#if !IS_IN (ldconfig)
> +# if !defined PROCINFO_DECL && defined SHARED
> +  ._dl_aarch64_cpu_features
> +# else
> +PROCINFO_CLASS struct cpu_features _dl_aarch64_cpu_features
> +# endif
> +# ifndef PROCINFO_DECL
> += { }
> +# endif
> +# if !defined SHARED || defined PROCINFO_DECL
> +;
> +# else
> +,
> +# endif
> +#endif
> +
> +#undef PROCINFO_DECL
> +#undef PROCINFO_CLASS
> diff --git a/sysdeps/unix/sysv/linux/aarch64/libc-start.c b/sysdeps/unix/sysv/linux/aarch64/libc-start.c
> index e69de29..c98aff1 100644
> --- a/sysdeps/unix/sysv/linux/aarch64/libc-start.c
> +++ b/sysdeps/unix/sysv/linux/aarch64/libc-start.c
> @@ -0,0 +1,40 @@
> +/* Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#ifdef SHARED
> +# include <csu/libc-start.c>
> +# else
> +/* The main work is done in the generic function.  */
> +# define LIBC_START_DISABLE_INLINE
> +# define LIBC_START_MAIN generic_start_main
> +# include <csu/libc-start.c>
> +# include <cpu-features.c>
> +
> +extern struct cpu_features _dl_aarch64_cpu_features;
> +
> +int
> +__libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
> +		   int argc, char **argv,
> +		   __typeof (main) init,
> +		   void (*fini) (void),
> +		   void (*rtld_fini) (void), void *stack_end)
> +{
> +  init_cpu_features (&_dl_aarch64_cpu_features);
> +  return generic_start_main (main, argc, argv, init, fini, rtld_fini,
> +			     stack_end);
> +}
> +#endif
>
  

Patch

diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
index 84b8aec..15d79a6 100644
--- a/sysdeps/aarch64/dl-machine.h
+++ b/sysdeps/aarch64/dl-machine.h
@@ -25,6 +25,7 @@ 
 #include <tls.h>
 #include <dl-tlsdesc.h>
 #include <dl-irel.h>
+#include <cpu-features.c>
 
 /* Return nonzero iff ELF header is compatible with the running host.  */
 static inline int __attribute__ ((unused))
@@ -225,6 +226,23 @@  _dl_start_user:								\n\
 #define ELF_MACHINE_NO_REL 1
 #define ELF_MACHINE_NO_RELA 0
 
+#define DL_PLATFORM_INIT dl_platform_init ()
+
+static inline void __attribute__ ((unused))
+dl_platform_init (void)
+{
+  if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
+    /* Avoid an empty string which would disturb us.  */
+    GLRO(dl_platform) = NULL;
+
+#ifdef SHARED
+  /* init_cpu_features has been called early from __libc_start_main in
+     static executable.  */
+  init_cpu_features (&GLRO(dl_aarch64_cpu_features));
+#endif
+}
+
+
 static inline ElfW(Addr)
 elf_machine_fixup_plt (struct link_map *map, lookup_t t,
 		       const ElfW(Rela) *reloc,
diff --git a/sysdeps/aarch64/ldsodefs.h b/sysdeps/aarch64/ldsodefs.h
index f277074..ba4ada3 100644
--- a/sysdeps/aarch64/ldsodefs.h
+++ b/sysdeps/aarch64/ldsodefs.h
@@ -20,6 +20,7 @@ 
 #define _AARCH64_LDSODEFS_H 1
 
 #include <elf.h>
+#include <cpu-features.h>
 
 struct La_aarch64_regs;
 struct La_aarch64_retval;
diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
index 29af8b1..74444b4 100644
--- a/sysdeps/aarch64/memcpy.S
+++ b/sysdeps/aarch64/memcpy.S
@@ -59,7 +59,14 @@ 
    Overlapping large forward memmoves use a loop that copies backwards.
 */
 
-ENTRY_ALIGN (memmove, 6)
+#ifndef MEMMOVE
+#  define MEMMOVE memmove
+#endif
+#ifndef MEMCPY
+#  define MEMCPY memcpy
+#endif
+
+ENTRY_ALIGN (MEMMOVE, 6)
 
 	DELOUSE (0)
 	DELOUSE (1)
@@ -71,9 +78,9 @@  ENTRY_ALIGN (memmove, 6)
 	b.lo	L(move_long)
 
 	/* Common case falls through into memcpy.  */
-END (memmove)
-libc_hidden_builtin_def (memmove)
-ENTRY (memcpy)
+END (MEMMOVE)
+libc_hidden_builtin_def (MEMMOVE)
+ENTRY (MEMCPY)
 
 	DELOUSE (0)
 	DELOUSE (1)
@@ -158,10 +165,22 @@  L(copy96):
 
 	.p2align 4
 L(copy_long):
+
+#ifdef USE_THUNDERX
+
+	/* On thunderx, large memcpy's are helped by software prefetching.
+	   This loop is identical to the one below it but with prefetching
+	   instructions included.  For loops that are less than 32768 bytes,
+	   the prefetching does not help and slow the code down so we only
+	   use the prefetching loop for the largest memcpys.  */
+
+	cmp	count, #32768
+	b.lo	L(copy_long_without_prefetch)
 	and	tmp1, dstin, 15
 	bic	dst, dstin, 15
 	ldp	D_l, D_h, [src]
 	sub	src, src, tmp1
+	prfm	pldl1strm, [src, 384]
 	add	count, count, tmp1	/* Count is now 16 too large.  */
 	ldp	A_l, A_h, [src, 16]
 	stp	D_l, D_h, [dstin]
@@ -169,7 +188,10 @@  L(copy_long):
 	ldp	C_l, C_h, [src, 48]
 	ldp	D_l, D_h, [src, 64]!
 	subs	count, count, 128 + 16	/* Test and readjust count.  */
-	b.ls	2f
+
+L(prefetch_loop64):
+	tbz	src, #6, 1f
+	prfm	pldl1strm, [src, 512]
 1:
 	stp	A_l, A_h, [dst, 16]
 	ldp	A_l, A_h, [src, 16]
@@ -180,12 +202,40 @@  L(copy_long):
 	stp	D_l, D_h, [dst, 64]!
 	ldp	D_l, D_h, [src, 64]!
 	subs	count, count, 64
-	b.hi	1b
+	b.hi	L(prefetch_loop64)
+	b	L(last64)
+
+L(copy_long_without_prefetch):
+#endif
+
+	and	tmp1, dstin, 15
+	bic	dst, dstin, 15
+	ldp	D_l, D_h, [src]
+	sub	src, src, tmp1
+	add	count, count, tmp1	/* Count is now 16 too large.  */
+	ldp	A_l, A_h, [src, 16]
+	stp	D_l, D_h, [dstin]
+	ldp	B_l, B_h, [src, 32]
+	ldp	C_l, C_h, [src, 48]
+	ldp	D_l, D_h, [src, 64]!
+	subs	count, count, 128 + 16	/* Test and readjust count.  */
+	b.ls	L(last64)
+L(loop64):
+	stp	A_l, A_h, [dst, 16]
+	ldp	A_l, A_h, [src, 16]
+	stp	B_l, B_h, [dst, 32]
+	ldp	B_l, B_h, [src, 32]
+	stp	C_l, C_h, [dst, 48]
+	ldp	C_l, C_h, [src, 48]
+	stp	D_l, D_h, [dst, 64]!
+	ldp	D_l, D_h, [src, 64]!
+	subs	count, count, 64
+	b.hi	L(loop64)
 
 	/* Write the last full set of 64 bytes.  The remainder is at most 64
 	   bytes, so it is safe to always copy 64 bytes from the end even if
 	   there is just 1 byte left.  */
-2:
+L(last64):
 	ldp	E_l, E_h, [srcend, -64]
 	stp	A_l, A_h, [dst, 16]
 	ldp	A_l, A_h, [srcend, -48]
@@ -256,5 +306,5 @@  L(move_long):
 	stp	C_l, C_h, [dstin]
 3:	ret
 
-END (memcpy)
-libc_hidden_builtin_def (memcpy)
+END (MEMCPY)
+libc_hidden_builtin_def (MEMCPY)
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
index e69de29..78d52c7 100644
--- a/sysdeps/aarch64/multiarch/Makefile
+++ b/sysdeps/aarch64/multiarch/Makefile
@@ -0,0 +1,3 @@ 
+ifeq ($(subdir),string)
+sysdep_routines += memcpy_generic memcpy_thunderx
+endif
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
index e69de29..c4f23df 100644
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
@@ -0,0 +1,51 @@ 
+/* Enumerate available IFUNC implementations of a function.  AARCH64 version.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <assert.h>
+#include <string.h>
+#include <wchar.h>
+#include <ldsodefs.h>
+#include <ifunc-impl-list.h>
+#include <init-arch.h>
+#include <stdio.h>
+
+/* Maximum number of IFUNC implementations.  */
+#define MAX_IFUNC	2
+
+size_t
+__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+			size_t max)
+{
+  assert (max >= MAX_IFUNC);
+
+  size_t i = 0;
+
+  INIT_ARCH ();
+
+  /* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c.  */
+  IFUNC_IMPL (i, name, memcpy,
+	      IFUNC_IMPL_ADD (array, i, memcpy, IS_THUNDERX (midr),
+			      __memcpy_thunderx)
+	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
+  IFUNC_IMPL (i, name, memmove,
+	      IFUNC_IMPL_ADD (array, i, memmove, IS_THUNDERX (midr),
+			      __memmove_thunderx)
+	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
+
+  return i;
+}
diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h
index e69de29..eafbf77 100644
--- a/sysdeps/aarch64/multiarch/init-arch.h
+++ b/sysdeps/aarch64/multiarch/init-arch.h
@@ -0,0 +1,22 @@ 
+/* This file is part of the GNU C Library.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <ldsodefs.h>
+
+#define INIT_ARCH()						\
+  uint64_t __attribute__((unused)) midr = 			\
+    GLRO(dl_aarch64_cpu_features).midr_el1;
diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
index e69de29..4e3f251 100644
--- a/sysdeps/aarch64/multiarch/memcpy.c
+++ b/sysdeps/aarch64/multiarch/memcpy.c
@@ -0,0 +1,39 @@ 
+/* Multiple versions of memcpy. AARCH64 version.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+
+#if IS_IN (libc)
+/* Redefine memcpy so that the compiler won't complain about the type
+   mismatch with the IFUNC selector in strong_alias, below.  */
+# undef memcpy
+# define memcpy __redirect_memcpy
+# include <string.h>
+# include <init-arch.h>
+
+extern __typeof (__redirect_memcpy) __libc_memcpy;
+
+extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden;
+
+libc_ifunc (__libc_memcpy,
+            IS_THUNDERX (midr) ? __memcpy_thunderx : __memcpy_generic);
+
+#undef memcpy
+strong_alias (__libc_memcpy, memcpy);
+#endif
diff --git a/sysdeps/aarch64/multiarch/memcpy_generic.S b/sysdeps/aarch64/multiarch/memcpy_generic.S
index e69de29..50e1a1c 100644
--- a/sysdeps/aarch64/multiarch/memcpy_generic.S
+++ b/sysdeps/aarch64/multiarch/memcpy_generic.S
@@ -0,0 +1,42 @@ 
+/* A Generic Optimized memcpy implementation for AARCH64.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* The actual memcpy and memmove code is in ../memcpy.S.  If we are
+   building libc this file defines __memcpy_generic and __memmove_generic.
+   Otherwise the include of ../memcpy.S will define the normal __memcpy
+   and__memmove entry points.  */
+
+#include <sysdep.h>
+
+#if IS_IN (libc)
+
+#define MEMCPY __memcpy_generic
+#define MEMMOVE __memmove_generic
+
+/* Do not hide the generic versions of memcpy and memmove, we use them
+   internally.  */
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(name)
+
+/* It doesn't make sense to send libc-internal memcpy calls through a PLT. */
+	.globl __GI_memcpy; __GI_memcpy = __memcpy_generic
+	.globl __GI_memmove; __GI_memmove = __memmove_generic
+
+#endif
+
+#include "../memcpy.S"
diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
index e69de29..ee971c8 100644
--- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S
+++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S
@@ -0,0 +1,32 @@ 
+/* A Thunderx Optimized memcpy implementation for AARCH64.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* The actual thunderx optimized code is in ../memcpy.S under the USE_THUNDERX
+   ifdef.  If we are not building libc then we do not build anything when
+   compiling this file and __memcpy is defined by memcpy_generic.S.  */
+
+#include <sysdep.h>
+
+#if IS_IN (libc)
+
+#define MEMCPY __memcpy_thunderx
+#define MEMMOVE __memmove_thunderx
+#define USE_THUNDERX
+#include "../memcpy.S"
+
+#endif
diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
index e69de29..8d7a146 100644
--- a/sysdeps/aarch64/multiarch/memmove.c
+++ b/sysdeps/aarch64/multiarch/memmove.c
@@ -0,0 +1,39 @@ 
+/* Multiple versions of memmove. AARCH64 version.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+
+#if IS_IN (libc)
+/* Redefine memmove so that the compiler won't complain about the type
+   mismatch with the IFUNC selector in strong_alias, below.  */
+# undef memmove
+# define memmove __redirect_memmove
+# include <string.h>
+# include <init-arch.h>
+
+extern __typeof (__redirect_memmove) __libc_memmove;
+
+extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden;
+extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden;
+
+libc_ifunc (__libc_memmove,
+            IS_THUNDERX (midr) ? __memmove_thunderx : __memmove_generic);
+
+#undef memmove
+strong_alias (__libc_memmove, memmove);
+#endif
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
index e69de29..8e4b514 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c
@@ -0,0 +1,38 @@ 
+/* Initialize CPU feature data.  AArch64 version.
+   This file is part of the GNU C Library.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <cpu-features.h>
+
+#ifndef HWCAP_CPUID
+# define HWCAP_CPUID		(1 << 11)
+#endif
+
+static inline void
+init_cpu_features (struct cpu_features *cpu_features)
+{
+  if (GLRO(dl_hwcap) & HWCAP_CPUID)
+    {
+      register uint64_t id = 0;
+      asm volatile ("mrs %0, midr_el1" : "=r"(id));
+      cpu_features->midr_el1 = id;
+    }
+  else
+    {
+      cpu_features->midr_el1 = 0;
+    }
+}
diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
index e69de29..c92b650 100644
--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
@@ -0,0 +1,49 @@ 
+/* Initialize CPU feature data.  AArch64 version.
+   This file is part of the GNU C Library.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _CPU_FEATURES_AARCH64_H
+#define _CPU_FEATURES_AARCH64_H
+
+#include <stdint.h>
+
+#define MIDR_PARTNUM_SHIFT	4
+#define MIDR_PARTNUM_MASK	(0xfff << MIDR_PARTNUM_SHIFT)
+#define MIDR_PARTNUM(midr)	\
+	(((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT)
+#define MIDR_ARCHITECTURE_SHIFT	16
+#define MIDR_ARCHITECTURE_MASK	(0xf << MIDR_ARCHITECTURE_SHIFT)
+#define MIDR_ARCHITECTURE(midr)	\
+	(((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT)
+#define MIDR_VARIANT_SHIFT	20
+#define MIDR_VARIANT_MASK	(0xf << MIDR_VARIANT_SHIFT)
+#define MIDR_VARIANT(midr)	\
+	(((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
+#define MIDR_IMPLEMENTOR_SHIFT	24
+#define MIDR_IMPLEMENTOR_MASK	(0xff << MIDR_IMPLEMENTOR_SHIFT)
+#define MIDR_IMPLEMENTOR(midr)	\
+	(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
+
+#define IS_THUNDERX(midr) (MIDR_IMPLEMENTOR(midr) == 'C'	\
+			   && MIDR_PARTNUM(midr) == 0x0a1)
+
+struct cpu_features
+{
+  uint64_t midr_el1;
+};
+
+#endif /* _CPU_FEATURES_AARCH64_H  */
diff --git a/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c b/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c
index e69de29..438046a 100644
--- a/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c
+++ b/sysdeps/unix/sysv/linux/aarch64/dl-procinfo.c
@@ -0,0 +1,60 @@ 
+/* Data for AArch64 version of processor capability information.
+   Linux version.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* If anything should be added here check whether the size of each string
+   is still ok with the given array size.
+
+   All the #ifdefs in the definitions are quite irritating but
+   necessary if we want to avoid duplicating the information.  There
+   are three different modes:
+
+   - PROCINFO_DECL is defined.  This means we are only interested in
+     declarations.
+
+   - PROCINFO_DECL is not defined:
+
+     + if SHARED is defined the file is included in an array
+       initializer.  The .element = { ... } syntax is needed.
+
+     + if SHARED is not defined a normal array initialization is
+       needed.
+  */
+
+#ifndef PROCINFO_CLASS
+# define PROCINFO_CLASS
+#endif
+
+#if !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL && defined SHARED
+  ._dl_aarch64_cpu_features
+# else
+PROCINFO_CLASS struct cpu_features _dl_aarch64_cpu_features
+# endif
+# ifndef PROCINFO_DECL
+= { }
+# endif
+# if !defined SHARED || defined PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
+
+#undef PROCINFO_DECL
+#undef PROCINFO_CLASS
diff --git a/sysdeps/unix/sysv/linux/aarch64/libc-start.c b/sysdeps/unix/sysv/linux/aarch64/libc-start.c
index e69de29..c98aff1 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libc-start.c
+++ b/sysdeps/unix/sysv/linux/aarch64/libc-start.c
@@ -0,0 +1,40 @@ 
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef SHARED
+# include <csu/libc-start.c>
+# else
+/* The main work is done in the generic function.  */
+# define LIBC_START_DISABLE_INLINE
+# define LIBC_START_MAIN generic_start_main
+# include <csu/libc-start.c>
+# include <cpu-features.c>
+
+extern struct cpu_features _dl_aarch64_cpu_features;
+
+int
+__libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
+		   int argc, char **argv,
+		   __typeof (main) init,
+		   void (*fini) (void),
+		   void (*rtld_fini) (void), void *stack_end)
+{
+  init_cpu_features (&_dl_aarch64_cpu_features);
+  return generic_start_main (main, argc, argv, init, fini, rtld_fini,
+			     stack_end);
+}
+#endif