[2/2] powerpc: Move cache line size to rtld_global_ro

Message ID 20200110222743.79551-2-tuliom@linux.ibm.com
State Committed
Delegated to: Carlos O'Donell
Headers

Commit Message

Tulio Magno Quites Machado Filho Jan. 10, 2020, 10:27 p.m. UTC
  Changes since v1:
 - Updated copyright dates
 - Added tests
 - Fixed coding style issues
 - Added macros __GLRO_DEF and __GLRO in the 64-bit case.
 - Removed sysdeps/unix/sysv/linux/powerpc/dl-support.c in favor of
   sysdeps/generic/dl-auxv.h which is included by elf/dl-support.c and
   elf/dl-sysdep.c
 - Removed sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c

----8<----

GCC 10.0 enabled -fno-common by default, and this exposed the fact that
__cache_line_size had been defined in 2 different places: the loader and
libc.

In order to avoid this duplication, the libc variable has been removed
and the loader variable is moved to rtld_global_ro.

File sysdeps/unix/sysv/linux/powerpc/dl-auxv.h has been added in order
to reuse code for both static and dynamic linking scenarios.
---
 elf/dl-support.c                              |  3 +-
 elf/dl-sysdep.c                               |  3 +-
 sysdeps/generic/dl-auxv.h                     | 21 ++++++++
 sysdeps/powerpc/Makefile                      | 17 ++++++
 sysdeps/powerpc/dl-procinfo.c                 | 17 ++++++
 sysdeps/powerpc/mod-cache-ppc.c               | 45 ++++++++++++++++
 sysdeps/powerpc/powerpc32/a2/memcpy.S         | 23 ++++----
 sysdeps/powerpc/powerpc32/dl-machine.c        | 11 ++--
 sysdeps/powerpc/powerpc32/memset.S            | 29 +++++-----
 sysdeps/powerpc/powerpc32/sysdep.h            | 26 +++++++++
 sysdeps/powerpc/powerpc64/a2/memcpy.S         | 13 +++--
 sysdeps/powerpc/powerpc64/memset.S            | 11 ++--
 sysdeps/powerpc/powerpc64/sysdep.h            | 24 +++++++++
 sysdeps/powerpc/rtld-global-offsets.sym       |  1 +
 sysdeps/powerpc/tst-cache-ppc-static-dlopen.c | 54 +++++++++++++++++++
 sysdeps/powerpc/tst-cache-ppc-static.c        | 20 +++++++
 sysdeps/powerpc/tst-cache-ppc.c               | 29 ++++++++++
 .../linux/powerpc/{dl-sysdep.c => dl-auxv.h}  | 19 +++----
 sysdeps/unix/sysv/linux/powerpc/dl-static.c   |  3 ++
 sysdeps/unix/sysv/linux/powerpc/libc-start.c  | 10 ++--
 20 files changed, 312 insertions(+), 67 deletions(-)
 create mode 100644 sysdeps/generic/dl-auxv.h
 create mode 100644 sysdeps/powerpc/mod-cache-ppc.c
 create mode 100644 sysdeps/powerpc/tst-cache-ppc-static-dlopen.c
 create mode 100644 sysdeps/powerpc/tst-cache-ppc-static.c
 create mode 100644 sysdeps/powerpc/tst-cache-ppc.c
 rename sysdeps/unix/sysv/linux/powerpc/{dl-sysdep.c => dl-auxv.h} (60%)
  

Comments

Carlos O'Donell Jan. 16, 2020, 4:37 p.m. UTC | #1
On 1/10/20 5:27 PM, Tulio Magno Quites Machado Filho wrote:
> Changes since v1:
>  - Updated copyright dates
>  - Added tests
>  - Fixed coding style issues
>  - Added macros __GLRO_DEF and __GLRO in the 64-bit case.
>  - Removed sysdeps/unix/sysv/linux/powerpc/dl-support.c in favor of
>    sysdeps/generic/dl-auxv.h which is included by elf/dl-support.c and
>    elf/dl-sysdep.c
>  - Removed sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
> 

OK for master. This is probably the smallest change you can make to fix
all of this up.

This needs approval again by Siddhesh.

Reviewed-by: Carlos O'Donell <carlos@redhat.com>

> ----8<----
> 
> GCC 10.0 enabled -fno-common by default, and this exposed the fact that
> __cache_line_size had been defined in 2 different places: the loader and
> libc.
> 
> In order to avoid this duplication, the libc variable has been removed
> and the loader variable is moved to rtld_global_ro.
> 
> File sysdeps/unix/sysv/linux/powerpc/dl-auxv.h has been added in order
> to reuse code for both static and dynamic linking scenarios.
> ---
>  elf/dl-support.c                              |  3 +-
>  elf/dl-sysdep.c                               |  3 +-
>  sysdeps/generic/dl-auxv.h                     | 21 ++++++++
>  sysdeps/powerpc/Makefile                      | 17 ++++++
>  sysdeps/powerpc/dl-procinfo.c                 | 17 ++++++
>  sysdeps/powerpc/mod-cache-ppc.c               | 45 ++++++++++++++++
>  sysdeps/powerpc/powerpc32/a2/memcpy.S         | 23 ++++----
>  sysdeps/powerpc/powerpc32/dl-machine.c        | 11 ++--
>  sysdeps/powerpc/powerpc32/memset.S            | 29 +++++-----
>  sysdeps/powerpc/powerpc32/sysdep.h            | 26 +++++++++
>  sysdeps/powerpc/powerpc64/a2/memcpy.S         | 13 +++--
>  sysdeps/powerpc/powerpc64/memset.S            | 11 ++--
>  sysdeps/powerpc/powerpc64/sysdep.h            | 24 +++++++++
>  sysdeps/powerpc/rtld-global-offsets.sym       |  1 +
>  sysdeps/powerpc/tst-cache-ppc-static-dlopen.c | 54 +++++++++++++++++++
>  sysdeps/powerpc/tst-cache-ppc-static.c        | 20 +++++++
>  sysdeps/powerpc/tst-cache-ppc.c               | 29 ++++++++++
>  .../linux/powerpc/{dl-sysdep.c => dl-auxv.h}  | 19 +++----
>  sysdeps/unix/sysv/linux/powerpc/dl-static.c   |  3 ++
>  sysdeps/unix/sysv/linux/powerpc/libc-start.c  | 10 ++--
>  20 files changed, 312 insertions(+), 67 deletions(-)
>  create mode 100644 sysdeps/generic/dl-auxv.h
>  create mode 100644 sysdeps/powerpc/mod-cache-ppc.c
>  create mode 100644 sysdeps/powerpc/tst-cache-ppc-static-dlopen.c
>  create mode 100644 sysdeps/powerpc/tst-cache-ppc-static.c
>  create mode 100644 sysdeps/powerpc/tst-cache-ppc.c
>  rename sysdeps/unix/sysv/linux/powerpc/{dl-sysdep.c => dl-auxv.h} (60%)
> 
> diff --git a/elf/dl-support.c b/elf/dl-support.c
> index ad791ab6ab..7704c101c5 100644
> --- a/elf/dl-support.c
> +++ b/elf/dl-support.c
> @@ -36,6 +36,7 @@
>  #include <stackinfo.h>
>  #include <dl-vdso.h>
>  #include <dl-vdso-setup.h>
> +#include <dl-auxv.h>

OK.

>  
>  extern char *__progname;
>  char **_dl_argv = &__progname;	/* This is checked for some error messages.  */
> @@ -293,9 +294,7 @@ _dl_aux_init (ElfW(auxv_t) *av)
>        case AT_RANDOM:
>  	_dl_random = (void *) av->a_un.a_val;
>  	break;
> -# ifdef DL_PLATFORM_AUXV
>        DL_PLATFORM_AUXV
> -# endif

OK, because DL_PLATFORM_AUXV is always defined now.

>        }
>    if (seen == 0xf)
>      {
> diff --git a/elf/dl-sysdep.c b/elf/dl-sysdep.c
> index 53bbee14f4..854570821c 100644
> --- a/elf/dl-sysdep.c
> +++ b/elf/dl-sysdep.c
> @@ -45,6 +45,7 @@
>  #include <tls.h>
>  
>  #include <dl-tunables.h>
> +#include <dl-auxv.h>

OK.

>  
>  extern char **_environ attribute_hidden;
>  extern char _end[] attribute_hidden;
> @@ -180,9 +181,7 @@ _dl_sysdep_start (void **start_argptr,
>        case AT_RANDOM:
>  	_dl_random = (void *) av->a_un.a_val;
>  	break;
> -#ifdef DL_PLATFORM_AUXV
>        DL_PLATFORM_AUXV
> -#endif

OK, because DL_PLATFORM_AUXV is always defined now.

>        }
>  
>  #ifndef HAVE_AUX_SECURE
> diff --git a/sysdeps/generic/dl-auxv.h b/sysdeps/generic/dl-auxv.h
> new file mode 100644
> index 0000000000..bf3c01182e
> --- /dev/null
> +++ b/sysdeps/generic/dl-auxv.h
> @@ -0,0 +1,21 @@
> +/* Auxiliary vector processing.  Generic version.
> +   Copyright (C) 2020 Free Software Foundation, Inc.

OK.

> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* Define DL_PLATFORM_AUXV in order to process platform-specific AUXV entries
> +   during the initialization of the loader or of a static libc.  */
> +#define DL_PLATFORM_AUXV

OK. Default version does nothing.

> diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile
> index df45d348d2..d1c71a0ca4 100644
> --- a/sysdeps/powerpc/Makefile
> +++ b/sysdeps/powerpc/Makefile
> @@ -14,6 +14,23 @@ mod-tlsopt-powerpc.so-no-z-defs = yes
>  tests += tst-tlsopt-powerpc
>  $(objpfx)tst-tlsopt-powerpc: $(objpfx)mod-tlsopt-powerpc.so
>  
> +tests-static += tst-cache-ppc-static
> +tests-internal += tst-cache-ppc-static

OK. Add one test.

> +
> +ifeq (yes,$(build-shared))
> +modules-names += mod-cache-ppc
> +tests += tst-cache-ppc tst-cache-ppc-static-dlopen
> +tests-static += tst-cache-ppc-static-dlopen
> +test-internal-extras += mod-cache-ppc
> +
> +mod-cache-ppc.so-no-z-defs = yes
> +tst-cache-ppc-static-dlopen-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(common-objpfx)elf
> +$(objpfx)tst-cache-ppc-static-dlopen: $(common-objpfx)dlfcn/libdl.a
> +$(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so
> +
> +$(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so
> +endif

OK. Add two tests and modules.

> +
>  ifneq (no,$(multi-arch))
>  tests-static += tst-tlsifunc-static
>  tests-internal += tst-tlsifunc-static
> diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
> index 2ae68c41f1..7a7d93dd0a 100644
> --- a/sysdeps/powerpc/dl-procinfo.c
> +++ b/sysdeps/powerpc/dl-procinfo.c
> @@ -89,5 +89,22 @@ PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15]
>  ,
>  #endif
>  
> +#if !IS_IN (ldconfig)
> +# if !defined PROCINFO_DECL && defined SHARED
> +     ._dl_cache_line_size

OK. Define cache line size.

> +# else
> +PROCINFO_CLASS int _dl_cache_line_size
> +# endif
> +# ifndef PROCINFO_DECL
> +     = 0
> +# endif
> +# if !defined SHARED || defined PROCINFO_DECL
> +;
> +# else
> +,
> +# endif
> +#endif
> +
> +
>  #undef PROCINFO_DECL
>  #undef PROCINFO_CLASS
> diff --git a/sysdeps/powerpc/mod-cache-ppc.c b/sysdeps/powerpc/mod-cache-ppc.c
> new file mode 100644
> index 0000000000..81fad52078
> --- /dev/null
> +++ b/sysdeps/powerpc/mod-cache-ppc.c
> @@ -0,0 +1,45 @@
> +/* Test if an executable can read from rtld_global_ro._dl_cache_line_size.
> +   Copyright (C) 2020 Free Software Foundation, Inc.

OK.

> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <sys/auxv.h>
> +#include <ldsodefs.h>
> +#include <errno.h>
> +
> +/* errnop is required in order to work around BZ #20802.  */
> +int
> +test_cache (int *errnop)
> +{
> +  int cls1 = GLRO (dl_cache_line_size);

OK.

> +  errno = *errnop;
> +  uint64_t cls2 = getauxval (AT_DCACHEBSIZE);

OK.

> +  *errnop = errno;

OK. Copy back errno.

> +
> +  printf ("AT_DCACHEBSIZE      = %" PRIu64 " B\n", cls2);
> +  printf ("_dl_cache_line_size = %d B\n", cls1);
> +
> +  if (cls1 != cls2)
> +    {
> +      printf ("error: _dl_cache_line_size != AT_DCACHEBSIZE\n");
> +      return 1;
> +    }
> +
> +  return 0;
> +}
> diff --git a/sysdeps/powerpc/powerpc32/a2/memcpy.S b/sysdeps/powerpc/powerpc32/a2/memcpy.S
> index fe5dab847a..6f4d8a7b34 100644
> --- a/sysdeps/powerpc/powerpc32/a2/memcpy.S
> +++ b/sysdeps/powerpc/powerpc32/a2/memcpy.S
> @@ -18,6 +18,7 @@
>     <https://www.gnu.org/licenses/>.  */
>  
>  #include <sysdep.h>
> +#include <rtld-global-offsets.h>

OK.

>  
>  #define PREFETCH_AHEAD 4        /* no cache lines SRC prefetching ahead  */
>  #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
> @@ -106,25 +107,23 @@ EALIGN (memcpy, 5, 0)
>  L(dst_aligned):
>  
>  
> -#ifdef SHARED
> +#ifdef PIC
>  	mflr    r0
> -/* Establishes GOT addressability so we can load __cache_line_size
> -   from static. This value was set from the aux vector during startup.  */
> +/* Establishes GOT addressability so we can load the cache line size
> +   from rtld_global_ro.  This value was set from the aux vector during
> +   startup.  */

OK.

>  	SETUP_GOT_ACCESS(r9,got_label)
> -	addis   r9,r9,__cache_line_size-got_label@ha
> -	lwz     r9,__cache_line_size-got_label@l(r9)
> -	mtlr    r0
> -#else
> -/* Load __cache_line_size from static. This value was set from the
> -   aux vector during startup.  */
> -	lis     r9,__cache_line_size@ha
> -	lwz     r9,__cache_line_size@l(r9)
> +	addis	r9,r9,_GLOBAL_OFFSET_TABLE_-got_label@ha
> +	addi	r9,r9,_GLOBAL_OFFSET_TABLE_-got_label@l
> +	mtlr	r0

OK.

>  #endif
> +	__GLRO(r9, r9, _dl_cache_line_size,
> +	       RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)

OK.

>  
>  	cmplwi  cr5, r9, 0
>  	bne+    cr5,L(cachelineset)
>  
> -/* __cache_line_size not set: generic byte copy without much optimization */
> +/* Cache line size not set: generic byte copy without much optimization */

OK.

>  	andi.	r0,r5,1		/* If length is odd copy one byte.  */
>  	beq	L(cachelinenotset_align)
>  	lbz	r7,0(r4)	/* Read one byte from source.  */
> diff --git a/sysdeps/powerpc/powerpc32/dl-machine.c b/sysdeps/powerpc/powerpc32/dl-machine.c
> index d5ea4b97f4..6090e60d3c 100644
> --- a/sysdeps/powerpc/powerpc32/dl-machine.c
> +++ b/sysdeps/powerpc/powerpc32/dl-machine.c
> @@ -25,11 +25,6 @@
>  #include <dl-machine.h>
>  #include <_itoa.h>
>  
> -/* The value __cache_line_size is defined in dl-sysdep.c and is initialised
> -   by _dl_sysdep_start via DL_PLATFORM_INIT.  */
> -extern int __cache_line_size attribute_hidden;

OK. Remove the use of __cache_line_size.

> -
> -
>  /* Stuff for the PLT.  */
>  #define PLT_INITIAL_ENTRY_WORDS 18
>  #define PLT_LONGBRANCH_ENTRY_WORDS 0
> @@ -309,14 +304,14 @@ __elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
>  
>  	 Assumes that dcbst and icbi apply to lines of 16 bytes or
>  	 more.  Current known line sizes are 16, 32, and 128 bytes.
> -	 The following gets the __cache_line_size, when available.  */
> +	 The following gets the cache line size, when available.  */

OK.

>  
>        /* Default minimum 4 words per cache line.  */
>        int line_size_words = 4;
>  
> -      if (lazy && __cache_line_size != 0)
> +      if (lazy && GLRO(dl_cache_line_size) != 0)

OK.

>  	/* Convert bytes to words.  */
> -	line_size_words = __cache_line_size / 4;
> +	line_size_words = GLRO(dl_cache_line_size) / 4;

OK.

>  
>        size_modified = lazy ? rel_offset_words : 6;
>        for (i = 0; i < size_modified; i += line_size_words)
> diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S
> index 5f614c07d7..26c37f8a17 100644
> --- a/sysdeps/powerpc/powerpc32/memset.S
> +++ b/sysdeps/powerpc/powerpc32/memset.S
> @@ -17,12 +17,13 @@
>     <https://www.gnu.org/licenses/>.  */
>  
>  #include <sysdep.h>
> +#include <rtld-global-offsets.h>

OK.

>  
>  /* void * [r3] memset (void *s [r3], int c [r4], size_t n [r5]));
>     Returns 's'.
>  
>     The memset is done in four sizes: byte (8 bits), word (32 bits),
> -   32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits).
> +   32-byte blocks (256 bits) and cache line size (128, 256, 1024 bits).

OK.

>     There is a special case for setting whole cache lines to 0, which
>     takes advantage of the dcbz instruction.  */
>  
> @@ -95,7 +96,7 @@ L(caligned):
>  
>  /* Check if we can use the special case for clearing memory using dcbz.
>     This requires that we know the correct cache line size for this
> -   processor.  Getting the __cache_line_size may require establishing GOT
> +   processor.  Getting the cache line size may require establishing GOT

OK.

>     addressability, so branch out of line to set this up.  */
>  	beq	cr1, L(checklinesize)
>  
> @@ -230,26 +231,22 @@ L(medium_28t):
>  	blr
>  
>  L(checklinesize):
> -#ifdef SHARED
> -	mflr	rTMP
>  /* If the remaining length is less the 32 bytes then don't bother getting
>     the cache line size.  */
>  	beq	L(medium)
> -/* Establishes GOT addressability so we can load __cache_line_size
> -   from static. This value was set from the aux vector during startup.  */
> +#ifdef PIC
> +	mflr	rTMP
> +/* Establishes GOT addressability so we can load the cache line size
> +   from rtld_global_ro. This value was set from the aux vector during
> +   startup.  */
>  	SETUP_GOT_ACCESS(rGOT,got_label)
> -	addis	rGOT,rGOT,__cache_line_size-got_label@ha
> -	lwz	rCLS,__cache_line_size-got_label@l(rGOT)
> +	addis	rGOT,rGOT,_GLOBAL_OFFSET_TABLE_-got_label@ha
> +	addi	rGOT,rGOT,_GLOBAL_OFFSET_TABLE_-got_label@l

OK.

>  	mtlr	rTMP
> -#else
> -/* Load __cache_line_size from static. This value was set from the
> -   aux vector during startup.  */
> -	lis	rCLS,__cache_line_size@ha
> -/* If the remaining length is less the 32 bytes then don't bother getting
> -   the cache line size.  */
> -	beq	L(medium)
> -	lwz	rCLS,__cache_line_size@l(rCLS)
>  #endif
> +/* Load rtld_global_ro._dl_cache_line_size.  */
> +	__GLRO(rCLS, rGOT, _dl_cache_line_size,
> +	       RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)

OK.

>  
>  /* If the cache line size was not set then goto to L(nondcbz), which is
>     safe for any cache line size.  */
> diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h
> index ceed9ef158..0dee5f2757 100644
> --- a/sysdeps/powerpc/powerpc32/sysdep.h
> +++ b/sysdeps/powerpc/powerpc32/sysdep.h
> @@ -157,4 +157,30 @@ GOT_LABEL:			;					      \
>  /* Label in text section.  */
>  #define C_TEXT(name) name
>  
> +/* Read the value of member from rtld_global_ro.  */
> +#ifdef PIC
> +# ifdef SHARED
> +#  if IS_IN (rtld)
> +/* Inside ld.so we use the local alias to avoid runtime GOT
> +   relocations.  */
> +#   define __GLRO(rOUT, rGOT, member, offset)				\
> +	lwz     rOUT,_rtld_local_ro@got(rGOT);				\
> +	lwz     rOUT,offset(rOUT)

OK. Within rtld use local.

> +#  else
> +#   define __GLRO(rOUT, rGOT, member, offset)				\
> +	lwz     rOUT,_rtld_global_ro@got(rGOT);				\
> +	lwz     rOUT,offset(rOUT)

OK.

> +#  endif
> +# else
> +#  define __GLRO(rOUT, rGOT, member, offset)				\
> +	lwz     rOUT,member@got(rGOT);					\
> +	lwz     rOUT,0(rOUT)

OK.

> +# endif
> +#else
> +/* Position-dependent code does not require access to the GOT.  */
> +# define __GLRO(rOUT, rGOT, member, offset)				\
> +	lis     rOUT,(member+LOWORD)@ha					\
> +	lwz     rOUT,(member+LOWORD)@l(rOUT)

OK.

> +#endif	/* PIC */
> +
>  #endif	/* __ASSEMBLER__ */
> diff --git a/sysdeps/powerpc/powerpc64/a2/memcpy.S b/sysdeps/powerpc/powerpc64/a2/memcpy.S
> index 0e3c435f3c..1162cc2207 100644
> --- a/sysdeps/powerpc/powerpc64/a2/memcpy.S
> +++ b/sysdeps/powerpc/powerpc64/a2/memcpy.S
> @@ -18,6 +18,7 @@
>     <https://www.gnu.org/licenses/>.  */
>  
>  #include <sysdep.h>
> +#include <rtld-global-offsets.h>

OK.

>  
>  #ifndef MEMCPY
>  # define MEMCPY memcpy
> @@ -27,8 +28,9 @@
>  #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
>  
>  	.section        ".toc","aw"
> -.LC0:
> -	.tc __cache_line_size[TC],__cache_line_size
> +__GLRO_DEF(dl_cache_line_size)
> +
> +

OK.

>  	.section        ".text"
>  	.align 2
>  
> @@ -55,10 +57,11 @@ ENTRY (MEMCPY, 5)
>  	*/
>  
>  	neg     r8,r3           /* LS 4 bits = # bytes to 8-byte dest bdry  */
> -	ld      r9,.LC0@toc(r2) /* Get cache line size (part 1) */
> +	/* Get the cache line size.  */
> +	__GLRO (r9, dl_cache_line_size,
> +		RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)

OK.

>  	clrldi  r8,r8,64-4      /* align to 16byte boundary  */
>  	sub     r7,r4,r3        /* compute offset to src from dest */
> -	lwz     r9,0(r9)        /* Get cache line size (part 2) */
>  	cmpldi  cr0,r8,0        /* Were we aligned on a 16 byte bdy? */
>  	addi    r10,r9,-1       /* Cache line mask */
>  	beq+    L(dst_aligned)
> @@ -121,7 +124,7 @@ L(dst_aligned):
>  	cmpdi	cr0,r9,0	/* Cache line size set? */
>  	bne+	cr0,L(cachelineset)
>  
> -/* __cache_line_size not set: generic byte copy without much optimization */
> +/* Cache line size not set: generic byte copy without much optimization */

OK.

>  	clrldi.	r0,r5,63	/* If length is odd copy one byte */
>  	beq	L(cachelinenotset_align)
>  	lbz	r7,0(r4)	/* Read one byte from source */
> diff --git a/sysdeps/powerpc/powerpc64/memset.S b/sysdeps/powerpc/powerpc64/memset.S
> index 857c023755..2fa98e6e2d 100644
> --- a/sysdeps/powerpc/powerpc64/memset.S
> +++ b/sysdeps/powerpc/powerpc64/memset.S
> @@ -17,10 +17,11 @@
>     <https://www.gnu.org/licenses/>.  */
>  
>  #include <sysdep.h>
> +#include <rtld-global-offsets.h>

OK.

>  
>  	.section	".toc","aw"
> -.LC0:
> -	.tc __cache_line_size[TC],__cache_line_size
> +__GLRO_DEF(dl_cache_line_size)
> +

OK.

>  	.section	".text"
>  	.align 2
>  
> @@ -146,8 +147,10 @@ L(zloopstart):
>  /* If the remaining length is less the 32 bytes, don't bother getting
>  	 the cache line size.  */
>  	beq	L(medium)
> -	ld	rCLS,.LC0@toc(r2)
> -	lwz	rCLS,0(rCLS)
> +	/* Read the cache line size.  */
> +	__GLRO (rCLS, dl_cache_line_size,
> +		RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)
> +

OK.

>  /* If the cache line size was not set just goto to L(nondcbz) which is
>  	 safe for any cache line size.  */
>  	cmpldi	cr1,rCLS,0
> diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h
> index aefd29a14d..d6616ac905 100644
> --- a/sysdeps/powerpc/powerpc64/sysdep.h
> +++ b/sysdeps/powerpc/powerpc64/sysdep.h
> @@ -342,6 +342,30 @@ LT_LABELSUFFIX(name,_name_end): ; \
>  #define	PSEUDO_END_ERRVAL(name) \
>    END (name)
>  
> +#ifdef SHARED
> +# if IS_IN (rtld)
> +	 /* Inside ld.so we use the local alias to avoid runtime GOT
> +	    relocations.  */
> +#  define __GLRO_DEF(var)				\
> +.LC__ ## var:						\
> +	.tc _rtld_local_ro[TC],_rtld_local_ro

OK.

> +# else
> +#  define __GLRO_DEF(var)				\
> +.LC__ ## var:						\
> +	.tc _rtld_global_ro[TC],_rtld_global_ro
> +# endif
> +# define __GLRO(rOUT, var, offset)		\
> +	ld	rOUT,.LC__ ## var@toc(r2);	\
> +	lwz	rOUT,offset(rOUT)
> +#else
> +# define __GLRO_DEF(var)			\
> +.LC__ ## var:					\
> +	.tc _ ## var[TC],_ ## var
> +# define __GLRO(rOUT, var, offset)		\
> +	ld	rOUT,.LC__ ## var@toc(r2);	\
> +	lwz	rOUT,0(rOUT)
> +#endif

OK.

> +
>  #else /* !__ASSEMBLER__ */
>  
>  #if _CALL_ELF != 2
> diff --git a/sysdeps/powerpc/rtld-global-offsets.sym b/sysdeps/powerpc/rtld-global-offsets.sym
> index f5ea5a1466..6b348fd522 100644
> --- a/sysdeps/powerpc/rtld-global-offsets.sym
> +++ b/sysdeps/powerpc/rtld-global-offsets.sym
> @@ -6,3 +6,4 @@
>  
>  RTLD_GLOBAL_RO_DL_HWCAP_OFFSET	rtld_global_ro_offsetof (_dl_hwcap)
>  RTLD_GLOBAL_RO_DL_HWCAP2_OFFSET	rtld_global_ro_offsetof (_dl_hwcap2)
> +RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET	rtld_global_ro_offsetof (_dl_cache_line_size)

Ok.

> diff --git a/sysdeps/powerpc/tst-cache-ppc-static-dlopen.c b/sysdeps/powerpc/tst-cache-ppc-static-dlopen.c
> new file mode 100644
> index 0000000000..296d0f4397
> --- /dev/null
> +++ b/sysdeps/powerpc/tst-cache-ppc-static-dlopen.c
> @@ -0,0 +1,54 @@
> +/* Test dl_cache_line_size from a dlopen'ed DSO from a static executable.
> +   Copyright (C) 2020 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <dlfcn.h>
> +#include <stdio.h>
> +#include <errno.h>
> +
> +int test_cache(int *);
> +
> +static int
> +do_test (void)
> +{
> +  int ret;
> +  void *handle;
> +  int (*test_cache) (int *);
> +
> +  handle = dlopen ("mod-cache-ppc.so", RTLD_LAZY | RTLD_LOCAL);
> +  if (handle == NULL)
> +    {
> +      printf ("dlopen (mod-cache-ppc.so): %s\n", dlerror ());
> +      return 1;
> +    }
> +
> +  test_cache = dlsym (handle, "test_cache");
> +  if (test_cache == NULL)
> +    {
> +      printf ("dlsym (test_cache): %s\n", dlerror ());
> +      return 1;
> +    }
> +
> +  ret = test_cache(&errno);
> +
> +  test_cache = NULL;
> +  dlclose (handle);

OK.

> +
> +  return ret;
> +}
> +
> +#include <support/test-driver.c>
> diff --git a/sysdeps/powerpc/tst-cache-ppc-static.c b/sysdeps/powerpc/tst-cache-ppc-static.c
> new file mode 100644
> index 0000000000..b0c417e822
> --- /dev/null
> +++ b/sysdeps/powerpc/tst-cache-ppc-static.c
> @@ -0,0 +1,20 @@
> +/* Test if an executable can read from _dl_cache_line_size.
> +   Copyright (C) 2020 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include "tst-cache-ppc.c"
> +#include "mod-cache-ppc.c"

OK.

> diff --git a/sysdeps/powerpc/tst-cache-ppc.c b/sysdeps/powerpc/tst-cache-ppc.c
> new file mode 100644
> index 0000000000..86c7117c43
> --- /dev/null
> +++ b/sysdeps/powerpc/tst-cache-ppc.c
> @@ -0,0 +1,29 @@
> +/* Test if an executable can read from rtld_global_ro._dl_cache_line_size.
> +   Copyright (C) 2020 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <errno.h>
> +
> +int test_cache(int *);
> +
> +static int
> +do_test (void)
> +{
> +  return test_cache(&errno);
> +}

OK.

> +
> +#include <support/test-driver.c>
> diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h
> similarity index 60%
> rename from sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
> rename to sysdeps/unix/sysv/linux/powerpc/dl-auxv.h
> index 5d65bc6303..be2189732a 100644
> --- a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
> +++ b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h
> @@ -1,5 +1,5 @@
> -/* Operating system support for run-time dynamic linker.  Linux/PPC version.
> -   Copyright (C) 1997-2020 Free Software Foundation, Inc.
> +/* Auxiliary vector processing.  Linux/PPC version.
> +   Copyright (C) 2020 Free Software Foundation, Inc.

OK.

>     This file is part of the GNU C Library.
>  
>     The GNU C Library is free software; you can redistribute it and/or
> @@ -16,18 +16,15 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>  
> -#include <config.h>
>  #include <ldsodefs.h>
>  
> -int __cache_line_size attribute_hidden;
> +#if IS_IN (libc) && !defined SHARED
> +int GLRO(dl_cache_line_size);
> +#endif
>  
> -/* Scan the Aux Vector for the "Data Cache Block Size" entry.  If found
> -   verify that the static extern __cache_line_size is defined by checking
> -   for not NULL.  If it is defined then assign the cache block size
> -   value to __cache_line_size.  */
> +/* Scan the Aux Vector for the "Data Cache Block Size" entry and assign it
> +   to dl_cache_line_size.  */
>  #define DL_PLATFORM_AUXV						      \
>        case AT_DCACHEBSIZE:						      \
> -	__cache_line_size = av->a_un.a_val;				      \
> +	GLRO(dl_cache_line_size) = av->a_un.a_val;			      \

OK.

>  	break;
> -
> -#include <sysdeps/unix/sysv/linux/dl-sysdep.c>
> diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-static.c b/sysdeps/unix/sysv/linux/powerpc/dl-static.c
> index 59ce4e8972..a77e07b503 100644
> --- a/sysdeps/unix/sysv/linux/powerpc/dl-static.c
> +++ b/sysdeps/unix/sysv/linux/powerpc/dl-static.c
> @@ -30,12 +30,14 @@ _dl_var_init (void *array[])
>        DL_AUXV = 1,
>        DL_HWCAP = 2,
>        DL_HWCAP2 = 3,
> +      DL_CACHE_LINE_SIZE = 4

OK.

>      };
>  
>    GLRO(dl_pagesize) = *((size_t *) array[DL_PAGESIZE]);
>    GLRO(dl_auxv) = (ElfW(auxv_t) *) *((size_t *) array[DL_AUXV]);
>    GLRO(dl_hwcap)  = *((unsigned long int *) array[DL_HWCAP]);
>    GLRO(dl_hwcap2) = *((unsigned long int *) array[DL_HWCAP2]);
> +  GLRO(dl_cache_line_size) = (int) *((int *) array[DL_CACHE_LINE_SIZE]);

OK.

>  }
>  
>  #else
> @@ -46,6 +48,7 @@ static void *variables[] =
>    &GLRO(dl_auxv),
>    &GLRO(dl_hwcap),
>    &GLRO(dl_hwcap2),
> +  &GLRO(dl_cache_line_size)

OK.

>  };
>  
>  static void
> diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
> index 93f8659fa6..fc86d6e234 100644
> --- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c
> +++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
> @@ -24,7 +24,6 @@
>  #include <hwcapinfo.h>
>  #endif
>  
> -int __cache_line_size attribute_hidden;

OK.

>  /* The main work is done in the generic function.  */
>  #define LIBC_START_MAIN generic_start_main
>  #define LIBC_START_DISABLE_INLINE
> @@ -71,15 +70,12 @@ __libc_start_main (int argc, char **argv,
>        rtld_fini = NULL;
>      }
>  
> -  /* Initialize the __cache_line_size variable from the aux vector.  For the
> -     static case, we also need _dl_hwcap, _dl_hwcap2 and _dl_platform, so we
> -     can call __tcb_parse_hwcap_and_convert_at_platform ().  */

OK.

>    for (ElfW (auxv_t) * av = auxvec; av->a_type != AT_NULL; ++av)
>      switch (av->a_type)
>        {
> -      case AT_DCACHEBSIZE:
> -	__cache_line_size = av->a_un.a_val;
> -	break;
> +      /* For the static case, we also need _dl_hwcap, _dl_hwcap2 and
> +         _dl_platform, so we can call
> +         __tcb_parse_hwcap_and_convert_at_platform ().  */

OK.

>  #ifndef SHARED
>        case AT_HWCAP:
>  	_dl_hwcap = (unsigned long int) av->a_un.a_val;
>
  
Siddhesh Poyarekar Jan. 17, 2020, 3:56 a.m. UTC | #2
On 16/01/20 10:07 pm, Carlos O'Donell wrote:
> On 1/10/20 5:27 PM, Tulio Magno Quites Machado Filho wrote:
>> Changes since v1:
>>  - Updated copyright dates
>>  - Added tests
>>  - Fixed coding style issues
>>  - Added macros __GLRO_DEF and __GLRO in the 64-bit case.
>>  - Removed sysdeps/unix/sysv/linux/powerpc/dl-support.c in favor of
>>    sysdeps/generic/dl-auxv.h which is included by elf/dl-support.c and
>>    elf/dl-sysdep.c
>>  - Removed sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
>>
> 
> OK for master. This is probably the smallest change you can make to fix
> all of this up.
> 
> This needs approval again by Siddhesh.
> 
> Reviewed-by: Carlos O'Donell <carlos@redhat.com>
> 

This is fine.

Siddhesh
  
Tulio Magno Quites Machado Filho Jan. 17, 2020, 12:39 p.m. UTC | #3
Siddhesh Poyarekar <siddhesh@gotplt.org> writes:

> On 16/01/20 10:07 pm, Carlos O'Donell wrote:
>> On 1/10/20 5:27 PM, Tulio Magno Quites Machado Filho wrote:
>>> Changes since v1:
>>>  - Updated copyright dates
>>>  - Added tests
>>>  - Fixed coding style issues
>>>  - Added macros __GLRO_DEF and __GLRO in the 64-bit case.
>>>  - Removed sysdeps/unix/sysv/linux/powerpc/dl-support.c in favor of
>>>    sysdeps/generic/dl-auxv.h which is included by elf/dl-support.c and
>>>    elf/dl-sysdep.c
>>>  - Removed sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
>>>
>> 
>> OK for master. This is probably the smallest change you can make to fix
>> all of this up.
>> 
>> This needs approval again by Siddhesh.
>> 
>> Reviewed-by: Carlos O'Donell <carlos@redhat.com>
>
> This is fine.

I've just pushed both patches to master.

Thanks!
  
Florian Weimer Jan. 17, 2020, 5:14 p.m. UTC | #4
* Tulio Magno Quites Machado Filho:

> Siddhesh Poyarekar <siddhesh@gotplt.org> writes:
>
>> On 16/01/20 10:07 pm, Carlos O'Donell wrote:
>>> On 1/10/20 5:27 PM, Tulio Magno Quites Machado Filho wrote:
>>>> Changes since v1:
>>>>  - Updated copyright dates
>>>>  - Added tests
>>>>  - Fixed coding style issues
>>>>  - Added macros __GLRO_DEF and __GLRO in the 64-bit case.
>>>>  - Removed sysdeps/unix/sysv/linux/powerpc/dl-support.c in favor of
>>>>    sysdeps/generic/dl-auxv.h which is included by elf/dl-support.c and
>>>>    elf/dl-sysdep.c
>>>>  - Removed sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
>>>>
>>> 
>>> OK for master. This is probably the smallest change you can make to fix
>>> all of this up.
>>> 
>>> This needs approval again by Siddhesh.
>>> 
>>> Reviewed-by: Carlos O'Donell <carlos@redhat.com>
>>
>> This is fine.
>
> I've just pushed both patches to master.

This appears to have caused (on powerpc-linux-gnu):

../sysdeps/powerpc/powerpc32/memset.S: Assembler messages:
../sysdeps/powerpc/powerpc32/memset.S:248: Error: syntax error; found ` ', expected `,'
../sysdeps/powerpc/powerpc32/memset.S:248: Error: junk at end of line: `lwz 8,(_dl_cache_line_size+4)@l(8)'

Thanks,
Florian
  
Joseph Myers Jan. 17, 2020, 5:15 p.m. UTC | #5
One of these changes appears to have broken the build for 32-bit powerpc 
(all the 32-bit configurations in build-many-glibcs.py), at least with 
GCC 8 / binutils 2.33.

https://sourceware.org/ml/libc-testresults/2020-q1/msg00077.html

../sysdeps/powerpc/powerpc32/memset.S: Assembler messages:
../sysdeps/powerpc/powerpc32/memset.S:248: Error: syntax error; found ` ', expected `,'
../sysdeps/powerpc/powerpc32/memset.S:248: Error: junk at end of line: `lwz 8,(_dl_cache_line_size+4)@l(8)'
  
Tulio Magno Quites Machado Filho Jan. 17, 2020, 10:28 p.m. UTC | #6
Joseph Myers <joseph@codesourcery.com> writes:

> One of these changes appears to have broken the build for 32-bit powerpc 
> (all the 32-bit configurations in build-many-glibcs.py), at least with 
> GCC 8 / binutils 2.33.
>
> https://sourceware.org/ml/libc-testresults/2020-q1/msg00077.html
>
> ../sysdeps/powerpc/powerpc32/memset.S: Assembler messages:
> ../sysdeps/powerpc/powerpc32/memset.S:248: Error: syntax error; found ` ', expected `,'
> ../sysdeps/powerpc/powerpc32/memset.S:248: Error: junk at end of line: `lwz 8,(_dl_cache_line_size+4)@l(8)'

I've just reproduced this.
Interestingly, I can't reproduce it from outside a build-many-glibcs.py with
Binutils 2.33.1 (Debian 10). I tried with GCC 7, 8 and 9.

I'll continue to investigate it.

Thanks!
  
Joseph Myers July 23, 2020, 2:47 p.m. UTC | #7
I'm seeing all the statically linked glibc tests fail for 32-bit powerpc 
when built with GCC 10:

malloc.c:2394: sysmalloc: Assertion `(old_top == initial_top (av) && old_size == 0) || ((unsigned long) (old_size) >= MINSIZE && prev_inuse (old_top) && ((unsigned long) old_end & (pagesize - 1)) == 0)' failed.
Aborted

(A statically linked program with empty main produces that error on 
startup.)

If I build glibc with -fcommon, at least a trivial statically linked 
binary no longer fails.  So I think there may have been something missing 
from the fixes to build with -fno-common; they got glibc building again, 
but not working in the statically linked case.
  
Andreas Schwab July 23, 2020, 4:46 p.m. UTC | #8
On Jul 23 2020, Joseph Myers wrote:

> I'm seeing all the statically linked glibc tests fail for 32-bit powerpc 
> when built with GCC 10:
>
> malloc.c:2394: sysmalloc: Assertion `(old_top == initial_top (av) && old_size == 0) || ((unsigned long) (old_size) >= MINSIZE && prev_inuse (old_top) && ((unsigned long) old_end & (pagesize - 1)) == 0)' failed.
> Aborted

I don't see that here.

https://build.opensuse.org/package/live_build_log/home:Andreas_Schwab:glibc/glibc:testsuite/p/ppc

Andreas.
  
Florian Weimer July 31, 2020, 12:42 p.m. UTC | #9
* Joseph Myers:

> I'm seeing all the statically linked glibc tests fail for 32-bit powerpc 
> when built with GCC 10:
>
> malloc.c:2394: sysmalloc: Assertion `(old_top == initial_top (av) && old_size == 0) || ((unsigned long) (old_size) >= MINSIZE && prev_inuse (old_top) && ((unsigned long) old_end & (pagesize - 1)) == 0)' failed.
> Aborted
>
> (A statically linked program with empty main produces that error on 
> startup.)
>
> If I build glibc with -fcommon, at least a trivial statically linked 
> binary no longer fails.  So I think there may have been something missing 
> from the fixes to build with -fno-common; they got glibc building again, 
> but not working in the statically linked case.

What's your binutils version?

Does this affect the statically linked test binaries built by
build-many-glibcs.py?

Thanks,
Florian
  
Joseph Myers July 31, 2020, 6:35 p.m. UTC | #10
On Fri, 31 Jul 2020, Florian Weimer via Libc-alpha wrote:

> * Joseph Myers:
> 
> > I'm seeing all the statically linked glibc tests fail for 32-bit powerpc 
> > when built with GCC 10:
> >
> > malloc.c:2394: sysmalloc: Assertion `(old_top == initial_top (av) && old_size == 0) || ((unsigned long) (old_size) >= MINSIZE && prev_inuse (old_top) && ((unsigned long) old_end & (pagesize - 1)) == 0)' failed.
> > Aborted
> >
> > (A statically linked program with empty main produces that error on 
> > startup.)
> >
> > If I build glibc with -fcommon, at least a trivial statically linked 
> > binary no longer fails.  So I think there may have been something missing 
> > from the fixes to build with -fno-common; they got glibc building again, 
> > but not working in the statically linked case.
> 
> What's your binutils version?

This testing was with 2.35.50.20200720.

> Does this affect the statically linked test binaries built by
> build-many-glibcs.py?

Yes.  I tested build-many-glibcs.py for powerpc-linux-gnu with current 
default versions of everything (so binutils 2.35 branch in this case); 
running the math/atest-exp binary left from a --keep=all build produces 
that same assertion failure.
  
Florian Weimer Aug. 3, 2020, 8:15 a.m. UTC | #11
* Joseph Myers:

>> Does this affect the statically linked test binaries built by
>> build-many-glibcs.py?
>
> Yes.  I tested build-many-glibcs.py for powerpc-linux-gnu with current 
> default versions of everything (so binutils 2.35 branch in this case); 
> running the math/atest-exp binary left from a --keep=all build produces 
> that same assertion failure.

I can reproduce it:

(gdb) bt
#0  0x10007d74 in __libc_signal_restore_set (set=0xfffed708)
    at ../sysdeps/unix/sysv/linux/internal-signals.h:104
#1  raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:47
#2  0x10000268 in abort () at abort.c:79
#3  0x1001c7b8 in __malloc_assert (
    assertion=assertion@entry=0x10081ec4 "(old_top == initial_top (av) && old_size == 0) || ((unsigned long) (old_size) >= MINSIZE && prev_inuse (old_top) && ((unsigned long) old_end & (pagesize - 1)) == 0)", 
    file=file@entry=0x10081690 "malloc.c", line=line@entry=2394, 
    function=function@entry=0x100828b8 <__PRETTY_FUNCTION__.3> "sysmalloc") at malloc.c:298
#4  0x1001ee1c in sysmalloc (nb=nb@entry=64, 
    av=av@entry=0x100c03d4 <main_arena>) at malloc.c:2394
#5  0x10020614 in _int_malloc (av=av@entry=0x100c03d4 <main_arena>, 
    bytes=bytes@entry=53) at malloc.c:4169
#6  0x10021804 in __libc_malloc (bytes=53) at malloc.c:3078
#7  0x10060f44 in _dl_get_origin ()
    at ../sysdeps/unix/sysv/linux/dl-origin.c:49
#8  0x100302f4 in _dl_non_dynamic_init () at dl-support.c:311
#9  0x10031908 in __libc_init_first (argc=argc@entry=2, 
    argv=argv@entry=0xfffeecd4, envp=0xfffeece0) at init-first.c:72
#10 0x100024dc in generic_start_main (main=0x100003d4 <main>, 
    argc=argc@entry=2, argv=argv@entry=0xfffeecd4, 
    auxvec=auxvec@entry=0xfffeed9c, init=0x10002be0 <__libc_csu_init>, 
    fini=0x10002d58 <__libc_csu_fini>, rtld_fini=rtld_fini@entry=0x0, 
    stack_end=stack_end@entry=0xfffeecd0) at ../csu/libc-start.c:250
#11 0x10002774 in __libc_start_main (argc=2, argv=0xfffeecd4, 
    ev=<optimized out>, auxvec=0xfffeed9c, rtld_fini=0x0, 
    stinfo=0x1007f3e0, stack_on_entry=0xfffeecd0)
    at ../sysdeps/unix/sysv/linux/powerpc/libc-start.c:98
#12 0x00000000 in ?? ()

main_arena.top->mchunk_size gets overwritten during tcache_init:

#0  memset () at ../sysdeps/powerpc/powerpc32/memset.S:291
#1  0x10021660 in tcache_init () at malloc.c:3021
#2  0x10021af4 in __libc_malloc (bytes=bytes@entry=53) at malloc.c:3064
#3  0x10021d80 in malloc_hook_ini (sz=53, caller=<optimized out>) at hooks.c:32
#4  0x10021aa0 in __libc_malloc (bytes=53) at malloc.c:3053
#5  0x100614c4 in _dl_get_origin () at ../sysdeps/unix/sysv/linux/dl-origin.c:49
#6  0x10030874 in _dl_non_dynamic_init () at dl-support.c:311
#7  0x10031e88 in __libc_init_first (argc=argc@entry=2, 
    argv=argv@entry=0xfffeecd4, envp=0xfffeece0) at init-first.c:72
#8  0x100024dc in generic_start_main (main=0x100003d4 <main>, argc=argc@entry=2, 
    argv=argv@entry=0xfffeecd4, auxvec=auxvec@entry=0xfffeed9c, 
    init=0x10002be0 <__libc_csu_init>, fini=0x10002d58 <__libc_csu_fini>, 
    rtld_fini=rtld_fini@entry=0x0, stack_end=stack_end@entry=0xfffeecd0)
    at ../csu/libc-start.c:250
#9  0x10002774 in __libc_start_main (argc=2, argv=0xfffeecd4, 
    ev=<optimized out>, auxvec=0xfffeed9c, rtld_fini=0x0, stinfo=0x1007f960, 
    stack_on_entry=0xfffeecd0)
    at ../sysdeps/unix/sysv/linux/powerpc/libc-start.c:98
#10 0x00000000 in ?? ()

The memset goes wrong because it loads the cache line size as 1 here:

/* Load rtld_global_ro._dl_cache_line_size.  */
	__GLRO(rCLS, rGOT, _dl_cache_line_size,
	       RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)

0x100259e4 <+576>:   lis     r8,4108
=> 0x100259e8 <+580>:   lwz     r8,3912(r8)

(gdb) print (void*)($r8 + 3912)
$24 = (void *) 0x100c0f48 <__libc_enable_secure_decided>

This is not the address that is seen by the debugger for
_dl_cache_line_size:

(gdb) print &_dl_cache_line_size 
$26 = (int *) 0x100c0f44 <_dl_cache_line_size>

The symbol table looks pretty reasonable:
  1461: 100c0f44     4 OBJECT  GLOBAL DEFAULT   23 _dl_cache_line_size
  1495: 100c0f40     4 OBJECT  GLOBAL DEFAULT   23 _dl_platform
  1512: 100c0f48     4 OBJECT  GLOBAL DEFAULT   23 __libc_enable_secure_decided
  2135: 100c0f4c     4 OBJECT  GLOBAL DEFAULT   23 __libc_argv

For some reason, we have relocations with displacements in
string/memset.o:

 244:   3d 00 00 00     lis     r8,0
                        246: R_PPC_ADDR16_HA    _dl_cache_line_size+0x4
 248:   81 08 00 00     lwz     r8,0(r8)
                        24a: R_PPC_ADDR16_LO    _dl_cache_line_size+0x4

This is due to the definition of __GLRO:

#else
/* Position-dependent code does not require access to the GOT.  */
# define __GLRO(rOUT, rGOT, member, offset)                             \
        lis     rOUT,(member+LOWORD)@ha;                                        \
        lwz     rOUT,(member+LOWORD)@l(rOUT)
#endif  /* PIC */

And LOWORD is 4 on big-endian PowerPC:

/* The 32-bit words of a 64-bit dword are at these offsets in memory.  */
#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
# define LOWORD 0
# define HIWORD 4
#else
# define LOWORD 4
# define HIWORD 0
#endif

I believe we should remove the “+LOWORD” part here:

diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h
index 2ba009e9..829eec26 100644
--- a/sysdeps/powerpc/powerpc32/sysdep.h
+++ b/sysdeps/powerpc/powerpc32/sysdep.h
@@ -179,8 +179,8 @@ GOT_LABEL:                  ;                                             \
 #else
 /* Position-dependent code does not require access to the GOT.  */
 # define __GLRO(rOUT, rGOT, member, offset)                            \
-       lis     rOUT,(member+LOWORD)@ha;                                        \
-       lwz     rOUT,(member+LOWORD)@l(rOUT)
+       lis     rOUT,(member)@ha;                                       \
+       lwz     rOUT,(member)@l(rOUT)
 #endif /* PIC */
 
 #endif /* __ASSEMBLER__ */

It fixes math/atest-exp for me.

Tulio, I believe you constructed this macro from
sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S, where it
is needed because we are loading the lower 32 bits of a 64-bit value.
It's not correct for loading a 32-bit quantity.

Technically, this bug is not a release blocker.  It's not a regression,
it's present in 2.31 as well.  I will file a bug and post a proper patch.

Thanks,
Florian
  
Carlos O'Donell Aug. 3, 2020, 4:07 p.m. UTC | #12
On 8/3/20 4:15 AM, Florian Weimer wrote:
> Technically, this bug is not a release blocker.  It's not a regression,
> it's present in 2.31 as well.  I will file a bug and post a proper patch.

I just ack'd your patch for inclusion in 2.32.

Please commit for 2.32 so the release goes out without the bug.

Tulio, please review ASAP to double check our results here.
  

Patch

diff --git a/elf/dl-support.c b/elf/dl-support.c
index ad791ab6ab..7704c101c5 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -36,6 +36,7 @@ 
 #include <stackinfo.h>
 #include <dl-vdso.h>
 #include <dl-vdso-setup.h>
+#include <dl-auxv.h>
 
 extern char *__progname;
 char **_dl_argv = &__progname;	/* This is checked for some error messages.  */
@@ -293,9 +294,7 @@  _dl_aux_init (ElfW(auxv_t) *av)
       case AT_RANDOM:
 	_dl_random = (void *) av->a_un.a_val;
 	break;
-# ifdef DL_PLATFORM_AUXV
       DL_PLATFORM_AUXV
-# endif
       }
   if (seen == 0xf)
     {
diff --git a/elf/dl-sysdep.c b/elf/dl-sysdep.c
index 53bbee14f4..854570821c 100644
--- a/elf/dl-sysdep.c
+++ b/elf/dl-sysdep.c
@@ -45,6 +45,7 @@ 
 #include <tls.h>
 
 #include <dl-tunables.h>
+#include <dl-auxv.h>
 
 extern char **_environ attribute_hidden;
 extern char _end[] attribute_hidden;
@@ -180,9 +181,7 @@  _dl_sysdep_start (void **start_argptr,
       case AT_RANDOM:
 	_dl_random = (void *) av->a_un.a_val;
 	break;
-#ifdef DL_PLATFORM_AUXV
       DL_PLATFORM_AUXV
-#endif
       }
 
 #ifndef HAVE_AUX_SECURE
diff --git a/sysdeps/generic/dl-auxv.h b/sysdeps/generic/dl-auxv.h
new file mode 100644
index 0000000000..bf3c01182e
--- /dev/null
+++ b/sysdeps/generic/dl-auxv.h
@@ -0,0 +1,21 @@ 
+/* Auxiliary vector processing.  Generic version.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Define DL_PLATFORM_AUXV in order to process platform-specific AUXV entries
+   during the initialization of the loader or of a static libc.  */
+#define DL_PLATFORM_AUXV
diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile
index df45d348d2..d1c71a0ca4 100644
--- a/sysdeps/powerpc/Makefile
+++ b/sysdeps/powerpc/Makefile
@@ -14,6 +14,23 @@  mod-tlsopt-powerpc.so-no-z-defs = yes
 tests += tst-tlsopt-powerpc
 $(objpfx)tst-tlsopt-powerpc: $(objpfx)mod-tlsopt-powerpc.so
 
+tests-static += tst-cache-ppc-static
+tests-internal += tst-cache-ppc-static
+
+ifeq (yes,$(build-shared))
+modules-names += mod-cache-ppc
+tests += tst-cache-ppc tst-cache-ppc-static-dlopen
+tests-static += tst-cache-ppc-static-dlopen
+test-internal-extras += mod-cache-ppc
+
+mod-cache-ppc.so-no-z-defs = yes
+tst-cache-ppc-static-dlopen-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(common-objpfx)elf
+$(objpfx)tst-cache-ppc-static-dlopen: $(common-objpfx)dlfcn/libdl.a
+$(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so
+
+$(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so
+endif
+
 ifneq (no,$(multi-arch))
 tests-static += tst-tlsifunc-static
 tests-internal += tst-tlsifunc-static
diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c
index 2ae68c41f1..7a7d93dd0a 100644
--- a/sysdeps/powerpc/dl-procinfo.c
+++ b/sysdeps/powerpc/dl-procinfo.c
@@ -89,5 +89,22 @@  PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15]
 ,
 #endif
 
+#if !IS_IN (ldconfig)
+# if !defined PROCINFO_DECL && defined SHARED
+     ._dl_cache_line_size
+# else
+PROCINFO_CLASS int _dl_cache_line_size
+# endif
+# ifndef PROCINFO_DECL
+     = 0
+# endif
+# if !defined SHARED || defined PROCINFO_DECL
+;
+# else
+,
+# endif
+#endif
+
+
 #undef PROCINFO_DECL
 #undef PROCINFO_CLASS
diff --git a/sysdeps/powerpc/mod-cache-ppc.c b/sysdeps/powerpc/mod-cache-ppc.c
new file mode 100644
index 0000000000..81fad52078
--- /dev/null
+++ b/sysdeps/powerpc/mod-cache-ppc.c
@@ -0,0 +1,45 @@ 
+/* Test if an executable can read from rtld_global_ro._dl_cache_line_size.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/auxv.h>
+#include <ldsodefs.h>
+#include <errno.h>
+
+/* errnop is required in order to work around BZ #20802.  */
+int
+test_cache (int *errnop)
+{
+  int cls1 = GLRO (dl_cache_line_size);
+  errno = *errnop;
+  uint64_t cls2 = getauxval (AT_DCACHEBSIZE);
+  *errnop = errno;
+
+  printf ("AT_DCACHEBSIZE      = %" PRIu64 " B\n", cls2);
+  printf ("_dl_cache_line_size = %d B\n", cls1);
+
+  if (cls1 != cls2)
+    {
+      printf ("error: _dl_cache_line_size != AT_DCACHEBSIZE\n");
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/sysdeps/powerpc/powerpc32/a2/memcpy.S b/sysdeps/powerpc/powerpc32/a2/memcpy.S
index fe5dab847a..6f4d8a7b34 100644
--- a/sysdeps/powerpc/powerpc32/a2/memcpy.S
+++ b/sysdeps/powerpc/powerpc32/a2/memcpy.S
@@ -18,6 +18,7 @@ 
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <rtld-global-offsets.h>
 
 #define PREFETCH_AHEAD 4        /* no cache lines SRC prefetching ahead  */
 #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
@@ -106,25 +107,23 @@  EALIGN (memcpy, 5, 0)
 L(dst_aligned):
 
 
-#ifdef SHARED
+#ifdef PIC
 	mflr    r0
-/* Establishes GOT addressability so we can load __cache_line_size
-   from static. This value was set from the aux vector during startup.  */
+/* Establishes GOT addressability so we can load the cache line size
+   from rtld_global_ro.  This value was set from the aux vector during
+   startup.  */
 	SETUP_GOT_ACCESS(r9,got_label)
-	addis   r9,r9,__cache_line_size-got_label@ha
-	lwz     r9,__cache_line_size-got_label@l(r9)
-	mtlr    r0
-#else
-/* Load __cache_line_size from static. This value was set from the
-   aux vector during startup.  */
-	lis     r9,__cache_line_size@ha
-	lwz     r9,__cache_line_size@l(r9)
+	addis	r9,r9,_GLOBAL_OFFSET_TABLE_-got_label@ha
+	addi	r9,r9,_GLOBAL_OFFSET_TABLE_-got_label@l
+	mtlr	r0
 #endif
+	__GLRO(r9, r9, _dl_cache_line_size,
+	       RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)
 
 	cmplwi  cr5, r9, 0
 	bne+    cr5,L(cachelineset)
 
-/* __cache_line_size not set: generic byte copy without much optimization */
+/* Cache line size not set: generic byte copy without much optimization */
 	andi.	r0,r5,1		/* If length is odd copy one byte.  */
 	beq	L(cachelinenotset_align)
 	lbz	r7,0(r4)	/* Read one byte from source.  */
diff --git a/sysdeps/powerpc/powerpc32/dl-machine.c b/sysdeps/powerpc/powerpc32/dl-machine.c
index d5ea4b97f4..6090e60d3c 100644
--- a/sysdeps/powerpc/powerpc32/dl-machine.c
+++ b/sysdeps/powerpc/powerpc32/dl-machine.c
@@ -25,11 +25,6 @@ 
 #include <dl-machine.h>
 #include <_itoa.h>
 
-/* The value __cache_line_size is defined in dl-sysdep.c and is initialised
-   by _dl_sysdep_start via DL_PLATFORM_INIT.  */
-extern int __cache_line_size attribute_hidden;
-
-
 /* Stuff for the PLT.  */
 #define PLT_INITIAL_ENTRY_WORDS 18
 #define PLT_LONGBRANCH_ENTRY_WORDS 0
@@ -309,14 +304,14 @@  __elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
 
 	 Assumes that dcbst and icbi apply to lines of 16 bytes or
 	 more.  Current known line sizes are 16, 32, and 128 bytes.
-	 The following gets the __cache_line_size, when available.  */
+	 The following gets the cache line size, when available.  */
 
       /* Default minimum 4 words per cache line.  */
       int line_size_words = 4;
 
-      if (lazy && __cache_line_size != 0)
+      if (lazy && GLRO(dl_cache_line_size) != 0)
 	/* Convert bytes to words.  */
-	line_size_words = __cache_line_size / 4;
+	line_size_words = GLRO(dl_cache_line_size) / 4;
 
       size_modified = lazy ? rel_offset_words : 6;
       for (i = 0; i < size_modified; i += line_size_words)
diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S
index 5f614c07d7..26c37f8a17 100644
--- a/sysdeps/powerpc/powerpc32/memset.S
+++ b/sysdeps/powerpc/powerpc32/memset.S
@@ -17,12 +17,13 @@ 
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <rtld-global-offsets.h>
 
 /* void * [r3] memset (void *s [r3], int c [r4], size_t n [r5]));
    Returns 's'.
 
    The memset is done in four sizes: byte (8 bits), word (32 bits),
-   32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits).
+   32-byte blocks (256 bits) and cache line size (128, 256, 1024 bits).
    There is a special case for setting whole cache lines to 0, which
    takes advantage of the dcbz instruction.  */
 
@@ -95,7 +96,7 @@  L(caligned):
 
 /* Check if we can use the special case for clearing memory using dcbz.
    This requires that we know the correct cache line size for this
-   processor.  Getting the __cache_line_size may require establishing GOT
+   processor.  Getting the cache line size may require establishing GOT
    addressability, so branch out of line to set this up.  */
 	beq	cr1, L(checklinesize)
 
@@ -230,26 +231,22 @@  L(medium_28t):
 	blr
 
 L(checklinesize):
-#ifdef SHARED
-	mflr	rTMP
 /* If the remaining length is less the 32 bytes then don't bother getting
    the cache line size.  */
 	beq	L(medium)
-/* Establishes GOT addressability so we can load __cache_line_size
-   from static. This value was set from the aux vector during startup.  */
+#ifdef PIC
+	mflr	rTMP
+/* Establishes GOT addressability so we can load the cache line size
+   from rtld_global_ro. This value was set from the aux vector during
+   startup.  */
 	SETUP_GOT_ACCESS(rGOT,got_label)
-	addis	rGOT,rGOT,__cache_line_size-got_label@ha
-	lwz	rCLS,__cache_line_size-got_label@l(rGOT)
+	addis	rGOT,rGOT,_GLOBAL_OFFSET_TABLE_-got_label@ha
+	addi	rGOT,rGOT,_GLOBAL_OFFSET_TABLE_-got_label@l
 	mtlr	rTMP
-#else
-/* Load __cache_line_size from static. This value was set from the
-   aux vector during startup.  */
-	lis	rCLS,__cache_line_size@ha
-/* If the remaining length is less the 32 bytes then don't bother getting
-   the cache line size.  */
-	beq	L(medium)
-	lwz	rCLS,__cache_line_size@l(rCLS)
 #endif
+/* Load rtld_global_ro._dl_cache_line_size.  */
+	__GLRO(rCLS, rGOT, _dl_cache_line_size,
+	       RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)
 
 /* If the cache line size was not set then goto to L(nondcbz), which is
    safe for any cache line size.  */
diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h
index ceed9ef158..0dee5f2757 100644
--- a/sysdeps/powerpc/powerpc32/sysdep.h
+++ b/sysdeps/powerpc/powerpc32/sysdep.h
@@ -157,4 +157,30 @@  GOT_LABEL:			;					      \
 /* Label in text section.  */
 #define C_TEXT(name) name
 
+/* Read the value of member from rtld_global_ro.  */
+#ifdef PIC
+# ifdef SHARED
+#  if IS_IN (rtld)
+/* Inside ld.so we use the local alias to avoid runtime GOT
+   relocations.  */
+#   define __GLRO(rOUT, rGOT, member, offset)				\
+	lwz     rOUT,_rtld_local_ro@got(rGOT);				\
+	lwz     rOUT,offset(rOUT)
+#  else
+#   define __GLRO(rOUT, rGOT, member, offset)				\
+	lwz     rOUT,_rtld_global_ro@got(rGOT);				\
+	lwz     rOUT,offset(rOUT)
+#  endif
+# else
+#  define __GLRO(rOUT, rGOT, member, offset)				\
+	lwz     rOUT,member@got(rGOT);					\
+	lwz     rOUT,0(rOUT)
+# endif
+#else
+/* Position-dependent code does not require access to the GOT.  */
+# define __GLRO(rOUT, rGOT, member, offset)				\
+	lis     rOUT,(member+LOWORD)@ha					\
+	lwz     rOUT,(member+LOWORD)@l(rOUT)
+#endif	/* PIC */
+
 #endif	/* __ASSEMBLER__ */
diff --git a/sysdeps/powerpc/powerpc64/a2/memcpy.S b/sysdeps/powerpc/powerpc64/a2/memcpy.S
index 0e3c435f3c..1162cc2207 100644
--- a/sysdeps/powerpc/powerpc64/a2/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/a2/memcpy.S
@@ -18,6 +18,7 @@ 
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <rtld-global-offsets.h>
 
 #ifndef MEMCPY
 # define MEMCPY memcpy
@@ -27,8 +28,9 @@ 
 #define ZERO_AHEAD 2            /* no cache lines DST zeroing ahead  */
 
 	.section        ".toc","aw"
-.LC0:
-	.tc __cache_line_size[TC],__cache_line_size
+__GLRO_DEF(dl_cache_line_size)
+
+
 	.section        ".text"
 	.align 2
 
@@ -55,10 +57,11 @@  ENTRY (MEMCPY, 5)
 	*/
 
 	neg     r8,r3           /* LS 4 bits = # bytes to 8-byte dest bdry  */
-	ld      r9,.LC0@toc(r2) /* Get cache line size (part 1) */
+	/* Get the cache line size.  */
+	__GLRO (r9, dl_cache_line_size,
+		RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)
 	clrldi  r8,r8,64-4      /* align to 16byte boundary  */
 	sub     r7,r4,r3        /* compute offset to src from dest */
-	lwz     r9,0(r9)        /* Get cache line size (part 2) */
 	cmpldi  cr0,r8,0        /* Were we aligned on a 16 byte bdy? */
 	addi    r10,r9,-1       /* Cache line mask */
 	beq+    L(dst_aligned)
@@ -121,7 +124,7 @@  L(dst_aligned):
 	cmpdi	cr0,r9,0	/* Cache line size set? */
 	bne+	cr0,L(cachelineset)
 
-/* __cache_line_size not set: generic byte copy without much optimization */
+/* Cache line size not set: generic byte copy without much optimization */
 	clrldi.	r0,r5,63	/* If length is odd copy one byte */
 	beq	L(cachelinenotset_align)
 	lbz	r7,0(r4)	/* Read one byte from source */
diff --git a/sysdeps/powerpc/powerpc64/memset.S b/sysdeps/powerpc/powerpc64/memset.S
index 857c023755..2fa98e6e2d 100644
--- a/sysdeps/powerpc/powerpc64/memset.S
+++ b/sysdeps/powerpc/powerpc64/memset.S
@@ -17,10 +17,11 @@ 
    <https://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <rtld-global-offsets.h>
 
 	.section	".toc","aw"
-.LC0:
-	.tc __cache_line_size[TC],__cache_line_size
+__GLRO_DEF(dl_cache_line_size)
+
 	.section	".text"
 	.align 2
 
@@ -146,8 +147,10 @@  L(zloopstart):
 /* If the remaining length is less the 32 bytes, don't bother getting
 	 the cache line size.  */
 	beq	L(medium)
-	ld	rCLS,.LC0@toc(r2)
-	lwz	rCLS,0(rCLS)
+	/* Read the cache line size.  */
+	__GLRO (rCLS, dl_cache_line_size,
+		RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET)
+
 /* If the cache line size was not set just goto to L(nondcbz) which is
 	 safe for any cache line size.  */
 	cmpldi	cr1,rCLS,0
diff --git a/sysdeps/powerpc/powerpc64/sysdep.h b/sysdeps/powerpc/powerpc64/sysdep.h
index aefd29a14d..d6616ac905 100644
--- a/sysdeps/powerpc/powerpc64/sysdep.h
+++ b/sysdeps/powerpc/powerpc64/sysdep.h
@@ -342,6 +342,30 @@  LT_LABELSUFFIX(name,_name_end): ; \
 #define	PSEUDO_END_ERRVAL(name) \
   END (name)
 
+#ifdef SHARED
+# if IS_IN (rtld)
+	 /* Inside ld.so we use the local alias to avoid runtime GOT
+	    relocations.  */
+#  define __GLRO_DEF(var)				\
+.LC__ ## var:						\
+	.tc _rtld_local_ro[TC],_rtld_local_ro
+# else
+#  define __GLRO_DEF(var)				\
+.LC__ ## var:						\
+	.tc _rtld_global_ro[TC],_rtld_global_ro
+# endif
+# define __GLRO(rOUT, var, offset)		\
+	ld	rOUT,.LC__ ## var@toc(r2);	\
+	lwz	rOUT,offset(rOUT)
+#else
+# define __GLRO_DEF(var)			\
+.LC__ ## var:					\
+	.tc _ ## var[TC],_ ## var
+# define __GLRO(rOUT, var, offset)		\
+	ld	rOUT,.LC__ ## var@toc(r2);	\
+	lwz	rOUT,0(rOUT)
+#endif
+
 #else /* !__ASSEMBLER__ */
 
 #if _CALL_ELF != 2
diff --git a/sysdeps/powerpc/rtld-global-offsets.sym b/sysdeps/powerpc/rtld-global-offsets.sym
index f5ea5a1466..6b348fd522 100644
--- a/sysdeps/powerpc/rtld-global-offsets.sym
+++ b/sysdeps/powerpc/rtld-global-offsets.sym
@@ -6,3 +6,4 @@ 
 
 RTLD_GLOBAL_RO_DL_HWCAP_OFFSET	rtld_global_ro_offsetof (_dl_hwcap)
 RTLD_GLOBAL_RO_DL_HWCAP2_OFFSET	rtld_global_ro_offsetof (_dl_hwcap2)
+RTLD_GLOBAL_RO_DL_CACHE_LINE_SIZE_OFFSET	rtld_global_ro_offsetof (_dl_cache_line_size)
diff --git a/sysdeps/powerpc/tst-cache-ppc-static-dlopen.c b/sysdeps/powerpc/tst-cache-ppc-static-dlopen.c
new file mode 100644
index 0000000000..296d0f4397
--- /dev/null
+++ b/sysdeps/powerpc/tst-cache-ppc-static-dlopen.c
@@ -0,0 +1,54 @@ 
+/* Test dl_cache_line_size from a dlopen'ed DSO from a static executable.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <dlfcn.h>
+#include <stdio.h>
+#include <errno.h>
+
+int test_cache(int *);
+
+static int
+do_test (void)
+{
+  int ret;
+  void *handle;
+  int (*test_cache) (int *);
+
+  handle = dlopen ("mod-cache-ppc.so", RTLD_LAZY | RTLD_LOCAL);
+  if (handle == NULL)
+    {
+      printf ("dlopen (mod-cache-ppc.so): %s\n", dlerror ());
+      return 1;
+    }
+
+  test_cache = dlsym (handle, "test_cache");
+  if (test_cache == NULL)
+    {
+      printf ("dlsym (test_cache): %s\n", dlerror ());
+      return 1;
+    }
+
+  ret = test_cache(&errno);
+
+  test_cache = NULL;
+  dlclose (handle);
+
+  return ret;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/powerpc/tst-cache-ppc-static.c b/sysdeps/powerpc/tst-cache-ppc-static.c
new file mode 100644
index 0000000000..b0c417e822
--- /dev/null
+++ b/sysdeps/powerpc/tst-cache-ppc-static.c
@@ -0,0 +1,20 @@ 
+/* Test if an executable can read from _dl_cache_line_size.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "tst-cache-ppc.c"
+#include "mod-cache-ppc.c"
diff --git a/sysdeps/powerpc/tst-cache-ppc.c b/sysdeps/powerpc/tst-cache-ppc.c
new file mode 100644
index 0000000000..86c7117c43
--- /dev/null
+++ b/sysdeps/powerpc/tst-cache-ppc.c
@@ -0,0 +1,29 @@ 
+/* Test if an executable can read from rtld_global_ro._dl_cache_line_size.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+
+int test_cache(int *);
+
+static int
+do_test (void)
+{
+  return test_cache (&errno);
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h
similarity index 60%
rename from sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
rename to sysdeps/unix/sysv/linux/powerpc/dl-auxv.h
index 5d65bc6303..be2189732a 100644
--- a/sysdeps/unix/sysv/linux/powerpc/dl-sysdep.c
+++ b/sysdeps/unix/sysv/linux/powerpc/dl-auxv.h
@@ -1,5 +1,5 @@ 
-/* Operating system support for run-time dynamic linker.  Linux/PPC version.
-   Copyright (C) 1997-2020 Free Software Foundation, Inc.
+/* Auxiliary vector processing.  Linux/PPC version.
+   Copyright (C) 2020 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,18 +16,15 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <config.h>
 #include <ldsodefs.h>
 
-int __cache_line_size attribute_hidden;
+#if IS_IN (libc) && !defined SHARED
+int GLRO(dl_cache_line_size);
+#endif
 
-/* Scan the Aux Vector for the "Data Cache Block Size" entry.  If found
-   verify that the static extern __cache_line_size is defined by checking
-   for not NULL.  If it is defined then assign the cache block size
-   value to __cache_line_size.  */
+/* Scan the Aux Vector for the "Data Cache Block Size" entry and assign it
+   to dl_cache_line_size.  */
 #define DL_PLATFORM_AUXV						      \
       case AT_DCACHEBSIZE:						      \
-	__cache_line_size = av->a_un.a_val;				      \
+	GLRO(dl_cache_line_size) = av->a_un.a_val;			      \
 	break;
-
-#include <sysdeps/unix/sysv/linux/dl-sysdep.c>
diff --git a/sysdeps/unix/sysv/linux/powerpc/dl-static.c b/sysdeps/unix/sysv/linux/powerpc/dl-static.c
index 59ce4e8972..a77e07b503 100644
--- a/sysdeps/unix/sysv/linux/powerpc/dl-static.c
+++ b/sysdeps/unix/sysv/linux/powerpc/dl-static.c
@@ -30,12 +30,14 @@  _dl_var_init (void *array[])
       DL_AUXV = 1,
       DL_HWCAP = 2,
       DL_HWCAP2 = 3,
+      DL_CACHE_LINE_SIZE = 4
     };
 
   GLRO(dl_pagesize) = *((size_t *) array[DL_PAGESIZE]);
   GLRO(dl_auxv) = (ElfW(auxv_t) *) *((size_t *) array[DL_AUXV]);
   GLRO(dl_hwcap)  = *((unsigned long int *) array[DL_HWCAP]);
   GLRO(dl_hwcap2) = *((unsigned long int *) array[DL_HWCAP2]);
+  GLRO(dl_cache_line_size) = (int) *((int *) array[DL_CACHE_LINE_SIZE]);
 }
 
 #else
@@ -46,6 +48,7 @@  static void *variables[] =
   &GLRO(dl_auxv),
   &GLRO(dl_hwcap),
   &GLRO(dl_hwcap2),
+  &GLRO(dl_cache_line_size)
 };
 
 static void
diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
index 93f8659fa6..fc86d6e234 100644
--- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c
+++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c
@@ -24,7 +24,6 @@ 
 #include <hwcapinfo.h>
 #endif
 
-int __cache_line_size attribute_hidden;
 /* The main work is done in the generic function.  */
 #define LIBC_START_MAIN generic_start_main
 #define LIBC_START_DISABLE_INLINE
@@ -71,15 +70,12 @@  __libc_start_main (int argc, char **argv,
       rtld_fini = NULL;
     }
 
-  /* Initialize the __cache_line_size variable from the aux vector.  For the
-     static case, we also need _dl_hwcap, _dl_hwcap2 and _dl_platform, so we
-     can call __tcb_parse_hwcap_and_convert_at_platform ().  */
   for (ElfW (auxv_t) * av = auxvec; av->a_type != AT_NULL; ++av)
     switch (av->a_type)
       {
-      case AT_DCACHEBSIZE:
-	__cache_line_size = av->a_un.a_val;
-	break;
+      /* For the static case, we need _dl_hwcap, _dl_hwcap2 and
+         _dl_platform, so we can call
+         __tcb_parse_hwcap_and_convert_at_platform ().  */
 #ifndef SHARED
       case AT_HWCAP:
 	_dl_hwcap = (unsigned long int) av->a_un.a_val;