powerpc64: small-model @got@tprel accesses

Message ID Yl+1wM1iIdoHlZSV@squeak.grove.modra.org
State New
Headers
Series powerpc64: small-model @got@tprel accesses |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686
redhat-pt-bot/TryBot-still_applies warning Patch no longer applies to master

Commit Message

Alan Modra April 20, 2022, 7:26 a.m. UTC
  Three powerpc64 assembly files use small-model GOT accesses.  This is
fine when building libc.so since .got is relatively small, but when
compiled for libc.a they may be linked into binaries with a large
.got, relying on the linker to lay out .got with entries used by
small-model code close to ".TOC.".  PowerPC64 linkers will do that,
but only 8k entries can be close enough.  It's better to support a
large .got with @got@tprel@ha and @got@tprel@l addressing.  Or even
better, since libc.a is compiled with -fpie -ftls-model=initial-exec,
use tp-relative code suitable for up to 2G of thread local storage.
In case anyone thinks more than 2G of TLS is needed (most unlikely
considering that TLS is per-thread), see SUPPORT_HUGE_TLS.

Note that libc.so must use a GOT indirection to find its own thread
variables.  Shared libraries can't know where their own .tdata/.tbss
is laid out relative to the process thread pointer until run time.

The patch also tidies the code a little, in particular strcasestr
avoids reloading of __libc_tsd_LOCALE on a number of code paths, and
strcasecmp can now be ENTRY_TOCLESS when @got@tprel relocs are not
used.  (strcasestr can't because it makes calls, some of which can
be ifunc.)

Tested powerpc64le-linux.
  

Patch

diff --git a/sysdeps/powerpc/powerpc64/power7/strcasecmp.S b/sysdeps/powerpc/powerpc64/power7/strcasecmp.S
index acc9334b2c..9fafc07010 100644
--- a/sysdeps/powerpc/powerpc64/power7/strcasecmp.S
+++ b/sysdeps/powerpc/powerpc64/power7/strcasecmp.S
@@ -31,12 +31,15 @@ 
 # define STRCMP   strcasecmp
 #endif
 
-#ifndef USE_IN_EXTENDED_LOCALE_MODEL
+#ifdef USE_IN_EXTENDED_LOCALE_MODEL
+ENTRY_TOCLESS (__STRCMP)
+	CALL_MCOUNT 3
+#elif defined SHARED || defined SUPPORT_HUGE_TLS
 ENTRY (__STRCMP)
 	CALL_MCOUNT 2
 #else
 ENTRY_TOCLESS (__STRCMP)
-	CALL_MCOUNT 3
+	CALL_MCOUNT 2
 #endif
 
 #define rRTN	r3	/* Return value */
@@ -53,12 +56,18 @@  ENTRY_TOCLESS (__STRCMP)
 #define rLOC	r11	/* Default locale address */
 
 	cmpd	cr7, r3, r4
-#ifndef USE_IN_EXTENDED_LOCALE_MODEL
-	ld 	rTMP, __libc_tsd_LOCALE@got@tprel(r2)
-	add 	rLOC, rTMP, __libc_tsd_LOCALE@tls
-	ld	rLOC, 0(rLOC)
-#else
+#ifdef USE_IN_EXTENDED_LOCALE_MODEL
 	mr	rLOC, rLOCARG
+#elif defined SHARED
+	ld	rTMP, __libc_tsd_LOCALE@got@tprel(r2)
+	ldx	rLOC, rTMP, r13
+#elif defined SUPPORT_HUGE_TLS
+	addis	rTMP, r2, __libc_tsd_LOCALE@got@tprel@ha
+	ld	rTMP, __libc_tsd_LOCALE@got@tprel@l(rTMP)
+	ldx	rLOC, rTMP, r13
+#else
+	addis 	rTMP, r13, __libc_tsd_LOCALE@tprel@ha
+	ld	rLOC, __libc_tsd_LOCALE@tprel@l(rTMP)
 #endif
 	ld	rLOC, LOCALE_CTYPE_TOLOWER(rLOC)
 	mr	rSTR1, rRTN
diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
index 2fada04cc5..9b5467ac7c 100644
--- a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
+++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S
@@ -93,7 +93,11 @@ 
 
 	.machine  power8
 
+#if defined SHARED || defined SUPPORT_HUGE_TLS
 ENTRY (__STRCASECMP)
+#else
+ENTRY_TOCLESS (__STRCASECMP)
+#endif
 #ifdef USE_AS_STRNCASECMP
 	CALL_MCOUNT 3
 #else
@@ -114,9 +118,17 @@  ENTRY (__STRCASECMP)
 	cmpd	cr7, rRTN, rSTR2
 
 	/* Get locale address.  */
-	ld 	rTMP, __libc_tsd_LOCALE@got@tprel(r2)
-	add 	rLOC, rTMP, __libc_tsd_LOCALE@tls
-	ld	rLOC, 0(rLOC)
+#ifdef SHARED
+	ld	rTMP, __libc_tsd_LOCALE@got@tprel(r2)
+	ldx	rLOC, rTMP, r13
+#elif defined SUPPORT_HUGE_TLS
+	addis	rTMP, r2, __libc_tsd_LOCALE@got@tprel@ha
+	ld	rTMP, __libc_tsd_LOCALE@got@tprel@l(rTMP)
+	ldx	rLOC, rTMP, r13
+#else
+	addis 	rTMP, r13, __libc_tsd_LOCALE@tprel@ha
+	ld	rLOC, __libc_tsd_LOCALE@tprel@l(rTMP)
+#endif
 
 	mr	rSTR1, rRTN
 	li	rRTN, 0
diff --git a/sysdeps/powerpc/powerpc64/power8/strcasestr.S b/sysdeps/powerpc/powerpc64/power8/strcasestr.S
index 52aef7897e..92c8934167 100644
--- a/sysdeps/powerpc/powerpc64/power8/strcasestr.S
+++ b/sysdeps/powerpc/powerpc64/power8/strcasestr.S
@@ -107,19 +107,26 @@  ENTRY (STRCASESTR, 4)
 	cmpdi	cr7, r6, 0
 	beq	cr7, L(ret_r3)
 
+#ifdef SHARED
 	ld	r10, __libc_tsd_LOCALE@got@tprel(r2)
-	add	r9, r10, __libc_tsd_LOCALE@tls
-	ld	r9, 0(r9)
-	ld	r9, LOCALE_CTYPE_TOUPPER(r9)
-	sldi	r10, r6, 2		/* Convert to upper case.  */
+	ldx	r11, r10, r13
+#elif defined SUPPORT_HUGE_TLS
+	addis	r10, r2, __libc_tsd_LOCALE@got@tprel@ha
+	ld	r10, __libc_tsd_LOCALE@got@tprel@l(r10)
+	ldx	r11, r10, r13
+#else
+	addis 	r10, r13, __libc_tsd_LOCALE@tprel@ha
+	ld	r11, __libc_tsd_LOCALE@tprel@l(r10)
+#endif
+
+	/* Convert to upper case.  */
+	ld	r9, LOCALE_CTYPE_TOUPPER(r11)
+	sldi	r10, r6, 2
 	lwzx	r28, r9, r10
 
-	ld	r10, __libc_tsd_LOCALE@got@tprel(r2)
-	add	r11, r10, __libc_tsd_LOCALE@tls
-	ld	r11, 0(r11)
-	ld	r11, LOCALE_CTYPE_TOLOWER(r11)
-	sldi	r10, r6, 2              /* Convert to lower case.  */
-	lwzx	r27, r11, r10
+	/* Convert to lower case.  */
+	ld	r9, LOCALE_CTYPE_TOLOWER(r11)
+	lwzx	r27, r9, r10
 
 	/* Check if the first char is present.  */
 	mr	r4, r27
@@ -171,12 +178,19 @@  L(firstpos):
 	mr	r3, r29
 
 	/* Locales not matching ASCII for single bytes.  */
+#ifdef SHARED
 	ld	r10, __libc_tsd_LOCALE@got@tprel(r2)
-	add	r9, r10, __libc_tsd_LOCALE@tls
-	ld	r9, 0(r9)
-	ld	r7, 0(r9)
-	addi	r7, r7, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES
-	lwz	r8, 0(r7)
+	ldx	r11, r10, r13
+#elif defined SUPPORT_HUGE_TLS
+	addis	r10, r2, __libc_tsd_LOCALE@got@tprel@ha
+	ld	r10, __libc_tsd_LOCALE@got@tprel@l(r10)
+	ldx	r11, r10, r13
+#else
+	addis 	r10, r13, __libc_tsd_LOCALE@tprel@ha
+	ld	r11, __libc_tsd_LOCALE@tprel@l(r10)
+#endif
+	ld	r7, 0(r11)
+	lwz	r8, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(r7)
 	cmpdi	cr7, r8, 1
 	beq	cr7, L(bytebybyte)
 
@@ -384,10 +398,7 @@  L(nullchk5):
 
 	.align	4
 L(trailcheck):
-	ld	r10, __libc_tsd_LOCALE@got@tprel(r2)
-	add	r11, r10, __libc_tsd_LOCALE@tls
-	ld	r11, 0(r11)
-	ld	r11, LOCALE_CTYPE_TOLOWER(r11)
+	ld	r9, LOCALE_CTYPE_TOLOWER(r11)
 L(loop2):
 	lbz	r5, 0(r3)               /* Load byte from r3.  */
 	lbz	r6, 0(r4)               /* Load next byte from r4.  */
@@ -398,9 +409,9 @@  L(loop2):
 	addi	r3, r3, 1
 	addi	r4, r4, 1               /* Increment r4.  */
 	sldi	r10, r5, 2              /* Convert to lower case.  */
-	lwzx	r10, r11, r10
+	lwzx	r10, r9, r10
 	sldi	r7, r6, 2               /* Convert to lower case.  */
-	lwzx	r7, r11, r7
+	lwzx	r7, r9, r7
 	cmpw	cr7, r7, r10            /* Compare with byte from r4.  */
 	bne	cr7, L(begin)
 	b	L(loop2)
@@ -455,11 +466,19 @@  L(nextpos):
 	mr	r29, r3
 	cmpdi 	cr7, r3, 0
 	ble 	cr7, L(retnull)
-L(bytebybyte):
+#ifdef SHARED
 	ld	r10, __libc_tsd_LOCALE@got@tprel(r2)
-	add	r11, r10, __libc_tsd_LOCALE@tls
-	ld	r11, 0(r11)
-	ld	r11, LOCALE_CTYPE_TOLOWER(r11)
+	ldx	r11, r10, r13
+#elif defined SUPPORT_HUGE_TLS
+	addis	r10, r2, __libc_tsd_LOCALE@got@tprel@ha
+	ld	r10, __libc_tsd_LOCALE@got@tprel@l(r10)
+	ldx	r11, r10, r13
+#else
+	addis 	r10, r13, __libc_tsd_LOCALE@tprel@ha
+	ld	r11, __libc_tsd_LOCALE@tprel@l(r10)
+#endif
+L(bytebybyte):
+	ld	r9, LOCALE_CTYPE_TOLOWER(r11)
 	mr	r4, r30                 /* Restore r4.  */
 	mr	r8, r3                  /* Save r3.  */
 	addi	r8, r8, 1
@@ -474,9 +493,9 @@  L(loop):
 	cmpdi 	cr7, r5, 0              /* Is it null?  */
 	beq 	cr7, L(retnull)         /* If yes, return.  */
 	sldi	r10, r5, 2              /* Convert to lower case.  */
-	lwzx	r10, r11, r10
+	lwzx	r10, r9, r10
 	sldi	r7, r6, 2               /* Convert to lower case.  */
-	lwzx	r7, r11, r7
+	lwzx	r7, r9, r7
 	cmpw	cr7, r7, r10            /* Compare with byte from r4.  */
 	bne 	cr7, L(loop1)
 	b	L(loop)