From patchwork Wed Apr 20 07:26:56 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alan Modra X-Patchwork-Id: 53066 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 9993E3857353 for ; Wed, 20 Apr 2022 07:27:22 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 9993E3857353 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1650439642; bh=K2WEeWCyVFSEbLnNd08CPKKvvxRTgULLV/kwl3MPd7s=; h=Date:To:Subject:List-Id:List-Unsubscribe:List-Archive:List-Post: List-Help:List-Subscribe:From:Reply-To:Cc:From; b=mhdU4l01JzZ4p8mB3a3Ciq4ekB4gL0KZPhYGbyqZQRtG7M/TkiGWYbJKADiJHOTTr TQ77QVwy4d0RBRGZC8v5aBrEW27+MZtpkN0cGZ+/bNLeX3h2PW68dzPDJ5F+Sgs8eA 2t15rOKnyEiAX6V/EFQlue5MN7P3TEZsLBbILVec= X-Original-To: libc-alpha@sourceware.org Delivered-To: libc-alpha@sourceware.org Received: from mail-pl1-x62d.google.com (mail-pl1-x62d.google.com [IPv6:2607:f8b0:4864:20::62d]) by sourceware.org (Postfix) with ESMTPS id 2D85D385734D for ; Wed, 20 Apr 2022 07:27:01 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 2D85D385734D Received: by mail-pl1-x62d.google.com with SMTP id q3so1050239plg.3 for ; Wed, 20 Apr 2022 00:27:01 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:date:from:to:cc:subject:message-id:mime-version :content-disposition; bh=K2WEeWCyVFSEbLnNd08CPKKvvxRTgULLV/kwl3MPd7s=; b=XWyd+i06AMh5GEDYCSQqESXtfT2g+ArD7++6SXTmL6P9ue0kJvrUcBqQUYJwDDzSEs gdbSs2GDdqP+cBkJnmAc+K/to2QmJtl9YfMDmJ5Ke6KjB7WT6v00OJJ0d0y0i83EtB7L bq/miBvYAnhcFn7DLmzjjLfT+Rez3bUhNQyBqmwPqDKRJbRqaJoomI04BY4lyfNecqNt M/dBKr7K0sRqydQLXfe8qAA6kWUdEArzStTul7Uk4+gdPLg6GsNrWoeckZ4f5zw5gM3D qAj1NPxGMwGF6utZc6LNjPLzSn3eaamBRA+BDvgyubycH8FBAP/SCmKHUwkDp7rVD27a g3jQ== X-Gm-Message-State: 
AOAM533n46EV/Xacq4Qp6kDTKuKWwz+CoEyIfzKVRQX7QREq5ue7z5QV 5de0x6TEh/xzx2OB49aM/6ZXkpqvX1o= X-Google-Smtp-Source: ABdhPJyiY1U4KEKRg2eNA57C1ktgHxNzTG94nqb8HpRflwuPu00SiNxNpo7ppio5oHvkIBAA0dUFOQ== X-Received: by 2002:a17:90a:68c3:b0:1cb:a136:76a0 with SMTP id q3-20020a17090a68c300b001cba13676a0mr2888800pjj.101.1650439620222; Wed, 20 Apr 2022 00:27:00 -0700 (PDT) Received: from squeak.grove.modra.org (158.106.96.58.static.exetel.com.au. [58.96.106.158]) by smtp.gmail.com with ESMTPSA id t15-20020a63b70f000000b00381510608e9sm18396219pgf.14.2022.04.20.00.26.58 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Wed, 20 Apr 2022 00:26:59 -0700 (PDT) Received: by squeak.grove.modra.org (Postfix, from userid 1000) id 96B5B1140617; Wed, 20 Apr 2022 16:56:56 +0930 (ACST) Date: Wed, 20 Apr 2022 16:56:56 +0930 To: libc-alpha@sourceware.org Subject: [PATCH] powerpc64: small-model @got@tprel accesses Message-ID: MIME-Version: 1.0 Content-Disposition: inline X-Spam-Status: No, score=-3036.8 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE, URIBL_BLACK autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: Alan Modra via Libc-alpha From: Alan Modra Reply-To: Alan Modra Cc: Tulio Magno Quites Machado Filho Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org Sender: "Libc-alpha" Three powerpc64 assembly files use small-model GOT accesses. 
This is fine when building libc.so since .got is relatively small, but when compiled for libc.a they may be linked into binaries with a large .got, relying on the linker to lay out .got with entries used by small-model code close to ".TOC.". PowerPC64 linkers will do that, but only 8k entries can be close enough. It's better to support a large .got with @got@tprel@ha and @got@tprel@l addressing. Or even better, since libc.a is compiled with -fpie -ftls-model=initial-exec, use tp-relative code suitable for up to 2G of thread local storage. In case anyone thinks more than 2G of TLS is needed (most unlikely considering that TLS is per-thread), see SUPPORT_HUGE_TLS. Note that libc.so must use a GOT indirection to find its own thread variables. Shared libraries can't know where their own .tdata/.tbss is laid out relative to the process thread pointer until run time. The patch also tidies the code a little; in particular, strcasestr avoids reloading __libc_tsd_LOCALE on a number of code paths, and strcasecmp can now be ENTRY_TOCLESS when @got@tprel relocs are not used. (strcasestr can't because it makes calls, some of which can be ifunc.) Tested on powerpc64le-linux. 
diff --git a/sysdeps/powerpc/powerpc64/power7/strcasecmp.S b/sysdeps/powerpc/powerpc64/power7/strcasecmp.S index acc9334b2c..9fafc07010 100644 --- a/sysdeps/powerpc/powerpc64/power7/strcasecmp.S +++ b/sysdeps/powerpc/powerpc64/power7/strcasecmp.S @@ -31,12 +31,15 @@ # define STRCMP strcasecmp #endif -#ifndef USE_IN_EXTENDED_LOCALE_MODEL +#ifdef USE_IN_EXTENDED_LOCALE_MODEL +ENTRY_TOCLESS (__STRCMP) + CALL_MCOUNT 3 +#elif defined SHARED || defined SUPPORT_HUGE_TLS ENTRY (__STRCMP) CALL_MCOUNT 2 #else ENTRY_TOCLESS (__STRCMP) - CALL_MCOUNT 3 + CALL_MCOUNT 2 #endif #define rRTN r3 /* Return value */ @@ -53,12 +56,18 @@ ENTRY_TOCLESS (__STRCMP) #define rLOC r11 /* Default locale address */ cmpd cr7, r3, r4 -#ifndef USE_IN_EXTENDED_LOCALE_MODEL - ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) - add rLOC, rTMP, __libc_tsd_LOCALE@tls - ld rLOC, 0(rLOC) -#else +#ifdef USE_IN_EXTENDED_LOCALE_MODEL mr rLOC, rLOCARG +#elif defined SHARED + ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) + ldx rLOC, rTMP, r13 +#elif defined SUPPORT_HUGE_TLS + addis rTMP, r2, __libc_tsd_LOCALE@got@tprel@ha + ld rTMP, __libc_tsd_LOCALE@got@tprel@l(rTMP) + ldx rLOC, rTMP, r13 +#else + addis rTMP, r13, __libc_tsd_LOCALE@tprel@ha + ld rLOC, __libc_tsd_LOCALE@tprel@l(rTMP) #endif ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) mr rSTR1, rRTN diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S index 2fada04cc5..9b5467ac7c 100644 --- a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S +++ b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S @@ -93,7 +93,11 @@ .machine power8 +#if defined SHARED || defined SUPPORT_HUGE_TLS ENTRY (__STRCASECMP) +#else +ENTRY_TOCLESS (__STRCASECMP) +#endif #ifdef USE_AS_STRNCASECMP CALL_MCOUNT 3 #else @@ -114,9 +118,17 @@ ENTRY (__STRCASECMP) cmpd cr7, rRTN, rSTR2 /* Get locale address. 
*/ - ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) - add rLOC, rTMP, __libc_tsd_LOCALE@tls - ld rLOC, 0(rLOC) +#ifdef SHARED + ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) + ldx rLOC, rTMP, r13 +#elif defined SUPPORT_HUGE_TLS + addis rTMP, r2, __libc_tsd_LOCALE@got@tprel@ha + ld rTMP, __libc_tsd_LOCALE@got@tprel@l(rTMP) + ldx rLOC, rTMP, r13 +#else + addis rTMP, r13, __libc_tsd_LOCALE@tprel@ha + ld rLOC, __libc_tsd_LOCALE@tprel@l(rTMP) +#endif mr rSTR1, rRTN li rRTN, 0 diff --git a/sysdeps/powerpc/powerpc64/power8/strcasestr.S b/sysdeps/powerpc/powerpc64/power8/strcasestr.S index 52aef7897e..92c8934167 100644 --- a/sysdeps/powerpc/powerpc64/power8/strcasestr.S +++ b/sysdeps/powerpc/powerpc64/power8/strcasestr.S @@ -107,19 +107,26 @@ ENTRY (STRCASESTR, 4) cmpdi cr7, r6, 0 beq cr7, L(ret_r3) +#ifdef SHARED ld r10, __libc_tsd_LOCALE@got@tprel(r2) - add r9, r10, __libc_tsd_LOCALE@tls - ld r9, 0(r9) - ld r9, LOCALE_CTYPE_TOUPPER(r9) - sldi r10, r6, 2 /* Convert to upper case. */ + ldx r11, r10, r13 +#elif defined SUPPORT_HUGE_TLS + addis r10, r2, __libc_tsd_LOCALE@got@tprel@ha + ld r10, __libc_tsd_LOCALE@got@tprel@l(r10) + ldx r11, r10, r13 +#else + addis r10, r13, __libc_tsd_LOCALE@tprel@ha + ld r11, __libc_tsd_LOCALE@tprel@l(r10) +#endif + + /* Convert to upper case. */ + ld r9, LOCALE_CTYPE_TOUPPER(r11) + sldi r10, r6, 2 lwzx r28, r9, r10 - ld r10, __libc_tsd_LOCALE@got@tprel(r2) - add r11, r10, __libc_tsd_LOCALE@tls - ld r11, 0(r11) - ld r11, LOCALE_CTYPE_TOLOWER(r11) - sldi r10, r6, 2 /* Convert to lower case. */ - lwzx r27, r11, r10 + /* Convert to lower case. */ + ld r9, LOCALE_CTYPE_TOLOWER(r11) + lwzx r27, r9, r10 /* Check if the first char is present. */ mr r4, r27 @@ -171,12 +178,19 @@ L(firstpos): mr r3, r29 /* Locales not matching ASCII for single bytes. 
*/ +#ifdef SHARED ld r10, __libc_tsd_LOCALE@got@tprel(r2) - add r9, r10, __libc_tsd_LOCALE@tls - ld r9, 0(r9) - ld r7, 0(r9) - addi r7, r7, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES - lwz r8, 0(r7) + ldx r11, r10, r13 +#elif defined SUPPORT_HUGE_TLS + addis r10, r2, __libc_tsd_LOCALE@got@tprel@ha + ld r10, __libc_tsd_LOCALE@got@tprel@l(r10) + ldx r11, r10, r13 +#else + addis r10, r13, __libc_tsd_LOCALE@tprel@ha + ld r11, __libc_tsd_LOCALE@tprel@l(r10) +#endif + ld r7, 0(r11) + lwz r8, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(r7) cmpdi cr7, r8, 1 beq cr7, L(bytebybyte) @@ -384,10 +398,7 @@ L(nullchk5): .align 4 L(trailcheck): - ld r10, __libc_tsd_LOCALE@got@tprel(r2) - add r11, r10, __libc_tsd_LOCALE@tls - ld r11, 0(r11) - ld r11, LOCALE_CTYPE_TOLOWER(r11) + ld r9, LOCALE_CTYPE_TOLOWER(r11) L(loop2): lbz r5, 0(r3) /* Load byte from r3. */ lbz r6, 0(r4) /* Load next byte from r4. */ @@ -398,9 +409,9 @@ L(loop2): addi r3, r3, 1 addi r4, r4, 1 /* Increment r4. */ sldi r10, r5, 2 /* Convert to lower case. */ - lwzx r10, r11, r10 + lwzx r10, r9, r10 sldi r7, r6, 2 /* Convert to lower case. */ - lwzx r7, r11, r7 + lwzx r7, r9, r7 cmpw cr7, r7, r10 /* Compare with byte from r4. */ bne cr7, L(begin) b L(loop2) @@ -455,11 +466,19 @@ L(nextpos): mr r29, r3 cmpdi cr7, r3, 0 ble cr7, L(retnull) -L(bytebybyte): +#ifdef SHARED ld r10, __libc_tsd_LOCALE@got@tprel(r2) - add r11, r10, __libc_tsd_LOCALE@tls - ld r11, 0(r11) - ld r11, LOCALE_CTYPE_TOLOWER(r11) + ldx r11, r10, r13 +#elif defined SUPPORT_HUGE_TLS + addis r10, r2, __libc_tsd_LOCALE@got@tprel@ha + ld r10, __libc_tsd_LOCALE@got@tprel@l(r10) + ldx r11, r10, r13 +#else + addis r10, r13, __libc_tsd_LOCALE@tprel@ha + ld r11, __libc_tsd_LOCALE@tprel@l(r10) +#endif +L(bytebybyte): + ld r9, LOCALE_CTYPE_TOLOWER(r11) mr r4, r30 /* Restore r4. */ mr r8, r3 /* Save r3. */ addi r8, r8, 1 @@ -474,9 +493,9 @@ L(loop): cmpdi cr7, r5, 0 /* Is it null? */ beq cr7, L(retnull) /* If yes, return. 
*/ sldi r10, r5, 2 /* Convert to lower case. */ - lwzx r10, r11, r10 + lwzx r10, r9, r10 sldi r7, r6, 2 /* Convert to lower case. */ - lwzx r7, r11, r7 + lwzx r7, r9, r7 cmpw cr7, r7, r10 /* Compare with byte from r4. */ bne cr7, L(loop1) b L(loop)