From patchwork Thu Feb 10 19:58:37 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Adhemerval Zanella X-Patchwork-Id: 51030 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 988FB385840E for ; Thu, 10 Feb 2022 20:06:58 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 988FB385840E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1644523618; bh=H5EsMQeOltNOy6JkihNotVFTgekkfOUZOAlVxQGNgLE=; h=To:Subject:Date:In-Reply-To:References:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=x+ne4/sO8REgLnNDbMxhv5aStEn2NBTxXGYRiqEh+DxEzvjmjTg4wvhujxTOxwENr H+u0YO60507TLn0y4B/ZPmWgGXBlivmN9nSt9zM9Ea+1rouWfXYw0WkB9M7bTf0a08 391hQqjwppDoQ0fLk39eRuHHGOtyFkvG2Rb2esr8= X-Original-To: libc-alpha@sourceware.org Delivered-To: libc-alpha@sourceware.org Received: from mail-oi1-x231.google.com (mail-oi1-x231.google.com [IPv6:2607:f8b0:4864:20::231]) by sourceware.org (Postfix) with ESMTPS id BBF403858433 for ; Thu, 10 Feb 2022 19:59:00 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org BBF403858433 Received: by mail-oi1-x231.google.com with SMTP id r27so7183134oiw.4 for ; Thu, 10 Feb 2022 11:59:00 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=H5EsMQeOltNOy6JkihNotVFTgekkfOUZOAlVxQGNgLE=; b=6PjutytgNZaSQyZakDXhj0Mw9bNM5SPcK1GMY8Q733B7oTdumpMbJKRk/LrdupbeDS hwyyr7KH3dbBBDf5SfPlT0RTK2MxpasDni6aYP0KsY7rUCmMgwOdbgt570Ija7/+0YDC QVJujjajV37X5qsDlwKER5+5bnSpKTfi6gXuu+DwjyDl9KN2KlSD1EfeJ9gdAbYM2dsB D7P19zKiLm56SJFSlktmQct6MQEH8N9iBGrgdCcVc62TRZgQR+WKzeZXbSvi426x/U5c 
teHmfpe0Z1ju8d5j1wxT5f8PVC6hJTyKGNp+GZBWAr1a4125kB1gb50VB/62f8YUUU+L Nr+g== X-Gm-Message-State: AOAM5328lfT649tzT8IBaZwzUL+BZm5glhlWg0FRCtlQ5owIWrI0QmDH YIQr8OiW+KjX2mbHET05St02hACj44fPXw== X-Google-Smtp-Source: ABdhPJxzElrnAQhgwZvx+gRxx7GvRPXR6ZnI1VZzaM4gCGOnLo1BDez8+rBZiXEyyGM4XqIOHPFX8A== X-Received: by 2002:a05:6808:1508:: with SMTP id u8mr1722318oiw.4.1644523139869; Thu, 10 Feb 2022 11:58:59 -0800 (PST) Received: from birita.. ([2804:431:c7ca:733:a925:765e:3799:3d34]) by smtp.gmail.com with ESMTPSA id bg34sm8859219oob.14.2022.02.10.11.58.58 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Thu, 10 Feb 2022 11:58:59 -0800 (PST) To: libc-alpha@sourceware.org, Wilco Dijkstra , "H . J . Lu" , Noah Goldstein Subject: [PATCH 11/12] i686: Remove bzero optimizations Date: Thu, 10 Feb 2022 16:58:37 -0300 Message-Id: <20220210195838.1036012-12-adhemerval.zanella@linaro.org> X-Mailer: git-send-email 2.32.0 In-Reply-To: <20220210195838.1036012-1-adhemerval.zanella@linaro.org> References: <20220210195838.1036012-1-adhemerval.zanella@linaro.org> MIME-Version: 1.0 X-Spam-Status: No, score=-11.5 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, KAM_SHORT, KAM_STOCKGEN, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.4 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: Adhemerval Zanella via Libc-alpha From: Adhemerval Zanella Reply-To: Adhemerval Zanella Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org Sender: "Libc-alpha" The symbol is not present in the current POSIX specification and the compiler already generates a memset call. 
--- sysdeps/i386/bzero.S | 5 --- sysdeps/i386/i586/bzero.S | 4 -- sysdeps/i386/i586/memset.S | 16 ++------ sysdeps/i386/i686/bzero.S | 4 -- sysdeps/i386/i686/memset.S | 23 +++--------- sysdeps/i386/i686/multiarch/Makefile | 6 +-- sysdeps/i386/i686/multiarch/bzero-ia32.S | 37 ------------------- sysdeps/i386/i686/multiarch/bzero-sse2-rep.S | 3 -- sysdeps/i386/i686/multiarch/bzero-sse2.S | 3 -- sysdeps/i386/i686/multiarch/bzero.c | 32 ---------------- sysdeps/i386/i686/multiarch/ifunc-impl-list.c | 8 ---- sysdeps/i386/i686/multiarch/memset-sse2-rep.S | 24 +++--------- sysdeps/i386/i686/multiarch/memset-sse2.S | 24 +++--------- sysdeps/i386/memset.S | 14 +------ 14 files changed, 22 insertions(+), 181 deletions(-) delete mode 100644 sysdeps/i386/bzero.S delete mode 100644 sysdeps/i386/i586/bzero.S delete mode 100644 sysdeps/i386/i686/bzero.S delete mode 100644 sysdeps/i386/i686/multiarch/bzero-ia32.S delete mode 100644 sysdeps/i386/i686/multiarch/bzero-sse2-rep.S delete mode 100644 sysdeps/i386/i686/multiarch/bzero-sse2.S delete mode 100644 sysdeps/i386/i686/multiarch/bzero.c diff --git a/sysdeps/i386/bzero.S b/sysdeps/i386/bzero.S deleted file mode 100644 index c8dd47b4da..0000000000 --- a/sysdeps/i386/bzero.S +++ /dev/null @@ -1,5 +0,0 @@ -#define USE_AS_BZERO -#define memset __bzero -#include "memset.S" - -weak_alias (__bzero, bzero) diff --git a/sysdeps/i386/i586/bzero.S b/sysdeps/i386/i586/bzero.S deleted file mode 100644 index 2a106719a4..0000000000 --- a/sysdeps/i386/i586/bzero.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_BZERO -#define memset __bzero -#include -weak_alias (__bzero, bzero) diff --git a/sysdeps/i386/i586/memset.S b/sysdeps/i386/i586/memset.S index ae09c3b40a..672af41398 100644 --- a/sysdeps/i386/i586/memset.S +++ b/sysdeps/i386/i586/memset.S @@ -23,15 +23,11 @@ #define PARMS 4+4 /* space for 1 saved reg */ #define RTN PARMS #define DEST RTN -#ifdef USE_AS_BZERO -# define LEN DEST+4 -#else -# define CHR DEST+4 -# define LEN CHR+4 -#endif 
+#define CHR DEST+4 +#define LEN CHR+4 .text -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO +#if defined SHARED && IS_IN (libc) ENTRY (__memset_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) @@ -46,15 +42,11 @@ ENTRY (memset) movl DEST(%esp), %edi cfi_rel_offset (edi, 0) movl LEN(%esp), %edx -#ifdef USE_AS_BZERO - xorl %eax, %eax /* we fill with 0 */ -#else movb CHR(%esp), %al movb %al, %ah movl %eax, %ecx shll $16, %eax movw %cx, %ax -#endif cld /* If less than 36 bytes to write, skip tricky code (it wouldn't work). */ @@ -100,10 +92,8 @@ L(2): shrl $2, %ecx /* convert byte count to longword count */ rep stosb -#ifndef USE_AS_BZERO /* Load result (only if used as memset). */ movl DEST(%esp), %eax /* start address of destination is result */ -#endif popl %edi cfi_adjust_cfa_offset (-4) cfi_restore (edi) diff --git a/sysdeps/i386/i686/bzero.S b/sysdeps/i386/i686/bzero.S deleted file mode 100644 index c7898f18e0..0000000000 --- a/sysdeps/i386/i686/bzero.S +++ /dev/null @@ -1,4 +0,0 @@ -#define USE_AS_BZERO -#define memset __bzero -#include -weak_alias (__bzero, bzero) diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S index fd5b26aeae..3cb86c016d 100644 --- a/sysdeps/i386/i686/memset.S +++ b/sysdeps/i386/i686/memset.S @@ -21,18 +21,13 @@ #include "asm-syntax.h" #define PARMS 4+4 /* space for 1 saved reg */ -#ifdef USE_AS_BZERO -# define DEST PARMS -# define LEN DEST+4 -#else -# define RTN PARMS -# define DEST RTN -# define CHR DEST+4 -# define LEN CHR+4 -#endif +#define RTN PARMS +#define DEST RTN +#define CHR DEST+4 +#define LEN CHR+4 .text -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO +#if defined SHARED && IS_IN (libc) ENTRY_CHK (__memset_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) @@ -46,11 +41,7 @@ ENTRY (memset) cfi_adjust_cfa_offset (4) movl DEST(%esp), %edx movl LEN(%esp), %ecx -#ifdef USE_AS_BZERO - xorl %eax, %eax /* fill with 0 */ -#else movzbl CHR(%esp), %eax -#endif jecxz 1f movl %edx, %edi cfi_rel_offset 
(edi, 0) @@ -70,9 +61,7 @@ ENTRY (memset) 2: movl %ecx, %edx shrl $2, %ecx andl $3, %edx -#ifndef USE_AS_BZERO imul $0x01010101, %eax -#endif rep stosl movl %edx, %ecx @@ -80,9 +69,7 @@ ENTRY (memset) stosb 1: -#ifndef USE_AS_BZERO movl DEST(%esp), %eax /* start address of destination is result */ -#endif popl %edi cfi_adjust_cfa_offset (-4) cfi_restore (edi) diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile index 02fa02658e..9fe5ea8639 100644 --- a/sysdeps/i386/i686/multiarch/Makefile +++ b/sysdeps/i386/i686/multiarch/Makefile @@ -1,9 +1,9 @@ ifeq ($(subdir),string) gen-as-const-headers += locale-defines.sym -sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ +sysdep_routines += memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \ memmove-ssse3-rep \ - memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \ + memset-sse2-rep strcmp-ssse3 \ strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \ memcmp-ssse3 memcmp-sse4 varshift \ strlen-sse2 strlen-sse2-bsf strncpy-c strcpy-ssse3 \ @@ -21,7 +21,7 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \ memcpy-sse2-unaligned \ mempcpy-sse2-unaligned memmove-sse2-unaligned \ strcspn-c strpbrk-c strspn-c \ - bzero-ia32 rawmemchr-ia32 \ + rawmemchr-ia32 \ memchr-ia32 memcmp-ia32 memcpy-ia32 memmove-ia32 \ mempcpy-ia32 memset-ia32 strcat-ia32 strchr-ia32 \ strrchr-ia32 strcpy-ia32 strcmp-ia32 strcspn-ia32 \ diff --git a/sysdeps/i386/i686/multiarch/bzero-ia32.S b/sysdeps/i386/i686/multiarch/bzero-ia32.S deleted file mode 100644 index 96afe9bad1..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero-ia32.S +++ /dev/null @@ -1,37 +0,0 @@ -/* bzero optimized for i686. - Copyright (C) 2017-2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include - -#if IS_IN (libc) -# define __bzero __bzero_ia32 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* IFUNC doesn't work with the hidden functions in shared library since - they will be called without setting up EBX needed for PLT which is - used by IFUNC. */ -# define libc_hidden_builtin_def(name) \ - .globl __GI___bzero; __GI___bzero = __bzero -# endif - -# undef weak_alias -# define weak_alias(original, alias) - -# include -#endif diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S b/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S deleted file mode 100644 index 507b288bb3..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero-sse2-rep.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_BZERO -#define __memset_sse2_rep __bzero_sse2_rep -#include "memset-sse2-rep.S" diff --git a/sysdeps/i386/i686/multiarch/bzero-sse2.S b/sysdeps/i386/i686/multiarch/bzero-sse2.S deleted file mode 100644 index 8d04512e4e..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero-sse2.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_BZERO -#define __memset_sse2 __bzero_sse2 -#include "memset-sse2.S" diff --git a/sysdeps/i386/i686/multiarch/bzero.c b/sysdeps/i386/i686/multiarch/bzero.c deleted file mode 100644 index 7fd0ddd576..0000000000 --- a/sysdeps/i386/i686/multiarch/bzero.c +++ /dev/null @@ -1,32 +0,0 @@ -/* Multiple versions of bzero. 
- All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2017-2022 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -/* Define multiple versions only for the definition in libc. */ -#if IS_IN (libc) -# define bzero __redirect_bzero -# include -# undef bzero - -# define SYMBOL_NAME bzero -# include "ifunc-memset.h" - -libc_ifunc_redirected (__redirect_bzero, __bzero, IFUNC_SELECTOR ()); - -weak_alias (__bzero, bzero) -#endif diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c index 5c7a42dc97..c014f52bf9 100644 --- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c +++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c @@ -36,14 +36,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, size_t i = 0; - /* Support sysdeps/i386/i686/multiarch/bzero.S. */ - IFUNC_IMPL (i, name, bzero, - IFUNC_IMPL_ADD (array, i, bzero, CPU_FEATURE_USABLE (SSE2), - __bzero_sse2_rep) - IFUNC_IMPL_ADD (array, i, bzero, CPU_FEATURE_USABLE (SSE2), - __bzero_sse2) - IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ia32)) - /* Support sysdeps/i386/i686/multiarch/memchr.S. 
*/ IFUNC_IMPL (i, name, memchr, IFUNC_IMPL_ADD (array, i, memchr, CPU_FEATURE_USABLE (SSE2), diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S index 37a10575e7..28df7836e0 100644 --- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S +++ b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S @@ -32,16 +32,10 @@ #define PUSH(REG) pushl REG; CFI_PUSH (REG) #define POP(REG) popl REG; CFI_POP (REG) -#ifdef USE_AS_BZERO -# define DEST PARMS -# define LEN DEST+4 -# define SETRTNVAL -#else -# define DEST PARMS -# define CHR DEST+4 -# define LEN CHR+4 -# define SETRTNVAL movl DEST(%esp), %eax -#endif +#define DEST PARMS +#define CHR DEST+4 +#define LEN CHR+4 +#define SETRTNVAL movl DEST(%esp), %eax #ifdef PIC # define ENTRANCE PUSH (%ebx); @@ -78,7 +72,7 @@ #endif .section .text.sse2,"ax",@progbits -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO +#if defined SHARED && IS_IN (libc) ENTRY (__memset_chk_sse2_rep) movl 12(%esp), %eax cmpl %eax, 16(%esp) @@ -89,16 +83,12 @@ ENTRY (__memset_sse2_rep) ENTRANCE movl LEN(%esp), %ecx -#ifdef USE_AS_BZERO - xor %eax, %eax -#else movzbl CHR(%esp), %eax movb %al, %ah /* Fill the whole EAX with pattern. */ movl %eax, %edx shl $16, %eax or %edx, %eax -#endif movl DEST(%esp), %edx cmp $32, %ecx jae L(32bytesormore) @@ -228,12 +218,8 @@ L(write_3bytes): /* ECX > 32 and EDX is 4 byte aligned. */ L(32bytesormore): /* Fill xmm0 with the pattern. */ -#ifdef USE_AS_BZERO - pxor %xmm0, %xmm0 -#else movd %eax, %xmm0 pshufd $0, %xmm0, %xmm0 -#endif testl $0xf, %edx jz L(aligned_16) /* ECX > 32 and EDX is not 16 byte aligned. 
*/ diff --git a/sysdeps/i386/i686/multiarch/memset-sse2.S b/sysdeps/i386/i686/multiarch/memset-sse2.S index 455519c7ac..4e8414fd51 100644 --- a/sysdeps/i386/i686/multiarch/memset-sse2.S +++ b/sysdeps/i386/i686/multiarch/memset-sse2.S @@ -32,16 +32,10 @@ #define PUSH(REG) pushl REG; CFI_PUSH (REG) #define POP(REG) popl REG; CFI_POP (REG) -#ifdef USE_AS_BZERO -# define DEST PARMS -# define LEN DEST+4 -# define SETRTNVAL -#else -# define DEST PARMS -# define CHR DEST+4 -# define LEN CHR+4 -# define SETRTNVAL movl DEST(%esp), %eax -#endif +#define DEST PARMS +#define CHR DEST+4 +#define LEN CHR+4 +#define SETRTNVAL movl DEST(%esp), %eax #ifdef PIC # define ENTRANCE PUSH (%ebx); @@ -78,7 +72,7 @@ #endif .section .text.sse2,"ax",@progbits -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO +#if defined SHARED && IS_IN (libc) ENTRY (__memset_chk_sse2) movl 12(%esp), %eax cmpl %eax, 16(%esp) @@ -89,16 +83,12 @@ ENTRY (__memset_sse2) ENTRANCE movl LEN(%esp), %ecx -#ifdef USE_AS_BZERO - xor %eax, %eax -#else movzbl CHR(%esp), %eax movb %al, %ah /* Fill the whole EAX with pattern. */ movl %eax, %edx shl $16, %eax or %edx, %eax -#endif movl DEST(%esp), %edx cmp $32, %ecx jae L(32bytesormore) @@ -228,12 +218,8 @@ L(write_3bytes): /* ECX > 32 and EDX is 4 byte aligned. */ L(32bytesormore): /* Fill xmm0 with the pattern. */ -#ifdef USE_AS_BZERO - pxor %xmm0, %xmm0 -#else movd %eax, %xmm0 pshufd $0, %xmm0, %xmm0 -#endif testl $0xf, %edx jz L(aligned_16) /* ECX > 32 and EDX is not 16 byte aligned. 
*/ diff --git a/sysdeps/i386/memset.S b/sysdeps/i386/memset.S index f470511b64..db2753eb2f 100644 --- a/sysdeps/i386/memset.S +++ b/sysdeps/i386/memset.S @@ -30,15 +30,11 @@ #define POP(REG) popl REG; CFI_POP (REG) #define STR1 8 -#ifdef USE_AS_BZERO -#define N STR1+4 -#else #define STR2 STR1+4 #define N STR2+4 -#endif .text -#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO +#if defined SHARED && IS_IN (libc) ENTRY (__memset_chk) movl 12(%esp), %eax cmpl %eax, 16(%esp) @@ -49,20 +45,12 @@ ENTRY (memset) PUSH (%edi) movl N(%esp), %ecx movl STR1(%esp), %edi -#ifdef USE_AS_BZERO - xor %eax, %eax -#else movzbl STR2(%esp), %eax mov %edi, %edx -#endif rep stosb -#ifndef USE_AS_BZERO mov %edx, %eax -#endif POP (%edi) ret END (memset) -#ifndef USE_AS_BZERO libc_hidden_builtin_def (memset) -#endif