From patchwork Fri Jan 6 14:26:03 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Adhemerval Zanella Netto X-Patchwork-Id: 18807 X-Patchwork-Delegate: tuliom@linux.vnet.ibm.com Received: (qmail 76638 invoked by alias); 6 Jan 2017 14:26:22 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 76626 invoked by uid 89); 6 Jan 2017 14:26:21 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=0.2 required=5.0 tests=AWL, BAYES_00, KAM_ASCII_DIVIDERS, RCVD_IN_DNSWL_NONE, RCVD_IN_SORBS_SPAM, SPF_PASS, TVD_SUBJ_WIPE_DEBT autolearn=no version=3.3.2 spammy=!!!, needle, Lesser, strcspn X-HELO: mail-yw0-f175.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:subject:date:message-id; bh=Hdy16x0uOKlXZjXm+rE2LNQmQbSSYuxO51Gc2Oue7oU=; b=krm36KnbAFGZOE2HaYF1HALlvnwpsgYTzPTeUeC/0DJFzPWMZ8zp5QJk9tyDeo5KXL bp/bo+DRbTRXpOoLenGDOwiUt/Ju1UZ5PckRKaX6wbhpfxomfJMXhl1AoAr3aZ+Xcg5A HBm1xCV9AAgc8HedX9XmHSAiWCIOokW9Al0BarZ9QvEywVsXkMuhWC4DbUDJgxnilxsh K2P/9NldZh3oaFTAuByyFbtVkNH2IpzTIwH5q5SapdAORJa42+IHz/4pvDd1dml6PKII v4pP/n2oyEIeB0BOgSEQPiJ3COIh0EokQJ1hVgV/QYqTLRFFBPTAVEU6jn95OThzXD1n DRVw== X-Gm-Message-State: AIkVDXIHclShiFUSwoDF2sIdeV0qQvNrJPaTbtTYlJUzUN39HBzzuW74pumjFi/bhesmSz/L X-Received: by 10.129.4.130 with SMTP id 124mr72811370ywe.333.1483712768678; Fri, 06 Jan 2017 06:26:08 -0800 (PST) From: Adhemerval Zanella To: libc-alpha@sourceware.org Subject: [PATCH] Remove i686, x86_64, and powerpc strtok implementations Date: Fri, 6 Jan 2017 12:26:03 -0200 Message-Id: <1483712763-27890-1-git-send-email-adhemerval.zanella@linaro.org> Based on comments on previous attempt 
to address BZ#16640 [1], the idea is to not support invalid use of strtok (the original bug report proposal). This led to a new optimized strtok implementation [2]. The idea of this patch is to fix BZ#16640 by aligning all the implementations to the same contract. However, with the newer strtok code it is better to remove the old assembly implementations instead of fixing them. For x86 it is a gain in all cases, since the new implementation can potentially use the sse2/sse42 implementations of strspn and strcspn. This shows better performance on both i686 and x86_64 using the string benchtests. On powerpc64 the gains are mixed: only larger inputs or keys show some gains (based on the benchtests, gains appear for keys larger than 10 and inputs larger than 32). I would prefer to remove the optimized implementation, first for code simplicity and second because further gains could be obtained with better optimized strcspn/strspn code (as for x86). However, if the powerpc arch maintainers prefer, I can send a v2 with the assembly code adjusted instead. Checked on x86_64-linux-gnu, i686-linux-gnu, and powerpc64le-linux-gnu. [BZ #16640] * sysdeps/i386/i686/strtok.S: Remove file. * sysdeps/i386/i686/strtok_r.S: Likewise. * sysdeps/i386/strtok.S: Likewise. * sysdeps/i386/strtok_r.S: Likewise. * sysdeps/powerpc/powerpc64/strtok.S: Likewise. * sysdeps/powerpc/powerpc64/strtok_r.S: Likewise. * sysdeps/x86_64/strtok.S: Likewise. * sysdeps/x86_64/strtok_r.S: Likewise. 
[1] https://sourceware.org/ml/libc-alpha/2016-10/msg00411.html [2] https://sourceware.org/ml/libc-alpha/2016-12/msg00461.html --- ChangeLog | 12 ++ sysdeps/i386/i686/strtok.S | 244 ------------------------ sysdeps/i386/i686/strtok_r.S | 5 - sysdeps/i386/strtok.S | 358 ----------------------------------- sysdeps/i386/strtok_r.S | 5 - sysdeps/powerpc/powerpc64/strtok.S | 226 ---------------------- sysdeps/powerpc/powerpc64/strtok_r.S | 24 --- sysdeps/x86_64/strtok.S | 208 -------------------- sysdeps/x86_64/strtok_r.S | 5 - 9 files changed, 12 insertions(+), 1075 deletions(-) delete mode 100644 sysdeps/i386/i686/strtok.S delete mode 100644 sysdeps/i386/i686/strtok_r.S delete mode 100644 sysdeps/i386/strtok.S delete mode 100644 sysdeps/i386/strtok_r.S delete mode 100644 sysdeps/powerpc/powerpc64/strtok.S delete mode 100644 sysdeps/powerpc/powerpc64/strtok_r.S delete mode 100644 sysdeps/x86_64/strtok.S delete mode 100644 sysdeps/x86_64/strtok_r.S diff --git a/sysdeps/i386/i686/strtok.S b/sysdeps/i386/i686/strtok.S deleted file mode 100644 index 1a24bc1..0000000 --- a/sysdeps/i386/i686/strtok.S +++ /dev/null @@ -1,244 +0,0 @@ -/* strtok (str, delim) -- Return next DELIM separated token from STR. - For Intel 80686. - Copyright (C) 1998-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper , 1998. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include "asm-syntax.h" - -/* This file can be used for three variants of the strtok function: - - strtok: - INPUT PARAMETER: - str (sp + 4) - delim (sp + 8) - - strtok_r: - INPUT PARAMETER: - str (sp + 4) - delim (sp + 8) - save_ptr (sp + 12) - - We do a common implementation here. */ - -#ifdef USE_AS_STRTOK_R -# define SAVE_PTR 0(%ecx) -#else - .bss - .local save_ptr - .type save_ptr, @object - .size save_ptr, 4 -save_ptr: - .space 4 - -# ifdef PIC -# define SAVE_PTR save_ptr@GOTOFF(%ebx) -# else -# define SAVE_PTR save_ptr -# endif - -# define FUNCTION strtok -#endif - -#if !defined USE_AS_STRTOK_R && defined PIC -# define PARMS 4+256+4 /* space for table and saved PIC register */ -#else -# define PARMS 4+256 /* space for table */ -#endif -#define RTN PARMS -#define STR RTN -#define DELIM STR+4 -#ifdef USE_AS_STRTOK_R -# define SAVE DELIM+4 -#endif - - .text - -#if !defined USE_AS_STRTOK_R && defined PIC -0: movl (%esp), %ebx - ret -#endif - -ENTRY (FUNCTION) - -#if !defined USE_AS_STRTOK_R && defined PIC - pushl %ebx /* Save PIC register. */ - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) - call 0b - addl $_GLOBAL_OFFSET_TABLE_, %ebx -#endif - - /* First we create a table with flags for all possible characters. - For the ASCII (7bit/8bit) or ISO-8859-X character sets which are - supported by the C string functions we have 256 characters. - Before inserting marks for the stop characters we clear the whole - table. */ - movl %edi, %edx - subl $256, %esp - cfi_adjust_cfa_offset (256) - movl $64, %ecx - movl %esp, %edi - xorl %eax, %eax - cld - rep - stosl - - /* Note: %ecx = 0 !!! */ - movl %edx, %edi - - movl STR(%esp), %edx /* Get start of string. */ - -#ifdef USE_AS_STRTOK_R - /* The value is stored in the third argument. 
*/ - movl SAVE(%esp), %eax - movl (%eax), %eax -#else - /* The value is in the local variable defined above. But - we have to take care for PIC code. */ - movl SAVE_PTR, %eax -#endif - - /* If the pointer is NULL we have to use the stored value of - the last run. */ - cmpl $0, %edx - cmove %eax, %edx - testl %edx, %edx - jz L(returnNULL) - movl DELIM(%esp), %eax /* Get start of delimiter set. */ - -/* For understanding the following code remember that %ecx == 0 now. - Although all the following instruction only modify %cl we always - have a correct zero-extended 32-bit value in %ecx. */ - -L(2): movb (%eax), %cl /* get byte from stopset */ - testb %cl, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 1(%eax), %cl /* get byte from stopset */ - testb $0xff, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 2(%eax), %cl /* get byte from stopset */ - testb $0xff, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 3(%eax), %cl /* get byte from stopset */ - addl $4, %eax /* increment stopset pointer */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - testb $0xff, %cl /* is NUL char? */ - jnz L(2) /* no => process next dword from stopset */ - -L(1): leal -4(%edx), %eax /* prepare loop */ - - /* We use a neat trick for the following loop. Normally we would - have to test for two termination conditions - 1. a character in the stopset was found - and - 2. the end of the string was found - As a sign that the character is in the stopset we store its - value in the table. The value of NUL is NUL so the loop - terminates for NUL in every case. 
*/ - -L(3): addl $4, %eax /* adjust pointer for full loop round */ - - movb (%eax), %cl /* get byte from string */ - testb %cl, (%esp,%ecx) /* is it contained in stopset? */ - jz L(4) /* no => start of token */ - - movb 1(%eax), %cl /* get byte from string */ - testb %cl, (%esp,%ecx) /* is it contained in stopset? */ - jz L(5) /* no => start of token */ - - movb 2(%eax), %cl /* get byte from string */ - testb %cl, (%esp,%ecx) /* is it contained in stopset? */ - jz L(6) /* no => start of token */ - - movb 3(%eax), %cl /* get byte from string */ - testb %cl, (%esp,%ecx) /* is it contained in stopset? */ - jnz L(3) /* yes => start of loop */ - - incl %eax /* adjust pointer */ -L(6): incl %eax -L(5): incl %eax - - /* Now we have to terminate the string. */ - -L(4): leal -4(%eax), %edx /* We use %EDX for the next run. */ - -L(7): addl $4, %edx /* adjust pointer for full loop round */ - - movb (%edx), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in skipset? */ - je L(8) /* yes => return */ - - movb 1(%edx), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in skipset? */ - je L(9) /* yes => return */ - - movb 2(%edx), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in skipset? */ - je L(10) /* yes => return */ - - movb 3(%edx), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in skipset? */ - jne L(7) /* no => start loop again */ - - incl %edx /* adjust pointer */ -L(10): incl %edx -L(9): incl %edx - -L(8): cmpl %eax, %edx - je L(returnNULL) /* There was no token anymore. */ - - movb $0, (%edx) /* Terminate string. */ - - /* Are we at end of string? */ - cmpb $0, %cl - leal 1(%edx), %ecx - cmovne %ecx, %edx - - /* Store the pointer to the next character. */ -#ifdef USE_AS_STRTOK_R - movl SAVE(%esp), %ecx -#endif - movl %edx, SAVE_PTR - -L(epilogue): - /* Remove the stopset table. 
*/ - addl $256, %esp - cfi_adjust_cfa_offset (-256) -#if !defined USE_AS_STRTOK_R && defined PIC - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) -#endif - ret - -L(returnNULL): - xorl %eax, %eax -#ifdef USE_AS_STRTOK_R - movl SAVE(%esp), %ecx -#endif - movl %edx, SAVE_PTR - jmp L(epilogue) - -END (FUNCTION) diff --git a/sysdeps/i386/i686/strtok_r.S b/sysdeps/i386/i686/strtok_r.S deleted file mode 100644 index 353e076..0000000 --- a/sysdeps/i386/i686/strtok_r.S +++ /dev/null @@ -1,5 +0,0 @@ -#define FUNCTION __strtok_r -#define USE_AS_STRTOK_R 1 -#include -weak_alias (__strtok_r, strtok_r) -strong_alias (__strtok_r, __GI___strtok_r) diff --git a/sysdeps/i386/strtok.S b/sysdeps/i386/strtok.S deleted file mode 100644 index f7aadc3..0000000 --- a/sysdeps/i386/strtok.S +++ /dev/null @@ -1,358 +0,0 @@ -/* strtok (str, delim) -- Return next DELIM separated token from STR. - For Intel 80x86, x>=3. - Copyright (C) 1996-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Ulrich Drepper , 1996. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include "asm-syntax.h" - -/* This file can be used for three variants of the strtok function: - - strtok: - INPUT PARAMETER: - str (sp + 4) - delim (sp + 8) - - strtok_r: - INPUT PARAMETER: - str (sp + 4) - delim (sp + 8) - save_ptr (sp + 12) - - We do a common implementation here. 
*/ - -#ifdef USE_AS_STRTOK_R -# define SAVE_PTR 0(%ecx) -#else - .bss - .local save_ptr - .type save_ptr, @object - .size save_ptr, 4 -save_ptr: - .space 4 - -# ifdef PIC -# define SAVE_PTR save_ptr@GOTOFF(%ebx) -# else -# define SAVE_PTR save_ptr -# endif - -# define FUNCTION strtok -#endif - -#define PARMS 4 /* no space for saved regs */ -#define RTN PARMS -#define STR RTN -#define DELIM STR+4 -#define SAVE DELIM+4 - - .text -ENTRY (FUNCTION) - - movl STR(%esp), %edx - movl DELIM(%esp), %eax - -#if !defined USE_AS_STRTOK_R && defined PIC - pushl %ebx /* Save PIC register. */ - cfi_adjust_cfa_offset (4) - call L(here) - cfi_adjust_cfa_offset (4) - cfi_rel_offset (ebx, 0) -L(here): - popl %ebx - cfi_adjust_cfa_offset (-4) - addl $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebx -#endif - - /* If the pointer is NULL we have to use the stored value of - the last run. */ - cmpl $0, %edx - jne L(1) - -#ifdef USE_AS_STRTOK_R - /* The value is stored in the third argument. */ - movl SAVE(%esp), %edx - movl (%edx), %edx -#else - /* The value is in the local variable defined above. But - we have to take care for PIC code. */ - movl SAVE_PTR, %edx -#endif - testl %edx, %edx - jz L(returnNULL) - -L(1): - /* First we create a table with flags for all possible characters. - For the ASCII (7bit/8bit) or ISO-8859-X character sets which are - supported by the C string functions we have 256 characters. - Before inserting marks for the stop characters we clear the whole - table. The unrolled form is much faster than a loop. */ - xorl %ecx, %ecx /* %ecx = 0 !!! 
*/ - - pushl %ecx /* make a 256 bytes long block filled with 0 */ - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - 
cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl %ecx - cfi_adjust_cfa_offset (4) - pushl $0 /* These immediate values make the label 2 */ - cfi_adjust_cfa_offset (4) - pushl $0 /* to be aligned on a 16 byte boundary to */ - cfi_adjust_cfa_offset (4) - pushl $0 /* get a better performance of the loop. */ - cfi_adjust_cfa_offset (4) - pushl $0 - cfi_adjust_cfa_offset (4) - pushl $0 - cfi_adjust_cfa_offset (4) - pushl $0 - cfi_adjust_cfa_offset (4) - -/* For understanding the following code remember that %ecx == 0 now. - Although all the following instruction only modify %cl we always - have a correct zero-extended 32-bit value in %ecx. */ - -L(2): movb (%eax), %cl /* get byte from stopset */ - testb %cl, %cl /* is NUL char? */ - jz L(1_1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 1(%eax), %cl /* get byte from stopset */ - testb $0xff, %cl /* is NUL char? */ - jz L(1_1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 2(%eax), %cl /* get byte from stopset */ - testb $0xff, %cl /* is NUL char? */ - jz L(1_1) /* yes => start compare loop */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - - movb 3(%eax), %cl /* get byte from stopset */ - addl $4, %eax /* increment stopset pointer */ - movb %cl, (%esp,%ecx) /* set corresponding byte in stopset table */ - testb $0xff, %cl /* is NUL char? */ - jnz L(2) /* no => process next dword from stopset */ - -L(1_1): leal -4(%edx), %eax /* prepare loop */ - - /* We use a neat trick for the following loop. 
Normally we would - have to test for two termination conditions - 1. a character in the stopset was found - and - 2. the end of the string was found - As a sign that the character is in the stopset we store its - value in the table. The value of NUL is NUL so the loop - terminates for NUL in every case. */ - -L(3): addl $4, %eax /* adjust pointer for full loop round */ - - movb (%eax), %cl /* get byte from string */ - testb %cl, (%esp,%ecx) /* is it contained in stopset? */ - jz L(4) /* no => start of token */ - - movb 1(%eax), %cl /* get byte from string */ - testb %cl, (%esp,%ecx) /* is it contained in stopset? */ - jz L(5) /* no => start of token */ - - movb 2(%eax), %cl /* get byte from string */ - testb %cl, (%esp,%ecx) /* is it contained in stopset? */ - jz L(6) /* no => start of token */ - - movb 3(%eax), %cl /* get byte from string */ - testb %cl, (%esp,%ecx) /* is it contained in stopset? */ - jnz L(3) /* yes => start of loop */ - - incl %eax /* adjust pointer */ -L(6): incl %eax -L(5): incl %eax - - /* Now we have to terminate the string. */ - -L(4): leal -4(%eax), %edx /* We use %EDX for the next run. */ - -L(7): addl $4, %edx /* adjust pointer for full loop round */ - - movb (%edx), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in skipset? */ - je L(8) /* yes => return */ - - movb 1(%edx), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in skipset? */ - je L(9) /* yes => return */ - - movb 2(%edx), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in skipset? */ - je L(10) /* yes => return */ - - movb 3(%edx), %cl /* get byte from string */ - cmpb %cl, (%esp,%ecx) /* is it contained in skipset? */ - jne L(7) /* no => start loop again */ - - incl %edx /* adjust pointer */ -L(10): incl %edx -L(9): incl %edx - -L(8): /* Remove the stopset table. */ - addl $256, %esp - cfi_adjust_cfa_offset (-256) - - cmpl %eax, %edx - je L(returnNULL) /* There was no token anymore. 
*/ - - movb $0, (%edx) /* Terminate string. */ - - /* Are we at end of string? */ - cmpb $0, %cl - je L(11) - - incl %edx -L(11): - - /* Store the pointer to the next character. */ -#ifdef USE_AS_STRTOK_R - movl SAVE(%esp), %ecx -#endif - movl %edx, SAVE_PTR - -L(epilogue): -#if !defined USE_AS_STRTOK_R && defined PIC - popl %ebx - cfi_adjust_cfa_offset (-4) - cfi_restore (ebx) -#endif - ret - -L(returnNULL): - xorl %eax, %eax -#ifdef USE_AS_STRTOK_R - movl SAVE(%esp), %ecx -#endif - movl %edx, SAVE_PTR - jmp L(epilogue) - -END (FUNCTION) diff --git a/sysdeps/i386/strtok_r.S b/sysdeps/i386/strtok_r.S deleted file mode 100644 index 5087293..0000000 --- a/sysdeps/i386/strtok_r.S +++ /dev/null @@ -1,5 +0,0 @@ -#define FUNCTION __strtok_r -#define USE_AS_STRTOK_R 1 -#include -weak_alias (__strtok_r, strtok_r) -strong_alias (__strtok_r, __GI___strtok_r) diff --git a/sysdeps/powerpc/powerpc64/strtok.S b/sysdeps/powerpc/powerpc64/strtok.S deleted file mode 100644 index df2474c..0000000 --- a/sysdeps/powerpc/powerpc64/strtok.S +++ /dev/null @@ -1,226 +0,0 @@ -/* Optimized strtok implementation for PowerPC64. - - Copyright (C) 2014-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -/* Performance gains are grabbed through following techniques: - - > hashing of needle. 
- > hashing avoids scanning of duplicate entries in needle - across the string. - > unrolling when scanning for character in string - across hash table. */ - -/* Algorithm is as below: - 1. A empty hash table/dictionary is created comprising of - 256 ascii character set - 2. When hash entry is found in needle , the hash index - is initialized to 1 - 3. The string is scanned until end and for every character, - its corresponding hash index is compared. - 4. initial length of string (count) until first hit of - accept needle is calculated and moved.(strspn) - 5. The string is again scanned until end and for every character, - its corresponding hash index is compared.(strpbrk) - 6. If hash index is set to 1 for the index of string, - set it to null and set the saveptr to point to the next char. - 7. Otherwise count is incremented and scanning continues - until end of string. */ - -#include -#ifdef USE_AS_STRTOK_R -# define FUNC_NAME __strtok_r -#else -# define FUNC_NAME strtok -#endif - -EALIGN(FUNC_NAME, 4, 0) -#ifdef USE_AS_STRTOK_R - CALL_MCOUNT 3 - cmpdi cr7, r3, 0 /* Is input null? */ - bne cr7, L(inputnotNull) - ld r3, 0(r5) /* Load from r5 */ -#else - CALL_MCOUNT 2 - addis r5, r2, .LANCHOR0@toc@ha - cmpdi cr7, r3, 0 /* Is r3 NULL? 
*/ - bne cr7, L(inputnotNull) - ld r3, .LANCHOR0@toc@l(r5) /* Load from saveptr */ -#endif -L(inputnotNull): - mr r7, r3 - cmpdi cr7, r3, 0 - beq cr7, L(returnNULL) - lbz r8, 0(r3) - cmpdi cr7, r8, 0 - beq cr7, L(returnNULL) - - addi r9, r1, -256 /* r9 is a hash of 256 bytes */ - - /*Iniatliaze hash table with Zeroes */ - li r6, 0 - li r8, 4 - mtctr r8 - mr r10, r9 - .align 4 -L(zerohash): - std r6, 0(r10) - std r6, 8(r10) - std r6, 16(r10) - std r6, 24(r10) - std r6, 32(r10) - std r6, 40(r10) - std r6, 48(r10) - std r6, 56(r10) - addi r10, r10, 64 - bdnz L(zerohash) - - - lbz r10, 0(r4) /* load r10 with needle (r4) */ - li r8, 1 /* r8=1, marker into hash if found in - needle */ - - cmpdi cr7, r10, 0 /* accept needle is NULL */ - beq cr7, L(skipHashing) /* if needle is NULL, skip hashing */ - - .align 4 /* align section to 16 byte boundary */ -L(hashing): - stbx r8, r9, r10 /* update hash with marker for the pivot of - the needle */ - lbzu r10, 1(r4) /* load needle into r10 and update to next */ - cmpdi cr7, r10, 0 /* if needle is has reached NULL, continue */ - bne cr7, L(hashing) /* loop to hash the needle */ - -L(skipHashing): - b L(beginScan) - - .align 4 /* align section to 16 byte boundary */ -L(scanUnroll): - lbzx r8, r9, r8 /* load r8 with hash value at index */ - cmpwi cr7, r8, 0 /* check the hash value */ - beq cr7, L(ret1stIndex) /* we have hit accept needle */ - - lbz r8, 1(r7) /* load string[1] into r8 */ - lbzx r8, r9, r8 /* load r8 with hash value at index */ - cmpwi cr7, r8, 0 /* check the hash value */ - beq cr7, L(ret2ndIndex) /* we have hit accept needle */ - - lbz r8, 2(r7) /* load string[1] into r8 */ - lbzx r8, r9, r8 /* load r8 with hash value at index */ - cmpwi cr7, r8, 0 /* check the hash value */ - beq cr7, L(ret3rdIndex) /* we have hit accept needle */ - - lbz r8, 3(r7) /* load string[1] into r8 */ - addi r7, r7, 4 - lbzx r8, r9, r8 /* load r8 with hash value at index */ - cmpwi cr7, r8, 0 /* check the hash value */ - beq 
cr7,L(ret4thIndex) /* we have hit accept needle */ - -L(beginScan): - lbz r8, 0(r7) /* load string[0] into r8 */ - addi r6, r7, 1 - addi r11, r7, 2 - addi r4, r7, 3 - cmpdi cr7, r8, 0 /* check if its null */ - bne cr7, L(scanUnroll) /* continue scanning */ - -L(ret1stIndex): - mr r3, r7 - b L(next) -L(ret2ndIndex): - mr r3, r6 - b L(next) -L(ret3rdIndex): - mr r3, r11 - b L(next) -L(ret4thIndex): - mr r3, r4 -L(next): - mr r7, r3 - lbz r8, 0(r7) - cmpdi cr7, r8, 0 - beq cr7, L(returnNULL) - li r8, 1 - li r10, 0 /* load counter = 0 */ - stbx r8, r9, r10 /* update hash for NULL */ - b L(mainloop) - -L(unroll): - lbz r8, 1(r7) /* load string[1] into r8 */ - lbzx r8, r9, r8 /* load r8 with hash value at index */ - cmpwi r7, r8, 1 /* check the hash */ - beq cr7, L(foundat1st) /* we have hit accept needle */ - lbz r8, 2(r7) - lbzx r8, r9, r8 - cmpwi cr7, r8, 1 - beq cr7, L(foundat2nd) - lbz r8, 3(r7) - addi r7, r7, 4 - lbzx r8, r9, r8 - cmpwi cr7, r8, 1 - beq cr7, L(foundat3rd) -L(mainloop): - lbz r8, 0(r7) - addi r6, r7, 1 - addi r11, r7, 2 - addi r4, r7, 3 - lbzx r8, r9, r8 - cmpwi cr7, r8, 1 - bne cr7, L(unroll) /* continue scanning */ - - b L(found) -L(foundat1st): - mr r7, r6 - b L(found) -L(foundat2nd): - mr r7, r11 - b L(found) -L(foundat3rd): - mr r7, r4 -L(found): - lbz r8, 0(r7) - cmpdi cr7, r8, 0 - beq cr7, L(end) - li r10, 0 - stb r10, 0(r7) /* Terminate string */ - addi r7, r7, 1 /* Store the pointer to the next char */ -L(end): -#ifdef USE_AS_STRTOK_R - std r7, 0(r5) /* Update saveptr */ -#else - std r7, .LANCHOR0@toc@l(r5) -#endif - blr /* done */ -L(returnNULL): -#ifndef USE_AS_STRTOK_R - li r7, 0 -#endif - li r3, 0 /* return NULL */ - b L(end) -END(FUNC_NAME) -#ifdef USE_AS_STRTOK_R -libc_hidden_builtin_def (strtok_r) -#else - .section ".bss" - .align 3 - .set .LANCHOR0,. 
+ 0 - .type olds, @object - .size olds, 8 -olds: - .zero 8 -libc_hidden_builtin_def (strtok) -#endif diff --git a/sysdeps/powerpc/powerpc64/strtok_r.S b/sysdeps/powerpc/powerpc64/strtok_r.S deleted file mode 100644 index 1e665e5..0000000 --- a/sysdeps/powerpc/powerpc64/strtok_r.S +++ /dev/null @@ -1,24 +0,0 @@ -/* Optimized strtok_r implementation for PowerPC64. - Copyright (C) 2014-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#define USE_AS_STRTOK_R -#include - -weak_alias (__strtok_r, strtok_r) -libc_hidden_def (__strtok_r) -libc_hidden_builtin_def (strtok_r) diff --git a/sysdeps/x86_64/strtok.S b/sysdeps/x86_64/strtok.S deleted file mode 100644 index 6b32b8a..0000000 --- a/sysdeps/x86_64/strtok.S +++ /dev/null @@ -1,208 +0,0 @@ -/* strtok (str, delim) -- Return next DELIM separated token from STR. - For AMD x86-64. - Copyright (C) 1998-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Based on i686 version contributed by Ulrich Drepper - , 1998. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include "asm-syntax.h" - -/* This file can be used for the strtok and strtok_r functions: - - strtok: - INPUT PARAMETER: - str %rdi - delim %rsi - - strtok_r: - INPUT PARAMETER: - str %rdi - delim %rsi - save_ptr %rdx - - We do a common implementation here. */ - -#ifdef USE_AS_STRTOK_R -# define SAVE_PTR (%r9) -#else - .bss - .local save_ptr - .type save_ptr, @object - .size save_ptr, LP_SIZE -save_ptr: - .space LP_SIZE - -# ifdef PIC -# define SAVE_PTR save_ptr(%rip) -# else -# define SAVE_PTR save_ptr -# endif - -# define FUNCTION strtok -#endif - - .text -ENTRY (FUNCTION) - /* First we create a table with flags for all possible characters. - For the ASCII (7bit/8bit) or ISO-8859-X character sets which are - supported by the C string functions we have 256 characters. - Before inserting marks for the stop characters we clear the whole - table. */ - movq %rdi, %r8 /* Save value. */ - subq $256, %rsp /* Make space for 256 bytes. */ - cfi_adjust_cfa_offset(256) - movl $32, %ecx /* 32*8 bytes = 256 bytes. */ - movq %rsp, %rdi - xorl %eax, %eax /* We store 0s. */ - cld - rep - stosq - - /* Note: %rcx = 0 !!! */ - -#ifdef USE_AS_STRTOK_R - /* The value is stored in the third argument. */ - mov %RDX_LP, %R9_LP /* Save value - see def. of SAVE_PTR. */ - mov (%rdx), %RAX_LP -#else - /* The value is in the local variable defined above. But - we have to take care for PIC code. */ - mov SAVE_PTR, %RAX_LP -#endif - movq %r8, %rdx /* Get start of string. */ - - /* If the pointer is NULL we have to use the stored value of - the last run. 
*/ - cmpq $0, %rdx - cmove %rax, %rdx - testq %rdx, %rdx - jz L(returnNULL) - movq %rsi, %rax /* Get start of delimiter set. */ - -/* For understanding the following code remember that %rcx == 0 now. - Although all the following instruction only modify %cl we always - have a correct zero-extended 64-bit value in %rcx. */ - -L(2): movb (%rax), %cl /* get byte from stopset */ - testb %cl, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ - - movb 1(%rax), %cl /* get byte from stopset */ - testb $0xff, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ - - movb 2(%rax), %cl /* get byte from stopset */ - testb $0xff, %cl /* is NUL char? */ - jz L(1) /* yes => start compare loop */ - movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ - - movb 3(%rax), %cl /* get byte from stopset */ - addq $4, %rax /* increment stopset pointer */ - movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ - testb $0xff, %cl /* is NUL char? */ - jnz L(2) /* no => process next dword from stopset */ - -L(1): - - leaq -4(%rdx), %rax /* prepare loop */ - - /* We use a neat trick for the following loop. Normally we would - have to test for two termination conditions - 1. a character in the stopset was found - and - 2. the end of the string was found - As a sign that the character is in the stopset we store its - value in the table. The value of NUL is NUL so the loop - terminates for NUL in every case. */ - -L(3): addq $4, %rax /* adjust pointer for full loop round */ - - movb (%rax), %cl /* get byte from string */ - testb %cl, (%rsp,%rcx) /* is it contained in stopset? */ - jz L(4) /* no => start of token */ - - movb 1(%rax), %cl /* get byte from string */ - testb %cl, (%rsp,%rcx) /* is it contained in stopset? 
*/ - jz L(5) /* no => start of token */ - - movb 2(%rax), %cl /* get byte from string */ - testb %cl, (%rsp,%rcx) /* is it contained in stopset? */ - jz L(6) /* no => start of token */ - - movb 3(%rax), %cl /* get byte from string */ - testb %cl, (%rsp,%rcx) /* is it contained in stopset? */ - jnz L(3) /* yes => start of loop */ - - incq %rax /* adjust pointer */ -L(6): incq %rax -L(5): incq %rax - - /* Now we have to terminate the string. */ - -L(4): leaq -4(%rax), %rdx /* We use %rDX for the next run. */ - -L(7): addq $4, %rdx /* adjust pointer for full loop round */ - - movb (%rdx), %cl /* get byte from string */ - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ - je L(8) /* yes => return */ - - movb 1(%rdx), %cl /* get byte from string */ - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ - je L(9) /* yes => return */ - - movb 2(%rdx), %cl /* get byte from string */ - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ - je L(10) /* yes => return */ - - movb 3(%rdx), %cl /* get byte from string */ - cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */ - jne L(7) /* no => start loop again */ - - incq %rdx /* adjust pointer */ -L(10): incq %rdx -L(9): incq %rdx - -L(8): cmpq %rax, %rdx - je L(returnNULL) /* There was no token anymore. */ - - movb $0, (%rdx) /* Terminate string. */ - - /* Are we at end of string? */ - cmpb $0, %cl - leaq 1(%rdx), %rcx - cmovne %rcx, %rdx - - /* Store the pointer to the next character. */ - mov %RDX_LP, SAVE_PTR - -L(epilogue): - /* Remove the stopset table. */ - addq $256, %rsp - cfi_adjust_cfa_offset(-256) - retq - -L(returnNULL): - xorl %eax, %eax - /* Store the pointer to the next character. 
*/ - mov %RDX_LP, SAVE_PTR - jmp L(epilogue) - -END (FUNCTION) diff --git a/sysdeps/x86_64/strtok_r.S b/sysdeps/x86_64/strtok_r.S deleted file mode 100644 index f0db78c..0000000 --- a/sysdeps/x86_64/strtok_r.S +++ /dev/null @@ -1,5 +0,0 @@ -#define FUNCTION __strtok_r -#define USE_AS_STRTOK_R 1 -#include -weak_alias (__strtok_r, strtok_r) -strong_alias (__strtok_r, __GI___strtok_r)