From patchwork Tue Jul 12 19:29:06 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Noah Goldstein X-Patchwork-Id: 55983 Return-Path: X-Original-To: patchwork@sourceware.org Delivered-To: patchwork@sourceware.org Received: from server2.sourceware.org (localhost [IPv6:::1]) by sourceware.org (Postfix) with ESMTP id 863C7382A2C5 for ; Tue, 12 Jul 2022 19:29:43 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 863C7382A2C5 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1657654183; bh=u3nEWwQqbLq3vg4MnLETlFlYZJi5DPJ5iZ5TIbtPjeI=; h=To:Subject:Date:In-Reply-To:References:List-Id:List-Unsubscribe: List-Archive:List-Post:List-Help:List-Subscribe:From:Reply-To: From; b=DSwPjgiby8Rf78BTaE29ylMchZ3XqdfAxJpX8QM/zSlQ583RhkUNnP/UqakXQ8p8w ppvOWlhRJAoll67qixvrZOneouKfd1+vy7II1y3PPcFBrebrxiLQ3WSX7yIW8zfsqX 9eta/KJfnFo6zwLUU76oUblB5g1uN6adnzb79eyM= X-Original-To: libc-alpha@sourceware.org Delivered-To: libc-alpha@sourceware.org Received: from mail-pg1-x52f.google.com (mail-pg1-x52f.google.com [IPv6:2607:f8b0:4864:20::52f]) by sourceware.org (Postfix) with ESMTPS id 9944E38768A5 for ; Tue, 12 Jul 2022 19:29:20 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 9944E38768A5 Received: by mail-pg1-x52f.google.com with SMTP id bh13so8480185pgb.4 for ; Tue, 12 Jul 2022 12:29:20 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=u3nEWwQqbLq3vg4MnLETlFlYZJi5DPJ5iZ5TIbtPjeI=; b=2mGX3hRq6x9W2SGPLNY0wH8esS4kolX8K8TN+OsjIDTonSjIbajDGMyl3qvWfVCmhi Cn2OuE1l7YsGDC8oUDKTPmwiCtt0ZldvFAbt/m6B9IniU2EZ3H0BWeH1hHs2AAYet/V0 BIULBAqSmCI61dKDUpFb6K08c3ZNPy0F/3JKS9dPFhUlQwci2JCgCMtwBDcGhJGLefH0 mFgAQBfv3KLnLCbjnjLTdFVGUljGIdB4mnjybM6T2ZS3NlTNX3LKEipUih9h7LVoRj+f ZHvwlHd1doveK7ACaE68ZgdWkB3zYy9iMbylQe9yddTjsLzrf+eg7ZK1B8GT5+Ko85nL +1Pw== X-Gm-Message-State: AJIora/grRrC+TyKfuYBOEMl3Q5odeRTgEV0Ztn4SMrHduYDLcV4+HA8 FbN6+S4nv5+uXpA6c4BQPGqVUFSedxg= X-Google-Smtp-Source: AGRyM1t3tqmHeqGZC/R3rGw0Npo2fggvz1sy2gqnYvnCQA9JdQC5sX82tIi/NkMDlqDGgnlUT8Sp0g== X-Received: by 2002:a63:2a82:0:b0:412:82d9:f015 with SMTP id q124-20020a632a82000000b0041282d9f015mr21363764pgq.447.1657654159057; Tue, 12 Jul 2022 12:29:19 -0700 (PDT) Received: from noah-tgl.. ([192.55.60.37]) by smtp.gmail.com with ESMTPSA id w7-20020a170902e88700b0016c28fbd7e5sm7274704plg.268.2022.07.12.12.29.17 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 12 Jul 2022 12:29:18 -0700 (PDT) To: libc-alpha@sourceware.org Subject: [PATCH v1] x86: Move strcat SSE2 implementation to multiarch/strcat-sse2.S Date: Tue, 12 Jul 2022 12:29:06 -0700 Message-Id: <20220712192910.351121-6-goldstein.w.n@gmail.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20220712192910.351121-1-goldstein.w.n@gmail.com> References: <20220712192910.351121-1-goldstein.w.n@gmail.com> MIME-Version: 1.0 X-Spam-Status: No, score=-12.1 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP, T_SCC_BODY_TEXT_LINE autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-Patchwork-Original-From: Noah Goldstein via Libc-alpha From: Noah Goldstein Reply-To: Noah Goldstein Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org Sender: "Libc-alpha" This commit doesn't affect libc.so.6, its just housekeeping to prepare for adding explicit ISA level support. Tested build on x86_64 and x86_32 with/without multiarch. --- sysdeps/x86_64/multiarch/strcat-sse2.S | 242 ++++++++++++++++++++++++- sysdeps/x86_64/strcat.S | 239 +----------------------- 2 files changed, 238 insertions(+), 243 deletions(-) diff --git a/sysdeps/x86_64/multiarch/strcat-sse2.S b/sysdeps/x86_64/multiarch/strcat-sse2.S index 449e102438..244c4a6d74 100644 --- a/sysdeps/x86_64/multiarch/strcat-sse2.S +++ b/sysdeps/x86_64/multiarch/strcat-sse2.S @@ -17,12 +17,242 @@ . */ #if IS_IN (libc) +# ifndef STRCAT +# define STRCAT __strcat_sse2 +# endif +#endif -# include -# define strcat __strcat_sse2 +#include + + .text +ENTRY (STRCAT) + movq %rdi, %rcx /* Dest. register. */ + andl $7, %ecx /* mask alignment bits */ + movq %rdi, %rax /* Duplicate destination pointer. */ + movq $0xfefefefefefefeff,%r8 + + /* First step: Find end of destination. */ + jz 4f /* aligned => start loop */ + + neg %ecx /* We need to align to 8 bytes. */ + addl $8,%ecx + /* Search the first bytes directly. */ +0: cmpb $0x0,(%rax) /* is byte NUL? */ + je 2f /* yes => start copy */ + incq %rax /* increment pointer */ + decl %ecx + jnz 0b + + + + /* Now the source is aligned. Scan for NUL byte. */ + .p2align 4 +4: + /* First unroll. */ + movq (%rax), %rcx /* get double word (= 8 bytes) in question */ + addq $8,%rax /* adjust pointer for next word */ + movq %r8, %rdx /* magic value */ + addq %rcx, %rdx /* add the magic value to the word. We get + carry bits reported for each byte which + is *not* 0 */ + jnc 3f /* highest byte is NUL => return pointer */ + xorq %rcx, %rdx /* (word+magic)^word */ + orq %r8, %rdx /* set all non-carry bits */ + incq %rdx /* add 1: if one carry bit was *not* set + the addition will not result in 0. */ + jnz 3f /* found NUL => return pointer */ + + /* Second unroll. */ + movq (%rax), %rcx /* get double word (= 8 bytes) in question */ + addq $8,%rax /* adjust pointer for next word */ + movq %r8, %rdx /* magic value */ + addq %rcx, %rdx /* add the magic value to the word. We get + carry bits reported for each byte which + is *not* 0 */ + jnc 3f /* highest byte is NUL => return pointer */ + xorq %rcx, %rdx /* (word+magic)^word */ + orq %r8, %rdx /* set all non-carry bits */ + incq %rdx /* add 1: if one carry bit was *not* set + the addition will not result in 0. */ + jnz 3f /* found NUL => return pointer */ + + /* Third unroll. */ + movq (%rax), %rcx /* get double word (= 8 bytes) in question */ + addq $8,%rax /* adjust pointer for next word */ + movq %r8, %rdx /* magic value */ + addq %rcx, %rdx /* add the magic value to the word. We get + carry bits reported for each byte which + is *not* 0 */ + jnc 3f /* highest byte is NUL => return pointer */ + xorq %rcx, %rdx /* (word+magic)^word */ + orq %r8, %rdx /* set all non-carry bits */ + incq %rdx /* add 1: if one carry bit was *not* set + the addition will not result in 0. */ + jnz 3f /* found NUL => return pointer */ + + /* Fourth unroll. */ + movq (%rax), %rcx /* get double word (= 8 bytes) in question */ + addq $8,%rax /* adjust pointer for next word */ + movq %r8, %rdx /* magic value */ + addq %rcx, %rdx /* add the magic value to the word. We get + carry bits reported for each byte which + is *not* 0 */ + jnc 3f /* highest byte is NUL => return pointer */ + xorq %rcx, %rdx /* (word+magic)^word */ + orq %r8, %rdx /* set all non-carry bits */ + incq %rdx /* add 1: if one carry bit was *not* set + the addition will not result in 0. */ + jz 4b /* no NUL found => continue loop */ + + .p2align 4 /* Align, it's a jump target. */ +3: subq $8,%rax /* correct pointer increment. */ + + testb %cl, %cl /* is first byte NUL? */ + jz 2f /* yes => return */ + incq %rax /* increment pointer */ + + testb %ch, %ch /* is second byte NUL? */ + jz 2f /* yes => return */ + incq %rax /* increment pointer */ + + testl $0x00ff0000, %ecx /* is third byte NUL? */ + jz 2f /* yes => return pointer */ + incq %rax /* increment pointer */ + + testl $0xff000000, %ecx /* is fourth byte NUL? */ + jz 2f /* yes => return pointer */ + incq %rax /* increment pointer */ + + shrq $32, %rcx /* look at other half. */ + + testb %cl, %cl /* is first byte NUL? */ + jz 2f /* yes => return */ + incq %rax /* increment pointer */ + + testb %ch, %ch /* is second byte NUL? */ + jz 2f /* yes => return */ + incq %rax /* increment pointer */ + + testl $0xff0000, %ecx /* is third byte NUL? */ + jz 2f /* yes => return pointer */ + incq %rax /* increment pointer */ + +2: + /* Second step: Copy source to destination. */ + + movq %rsi, %rcx /* duplicate */ + andl $7,%ecx /* mask alignment bits */ + movq %rax, %rdx /* move around */ + jz 22f /* aligned => start loop */ + + neg %ecx /* align to 8 bytes. */ + addl $8, %ecx + /* Align the source pointer. */ +21: + movb (%rsi), %al /* Fetch a byte */ + testb %al, %al /* Is it NUL? */ + movb %al, (%rdx) /* Store it */ + jz 24f /* If it was NUL, done! */ + incq %rsi + incq %rdx + decl %ecx + jnz 21b + + /* Now the sources is aligned. Unfortunatly we cannot force + to have both source and destination aligned, so ignore the + alignment of the destination. */ + .p2align 4 +22: + /* 1st unroll. */ + movq (%rsi), %rax /* Read double word (8 bytes). */ + addq $8, %rsi /* Adjust pointer for next word. */ + movq %rax, %r9 /* Save a copy for NUL finding. */ + addq %r8, %r9 /* add the magic value to the word. We get + carry bits reported for each byte which + is *not* 0 */ + jnc 23f /* highest byte is NUL => return pointer */ + xorq %rax, %r9 /* (word+magic)^word */ + orq %r8, %r9 /* set all non-carry bits */ + incq %r9 /* add 1: if one carry bit was *not* set + the addition will not result in 0. */ + + jnz 23f /* found NUL => return pointer */ + + movq %rax, (%rdx) /* Write value to destination. */ + addq $8, %rdx /* Adjust pointer. */ + + /* 2nd unroll. */ + movq (%rsi), %rax /* Read double word (8 bytes). */ + addq $8, %rsi /* Adjust pointer for next word. */ + movq %rax, %r9 /* Save a copy for NUL finding. */ + addq %r8, %r9 /* add the magic value to the word. We get + carry bits reported for each byte which + is *not* 0 */ + jnc 23f /* highest byte is NUL => return pointer */ + xorq %rax, %r9 /* (word+magic)^word */ + orq %r8, %r9 /* set all non-carry bits */ + incq %r9 /* add 1: if one carry bit was *not* set + the addition will not result in 0. */ + + jnz 23f /* found NUL => return pointer */ + + movq %rax, (%rdx) /* Write value to destination. */ + addq $8, %rdx /* Adjust pointer. */ + + /* 3rd unroll. */ + movq (%rsi), %rax /* Read double word (8 bytes). */ + addq $8, %rsi /* Adjust pointer for next word. */ + movq %rax, %r9 /* Save a copy for NUL finding. */ + addq %r8, %r9 /* add the magic value to the word. We get + carry bits reported for each byte which + is *not* 0 */ + jnc 23f /* highest byte is NUL => return pointer */ + xorq %rax, %r9 /* (word+magic)^word */ + orq %r8, %r9 /* set all non-carry bits */ + incq %r9 /* add 1: if one carry bit was *not* set + the addition will not result in 0. */ + + jnz 23f /* found NUL => return pointer */ + + movq %rax, (%rdx) /* Write value to destination. */ + addq $8, %rdx /* Adjust pointer. */ + + /* 4th unroll. */ + movq (%rsi), %rax /* Read double word (8 bytes). */ + addq $8, %rsi /* Adjust pointer for next word. */ + movq %rax, %r9 /* Save a copy for NUL finding. */ + addq %r8, %r9 /* add the magic value to the word. We get + carry bits reported for each byte which + is *not* 0 */ + jnc 23f /* highest byte is NUL => return pointer */ + xorq %rax, %r9 /* (word+magic)^word */ + orq %r8, %r9 /* set all non-carry bits */ + incq %r9 /* add 1: if one carry bit was *not* set + the addition will not result in 0. */ + + jnz 23f /* found NUL => return pointer */ + + movq %rax, (%rdx) /* Write value to destination. */ + addq $8, %rdx /* Adjust pointer. */ + jmp 22b /* Next iteration. */ + + /* Do the last few bytes. %rax contains the value to write. + The loop is unrolled twice. */ + .p2align 4 +23: + movb %al, (%rdx) /* 1st byte. */ + testb %al, %al /* Is it NUL. */ + jz 24f /* yes, finish. */ + incq %rdx /* Increment destination. */ + movb %ah, (%rdx) /* 2nd byte. */ + testb %ah, %ah /* Is it NUL?. */ + jz 24f /* yes, finish. */ + incq %rdx /* Increment destination. */ + shrq $16, %rax /* Shift... */ + jmp 23b /* and look at next two bytes in %rax. */ -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(strcat) -#endif -#include +24: + movq %rdi, %rax /* Source is return value. */ + retq +END (STRCAT) diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S index 565a9c785a..fc3e8a9bcf 100644 --- a/sysdeps/x86_64/strcat.S +++ b/sysdeps/x86_64/strcat.S @@ -17,241 +17,6 @@ License along with the GNU C Library; if not, see . */ -#include -#include "asm-syntax.h" - -/* Will be removed when new strcpy implementation gets merged. */ - - .text -ENTRY (strcat) - movq %rdi, %rcx /* Dest. register. */ - andl $7, %ecx /* mask alignment bits */ - movq %rdi, %rax /* Duplicate destination pointer. */ - movq $0xfefefefefefefeff,%r8 - - /* First step: Find end of destination. */ - jz 4f /* aligned => start loop */ - - neg %ecx /* We need to align to 8 bytes. */ - addl $8,%ecx - /* Search the first bytes directly. */ -0: cmpb $0x0,(%rax) /* is byte NUL? */ - je 2f /* yes => start copy */ - incq %rax /* increment pointer */ - decl %ecx - jnz 0b - - - - /* Now the source is aligned. Scan for NUL byte. */ - .p2align 4 -4: - /* First unroll. */ - movq (%rax), %rcx /* get double word (= 8 bytes) in question */ - addq $8,%rax /* adjust pointer for next word */ - movq %r8, %rdx /* magic value */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 3f /* found NUL => return pointer */ - - /* Second unroll. */ - movq (%rax), %rcx /* get double word (= 8 bytes) in question */ - addq $8,%rax /* adjust pointer for next word */ - movq %r8, %rdx /* magic value */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 3f /* found NUL => return pointer */ - - /* Third unroll. */ - movq (%rax), %rcx /* get double word (= 8 bytes) in question */ - addq $8,%rax /* adjust pointer for next word */ - movq %r8, %rdx /* magic value */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jnz 3f /* found NUL => return pointer */ - - /* Fourth unroll. */ - movq (%rax), %rcx /* get double word (= 8 bytes) in question */ - addq $8,%rax /* adjust pointer for next word */ - movq %r8, %rdx /* magic value */ - addq %rcx, %rdx /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 3f /* highest byte is NUL => return pointer */ - xorq %rcx, %rdx /* (word+magic)^word */ - orq %r8, %rdx /* set all non-carry bits */ - incq %rdx /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - jz 4b /* no NUL found => continue loop */ - - .p2align 4 /* Align, it's a jump target. */ -3: subq $8,%rax /* correct pointer increment. */ - - testb %cl, %cl /* is first byte NUL? */ - jz 2f /* yes => return */ - incq %rax /* increment pointer */ - - testb %ch, %ch /* is second byte NUL? */ - jz 2f /* yes => return */ - incq %rax /* increment pointer */ - - testl $0x00ff0000, %ecx /* is third byte NUL? */ - jz 2f /* yes => return pointer */ - incq %rax /* increment pointer */ - - testl $0xff000000, %ecx /* is fourth byte NUL? */ - jz 2f /* yes => return pointer */ - incq %rax /* increment pointer */ - - shrq $32, %rcx /* look at other half. */ - - testb %cl, %cl /* is first byte NUL? */ - jz 2f /* yes => return */ - incq %rax /* increment pointer */ - - testb %ch, %ch /* is second byte NUL? */ - jz 2f /* yes => return */ - incq %rax /* increment pointer */ - - testl $0xff0000, %ecx /* is third byte NUL? */ - jz 2f /* yes => return pointer */ - incq %rax /* increment pointer */ - -2: - /* Second step: Copy source to destination. */ - - movq %rsi, %rcx /* duplicate */ - andl $7,%ecx /* mask alignment bits */ - movq %rax, %rdx /* move around */ - jz 22f /* aligned => start loop */ - - neg %ecx /* align to 8 bytes. */ - addl $8, %ecx - /* Align the source pointer. */ -21: - movb (%rsi), %al /* Fetch a byte */ - testb %al, %al /* Is it NUL? */ - movb %al, (%rdx) /* Store it */ - jz 24f /* If it was NUL, done! */ - incq %rsi - incq %rdx - decl %ecx - jnz 21b - - /* Now the sources is aligned. Unfortunatly we cannot force - to have both source and destination aligned, so ignore the - alignment of the destination. */ - .p2align 4 -22: - /* 1st unroll. */ - movq (%rsi), %rax /* Read double word (8 bytes). */ - addq $8, %rsi /* Adjust pointer for next word. */ - movq %rax, %r9 /* Save a copy for NUL finding. */ - addq %r8, %r9 /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 23f /* highest byte is NUL => return pointer */ - xorq %rax, %r9 /* (word+magic)^word */ - orq %r8, %r9 /* set all non-carry bits */ - incq %r9 /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - jnz 23f /* found NUL => return pointer */ - - movq %rax, (%rdx) /* Write value to destination. */ - addq $8, %rdx /* Adjust pointer. */ - - /* 2nd unroll. */ - movq (%rsi), %rax /* Read double word (8 bytes). */ - addq $8, %rsi /* Adjust pointer for next word. */ - movq %rax, %r9 /* Save a copy for NUL finding. */ - addq %r8, %r9 /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 23f /* highest byte is NUL => return pointer */ - xorq %rax, %r9 /* (word+magic)^word */ - orq %r8, %r9 /* set all non-carry bits */ - incq %r9 /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - jnz 23f /* found NUL => return pointer */ - - movq %rax, (%rdx) /* Write value to destination. */ - addq $8, %rdx /* Adjust pointer. */ - - /* 3rd unroll. */ - movq (%rsi), %rax /* Read double word (8 bytes). */ - addq $8, %rsi /* Adjust pointer for next word. */ - movq %rax, %r9 /* Save a copy for NUL finding. */ - addq %r8, %r9 /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 23f /* highest byte is NUL => return pointer */ - xorq %rax, %r9 /* (word+magic)^word */ - orq %r8, %r9 /* set all non-carry bits */ - incq %r9 /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - jnz 23f /* found NUL => return pointer */ - - movq %rax, (%rdx) /* Write value to destination. */ - addq $8, %rdx /* Adjust pointer. */ - - /* 4th unroll. */ - movq (%rsi), %rax /* Read double word (8 bytes). */ - addq $8, %rsi /* Adjust pointer for next word. */ - movq %rax, %r9 /* Save a copy for NUL finding. */ - addq %r8, %r9 /* add the magic value to the word. We get - carry bits reported for each byte which - is *not* 0 */ - jnc 23f /* highest byte is NUL => return pointer */ - xorq %rax, %r9 /* (word+magic)^word */ - orq %r8, %r9 /* set all non-carry bits */ - incq %r9 /* add 1: if one carry bit was *not* set - the addition will not result in 0. */ - - jnz 23f /* found NUL => return pointer */ - - movq %rax, (%rdx) /* Write value to destination. */ - addq $8, %rdx /* Adjust pointer. */ - jmp 22b /* Next iteration. */ - - /* Do the last few bytes. %rax contains the value to write. - The loop is unrolled twice. */ - .p2align 4 -23: - movb %al, (%rdx) /* 1st byte. */ - testb %al, %al /* Is it NUL. */ - jz 24f /* yes, finish. */ - incq %rdx /* Increment destination. */ - movb %ah, (%rdx) /* 2nd byte. */ - testb %ah, %ah /* Is it NUL?. */ - jz 24f /* yes, finish. */ - incq %rdx /* Increment destination. */ - shrq $16, %rax /* Shift... */ - jmp 23b /* and look at next two bytes in %rax. */ - - -24: - movq %rdi, %rax /* Source is return value. */ - retq -END (strcat) +#define STRCAT strcat +#include "multiarch/strcat-sse2.S" libc_hidden_builtin_def (strcat)