From patchwork Fri May 12 00:37:50 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Zack Weinberg X-Patchwork-Id: 20422 Received: (qmail 33171 invoked by alias); 12 May 2017 00:38:01 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 33029 invoked by uid 89); 12 May 2017 00:37:57 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-25.2 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RP_MATCHES_RCVD, SPF_PASS autolearn=ham version=3.3.2 spammy=REGISTER, SMS, sms, hpl X-HELO: mailbackend.panix.com From: Zack Weinberg To: libc-alpha@sourceware.org Cc: joseph@codesourcery.com, fweimer@redhat.com, roland@hack.frob.com Subject: [PATCH 2/2] Remove vestiges of NaCl port from ARM assembly files. Date: Thu, 11 May 2017 20:37:50 -0400 Message-Id: <20170512003750.27215-2-zackw@panix.com> In-Reply-To: <20170512003750.27215-1-zackw@panix.com> References: <20170512003750.27215-1-zackw@panix.com> MIME-Version: 1.0 This semi-mechanical patch removes all uses and definitions of the sfi_breg, sfi_pld, and sfi_sp macros from various ARM-specific assembly files. These were only used by NaCl. Please read carefully: I don't have any ARM hardware to test this patch on, and I may have screwed up my regular expressions. * sysdeps/arm/sysdep.h (ARM_SFI_MACROS, sfi_breg, sfi_pld, sfi_sp): Delete definitions. 
* sysdeps/arm/__longjmp.S, sysdeps/arm/add_n.S * sysdeps/arm/addmul_1.S, sysdeps/arm/arm-mcount.S * sysdeps/arm/armv6/rawmemchr.S, sysdeps/arm/armv6/strchr.S * sysdeps/arm/armv6/strcpy.S, sysdeps/arm/armv6/strlen.S * sysdeps/arm/armv6/strrchr.S, sysdeps/arm/armv6t2/memchr.S * sysdeps/arm/armv6t2/strlen.S * sysdeps/arm/armv7/multiarch/memcpy_impl.S * sysdeps/arm/armv7/strcmp.S, sysdeps/arm/dl-tlsdesc.S * sysdeps/arm/memcpy.S, sysdeps/arm/memmove.S * sysdeps/arm/memset.S, sysdeps/arm/setjmp.S * sysdeps/arm/strlen.S, sysdeps/arm/submul_1.S: Remove all uses of sfi_breg, sfi_pld, and sfi_sp. --- sysdeps/arm/__longjmp.S | 40 +-- sysdeps/arm/add_n.S | 27 +- sysdeps/arm/addmul_1.S | 18 +- sysdeps/arm/arm-mcount.S | 3 +- sysdeps/arm/armv6/rawmemchr.S | 18 +- sysdeps/arm/armv6/strchr.S | 18 +- sysdeps/arm/armv6/strcpy.S | 55 ++- sysdeps/arm/armv6/strlen.S | 18 +- sysdeps/arm/armv6/strrchr.S | 6 +- sysdeps/arm/armv6t2/memchr.S | 9 +- sysdeps/arm/armv6t2/strlen.S | 23 +- sysdeps/arm/armv7/multiarch/memcpy_impl.S | 579 ++++++++++-------------------- sysdeps/arm/armv7/strcmp.S | 93 ++--- sysdeps/arm/dl-tlsdesc.S | 28 +- sysdeps/arm/memcpy.S | 124 +++---- sysdeps/arm/memmove.S | 124 +++---- sysdeps/arm/memset.S | 27 +- sysdeps/arm/setjmp.S | 36 +- sysdeps/arm/strlen.S | 6 +- sysdeps/arm/submul_1.S | 18 +- sysdeps/arm/sysdep.h | 52 +-- 21 files changed, 441 insertions(+), 881 deletions(-) diff --git a/sysdeps/arm/__longjmp.S b/sysdeps/arm/__longjmp.S index 2a567b5bfa..5202c728bc 100644 --- a/sysdeps/arm/__longjmp.S +++ b/sysdeps/arm/__longjmp.S @@ -28,8 +28,7 @@ ENTRY (__longjmp) mov ip, r0 #ifdef CHECK_SP - sfi_breg ip, \ - ldr r4, [\B] /* jmpbuf's sp */ + ldr r4, [ip] /* jmpbuf's sp */ cfi_undefined (r4) #ifdef PTR_DEMANGLE PTR_DEMANGLE (r4, r4, a3, a4) @@ -38,28 +37,22 @@ ENTRY (__longjmp) #endif #ifdef PTR_DEMANGLE - sfi_breg ip, \ - ldr a4, [\B], #4 + ldr a4, [ip], #4 PTR_DEMANGLE (a4, a4, a3, r4) cfi_undefined (r4) - sfi_breg ip, \ - ldr r4, [\B], #4 + ldr r4, [ip], #4 
PTR_DEMANGLE2 (r4, r4, a3) #else - sfi_breg ip, \ - ldr a4, [\B], #4 - sfi_breg ip, \ - ldr r4, [\B], #4 + ldr a4, [ip], #4 + ldr r4, [ip], #4 cfi_undefined (r4) #endif /* longjmp probe expects longjmp first argument (4@r0), second argument (-4@r1), and target address (4@r4), respectively. */ LIBC_PROBE (longjmp, 3, 4@r0, -4@r1, 4@r4) - sfi_sp \ mov sp, a4 mov lr, r4 - sfi_breg ip, \ - ldmia \B!, JMP_BUF_REGLIST + ldmia ip!, JMP_BUF_REGLIST cfi_restore (v1) cfi_restore (v2) cfi_restore (v3) @@ -97,8 +90,7 @@ ENTRY (__longjmp) /* Restore the VFP registers. */ /* Following instruction is vldmia ip!, {d8-d15}. */ - sfi_breg r12, \ - ldc p11, cr8, [\B], #64 + ldc p11, cr8, [r12], #64 .Lno_vfp: #ifndef ARM_ASSUME_NO_IWMMXT @@ -107,18 +99,12 @@ ENTRY (__longjmp) /* Restore the call-preserved iWMMXt registers. */ /* Following instructions are wldrd wr10, [ip], #8 (etc.) */ - sfi_breg r12, \ - ldcl p1, cr10, [\B], #8 - sfi_breg r12, \ - ldcl p1, cr11, [\B], #8 - sfi_breg r12, \ - ldcl p1, cr12, [\B], #8 - sfi_breg r12, \ - ldcl p1, cr13, [\B], #8 - sfi_breg r12, \ - ldcl p1, cr14, [\B], #8 - sfi_breg r12, \ - ldcl p1, cr15, [\B], #8 + ldcl p1, cr10, [r12], #8 + ldcl p1, cr11, [r12], #8 + ldcl p1, cr12, [r12], #8 + ldcl p1, cr13, [r12], #8 + ldcl p1, cr14, [r12], #8 + ldcl p1, cr15, [r12], #8 .Lno_iwmmxt: #endif diff --git a/sysdeps/arm/add_n.S b/sysdeps/arm/add_n.S index b601357e64..811a769959 100644 --- a/sysdeps/arm/add_n.S +++ b/sysdeps/arm/add_n.S @@ -52,40 +52,31 @@ ENTRY (FUNC) add lr, r1, r3, lsl #2 /* compute end src1 */ beq 1f - sfi_breg r1, \ - ldr r4, [\B], #4 /* do one to make count even */ - sfi_breg r2, \ - ldr r5, [\B], #4 + ldr r4, [r1], #4 /* do one to make count even */ + ldr r5, [r2], #4 OPC r4, r4, r5 teq r1, lr /* end of count? (preserve carry) */ - sfi_breg r0, \ - str r4, [\B], #4 + str r4, [r0], #4 beq 9f 1: tst r3, #2 /* count & 2 == 2? 
*/ beq 2f - sfi_breg r1, \ - ldm \B!, { r4, r5 } /* do two to make count 0 mod 4 */ - sfi_breg r2, \ - ldm \B!, { r6, r7 } + ldm r1!, { r4, r5 } /* do two to make count 0 mod 4 */ + ldm r2!, { r6, r7 } OPC r4, r4, r6 OPC r5, r5, r7 teq r1, lr /* end of count? */ - sfi_breg r0, \ - stm \B!, { r4, r5 } + stm r0!, { r4, r5 } beq 9f 2: - sfi_breg r1, \ - ldm \B!, { r3, r5, r7, r10 } /* do four each loop */ - sfi_breg r2, \ - ldm \B!, { r4, r6, r8, ip } + ldm r1!, { r3, r5, r7, r10 } /* do four each loop */ + ldm r2!, { r4, r6, r8, ip } OPC r3, r3, r4 OPC r5, r5, r6 OPC r7, r7, r8 OPC r10, r10, ip teq r1, lr - sfi_breg r0, \ - stm \B!, { r3, r5, r7, r10 } + stm r0!, { r3, r5, r7, r10 } bne 2b 9: diff --git a/sysdeps/arm/addmul_1.S b/sysdeps/arm/addmul_1.S index a681c242ef..c4e54f8723 100644 --- a/sysdeps/arm/addmul_1.S +++ b/sysdeps/arm/addmul_1.S @@ -37,21 +37,16 @@ ENTRY (__mpn_addmul_1) cfi_rel_offset (r6, 8) cfi_rel_offset (r7, 12) - sfi_breg r1, \ - ldr r6, [\B], #4 - sfi_breg r0, \ - ldr r5, [\B] + ldr r6, [r1], #4 + ldr r5, [r0] mov r4, #0 /* init carry in */ b 1f 0: - sfi_breg r1, \ - ldr r6, [\B], #4 /* load next ul */ + ldr r6, [r1], #4 /* load next ul */ adds r7, r4, r5 /* (out, c) = cl + lpl */ - sfi_breg r0, \ - ldr r5, [\B, #4] /* load next rl */ + ldr r5, [r0, #4] /* load next rl */ adc r4, ip, #0 /* cl = hpl + c */ - sfi_breg r0, \ - str r7, [\B], #4 + str r7, [r0], #4 1: mov ip, #0 /* zero-extend rl */ umlal r5, ip, r6, r3 /* (hpl, lpl) = ul * vl + rl */ @@ -59,8 +54,7 @@ ENTRY (__mpn_addmul_1) bne 0b adds r4, r4, r5 /* (out, c) = cl + llpl */ - sfi_breg r0, \ - str r4, [\B] + str r4, [r0] adc r0, ip, #0 /* return hpl + c */ pop { r4, r5, r6, r7 } diff --git a/sysdeps/arm/arm-mcount.S b/sysdeps/arm/arm-mcount.S index b08a8dd0d8..df2601b4a6 100644 --- a/sysdeps/arm/arm-mcount.S +++ b/sysdeps/arm/arm-mcount.S @@ -90,8 +90,7 @@ ENTRY(__mcount_arm_compat) cfi_rel_offset (lr, 20) movs r0, fp ittt ne - sfi_breg r0, \ - ldrne r0, [\B, #-4] + ldrne r0, [r0, 
#-4] movsne r1, lr blne __mcount_internal # if defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__) diff --git a/sysdeps/arm/armv6/rawmemchr.S b/sysdeps/arm/armv6/rawmemchr.S index b4d4eb2e39..2511f105bd 100644 --- a/sysdeps/arm/armv6/rawmemchr.S +++ b/sysdeps/arm/armv6/rawmemchr.S @@ -25,8 +25,7 @@ ENTRY (__rawmemchr) @ r0 = start of string @ r1 = character to match @ returns a pointer to the match, which must be present. - sfi_breg r0, \ - ldrb r2, [\B] @ load first byte asap + ldrb r2, [r0] @ load first byte asap @ To cater to long strings, we want to search through a few @ characters until we reach an aligned pointer. To cater to @@ -42,8 +41,7 @@ ENTRY (__rawmemchr) bxeq lr @ Loop until we find ... -1: sfi_breg r0, \ - ldrb r2, [\B, #1]! +1: ldrb r2, [r0, #1]! subs r3, r3, #1 @ ... the alignment point it ne cmpne r2, r1 @ ... or C @@ -56,16 +54,15 @@ ENTRY (__rawmemchr) add r0, r0, #1 @ So now we're aligned. - sfi_breg r0, \ - ldrd r2, r3, [\B], #8 + ldrd r2, r3, [r0], #8 orr r1, r1, r1, lsl #8 @ Replicate C to all bytes #ifdef ARCH_HAS_T2 movw ip, #0x0101 - sfi_pld r0, #64 + pld [r0, #64] movt ip, #0x0101 #else ldr ip, =0x01010101 - sfi_pld r0, #64 + pld [r0, #64] #endif orr r1, r1, r1, lsl #16 @@ -77,11 +74,10 @@ ENTRY (__rawmemchr) eor r3, r3, r1 uqsub8 r2, ip, r2 @ Find C uqsub8 r3, ip, r3 - sfi_pld r0, #128 + pld [r0, #128] orrs r3, r3, r2 @ Test both words for found it eq - sfi_breg r0, \ - ldrdeq r2, r3, [\B], #8 + ldrdeq r2, r3, [r0], #8 beq 2b @ Found something. Disambiguate between first and second words. 
diff --git a/sysdeps/arm/armv6/strchr.S b/sysdeps/arm/armv6/strchr.S index f6fd192144..bfd0a6b237 100644 --- a/sysdeps/arm/armv6/strchr.S +++ b/sysdeps/arm/armv6/strchr.S @@ -25,8 +25,7 @@ ENTRY (strchr) @ r0 = start of string @ r1 = character to match @ returns NULL for no match, or a pointer to the match - sfi_breg r0, \ - ldrb r2, [\B] @ load the first byte asap + ldrb r2, [r0] @ load the first byte asap uxtb r1, r1 @ To cater to long strings, we want to search through a few @@ -43,8 +42,7 @@ ENTRY (strchr) beq 99f @ Loop until we find ... -1: sfi_breg r0, \ - ldrb r2, [\B, #1]! +1: ldrb r2, [r0, #1]! subs r3, r3, #1 @ ... the aligment point it ne cmpne r2, r1 @ ... or the character @@ -67,16 +65,15 @@ ENTRY (strchr) cfi_rel_offset (r6, 8) cfi_rel_offset (r7, 12) - sfi_breg r0, \ - ldrd r2, r3, [\B], #8 + ldrd r2, r3, [r0], #8 orr r1, r1, r1, lsl #8 @ Replicate C to all bytes #ifdef ARCH_HAS_T2 movw ip, #0x0101 - sfi_pld r0, #64 + pld [r0, #64] movt ip, #0x0101 #else ldr ip, =0x01010101 - sfi_pld r0, #64 + pld [r0, #64] #endif orr r1, r1, r1, lsl #16 @@ -90,14 +87,13 @@ ENTRY (strchr) uqsub8 r5, ip, r3 eor r7, r3, r1 uqsub8 r6, ip, r6 @ Find C - sfi_pld r0, #128 @ Prefetch 2 lines ahead + pld [r0, #128] @ Prefetch 2 lines ahead uqsub8 r7, ip, r7 orr r4, r4, r6 @ Combine found for EOS and C orr r5, r5, r7 orrs r6, r4, r5 @ Combine the two words it eq - sfi_breg r0, \ - ldrdeq r2, r3, [\B], #8 + ldrdeq r2, r3, [r0], #8 beq 2b @ Found something. Disambiguate between first and second words. diff --git a/sysdeps/arm/armv6/strcpy.S b/sysdeps/arm/armv6/strcpy.S index 239418c658..1b98dbce30 100644 --- a/sysdeps/arm/armv6/strcpy.S +++ b/sysdeps/arm/armv6/strcpy.S @@ -44,8 +44,8 @@ ENTRY (strcpy) @ Signal strcpy with DEST in IP. mov ip, r0 0: - sfi_pld r0 - sfi_pld r1 + pld [r0, #0] + pld [r1, #0] @ To cater to long strings, we want 8 byte alignment in the source. @ To cater to small strings, we don't want to start that right away. 
@@ -54,11 +54,9 @@ ENTRY (strcpy) rsb r3, r3, #16 @ Loop until we find ... -1: sfi_breg r1, \ - ldrb r2, [\B], #1 +1: ldrb r2, [r1], #1 subs r3, r3, #1 @ ... the alignment point - sfi_breg r0, \ - strb r2, [\B], #1 + strb r2, [r0], #1 it ne cmpne r2, #0 @ ... or EOS bne 1b @@ -68,10 +66,9 @@ ENTRY (strcpy) beq .Lreturn @ Load the next two words asap - sfi_breg r1, \ - ldrd r2, r3, [\B], #8 - sfi_pld r0, #64 - sfi_pld r1, #64 + ldrd r2, r3, [r1], #8 + pld [r0, #64] + pld [r1, #64] @ For longer strings, we actaully need a stack frame. push { r4, r5, r6, r7 } @@ -99,18 +96,15 @@ ENTRY (strcpy) .balign 16 2: uqsub8 r4, r7, r2 @ Find EOS uqsub8 r5, r7, r3 - sfi_pld r1, #128 + pld [r1, #128] cmp r4, #0 @ EOS in first word? - sfi_pld r0, #128 + pld [r0, #128] bne 3f - sfi_breg r0, \ - str r2, [\B], #4 + str r2, [r0], #4 cmp r5, #0 @ EOS in second word? bne 4f - sfi_breg r0, \ - str r3, [\B], #4 - sfi_breg r1, \ - ldrd r2, r3, [\B], #8 + str r3, [r0], #4 + ldrd r2, r3, [r1], #8 b 2b 3: sub r1, r1, #4 @ backup to first word @@ -120,11 +114,9 @@ ENTRY (strcpy) @ Note that we generally back up and re-read source bytes, @ but we'll not re-write dest bytes. .Lbyte_loop: - sfi_breg r1, \ - ldrb r2, [\B], #1 + ldrb r2, [r1], #1 cmp r2, #0 - sfi_breg r0, \ - strb r2, [\B], #1 + strb r2, [r0], #1 bne .Lbyte_loop pop { r4, r5, r6, r7 } @@ -169,8 +161,7 @@ ENTRY (strcpy) @ Store a few bytes from the first word. @ At the same time we align r0 and shift out bytes from r2. .rept 4-\unalign - sfi_breg r0, \ - strb r2, [\B], #1 + strb r2, [r0], #1 lsr r2, r2, #8 .endr #ifdef __ARMEB__ @@ -185,23 +176,20 @@ ENTRY (strcpy) orr r2, r2, r3, lsh_gt #(\unalign*8) @ Save leftover bytes from the two words lsh_ls r6, r3, #((4-\unalign)*8) - sfi_breg r0, \ - str r2, [\B], #4 + str r2, [r0], #4 @ The "real" start of the unaligned copy loop. 
- sfi_breg r1, \ - ldrd r2, r3, [\B], #8 @ Load 8 more bytes + ldrd r2, r3, [r1], #8 @ Load 8 more bytes uqsub8 r4, r7, r2 @ Find EOS - sfi_pld r1, #128 + pld [r1, #128] uqsub8 r5, r7, r3 - sfi_pld r0, #128 + pld [r0, #128] cmp r4, #0 @ EOS in first word? bne 3f @ Combine the leftover and the first word orr r6, r6, r2, lsh_gt #(\unalign*8) @ Discard used bytes from the first word. lsh_ls r2, r2, #((4-\unalign)*8) - sfi_breg r0, \ - str r6, [\B], #4 + str r6, [r0], #4 b 1b @ Found EOS in one of the words; adjust backward 3: sub r1, r1, #4 @@ -212,8 +200,7 @@ ENTRY (strcpy) rev r2, r2 #endif .rept \unalign - sfi_breg r0, \ - strb r2, [\B], #1 + strb r2, [r0], #1 lsr r2, r2, #8 .endr b .Lbyte_loop diff --git a/sysdeps/arm/armv6/strlen.S b/sysdeps/arm/armv6/strlen.S index f807f810fb..64a971f2de 100644 --- a/sysdeps/arm/armv6/strlen.S +++ b/sysdeps/arm/armv6/strlen.S @@ -23,8 +23,7 @@ ENTRY (strlen) @ r0 = start of string - sfi_breg r0, \ - ldrb r2, [\B] @ load the first byte asap + ldrb r2, [r0] @ load the first byte asap @ To cater to long strings, we want to search through a few @ characters until we reach an aligned pointer. To cater to @@ -39,8 +38,7 @@ ENTRY (strlen) beq 99f @ Loop until we find ... -1: sfi_breg r0, \ - ldrb r2, [\B, #1]! +1: ldrb r2, [r0, #1]! subs r3, r3, #1 @ ... the aligment point it ne cmpne r2, #0 @ ... or EOS @@ -52,15 +50,14 @@ ENTRY (strlen) add r0, r0, #1 @ So now we're aligned. - sfi_breg r0, \ - ldrd r2, r3, [\B], #8 + ldrd r2, r3, [r0], #8 #ifdef ARCH_HAS_T2 movw ip, #0x0101 - sfi_pld r0, #64 + pld [r0, #64] movt ip, #0x0101 #else ldr ip, =0x01010101 - sfi_pld r0, #64 + pld [r0, #64] #endif @ Loop searching for EOS, 8 bytes at a time. 
@@ -70,11 +67,10 @@ ENTRY (strlen) .balign 16 2: uqsub8 r2, ip, r2 @ Find EOS uqsub8 r3, ip, r3 - sfi_pld r0, #128 @ Prefetch 2 lines ahead + pld [r0, #128] @ Prefetch 2 lines ahead orrs r3, r3, r2 @ Combine the two words it eq - sfi_breg r0, \ - ldrdeq r2, r3, [\B], #8 + ldrdeq r2, r3, [r0], #8 beq 2b @ Found something. Disambiguate between first and second words. diff --git a/sysdeps/arm/armv6/strrchr.S b/sysdeps/arm/armv6/strrchr.S index d6db032a38..e6eea01816 100644 --- a/sysdeps/arm/armv6/strrchr.S +++ b/sysdeps/arm/armv6/strrchr.S @@ -33,8 +33,7 @@ ENTRY (strrchr) @ Loop a few times until we're aligned. tst r3, #7 beq 2f -1: sfi_breg r3, \ - ldrb r2, [\B], #1 +1: ldrb r2, [r3], #1 cmp r2, r1 @ Find the character it eq subeq r0, r3, #1 @@ -65,8 +64,7 @@ ENTRY (strrchr) @ Loop searching for EOS and C, 8 bytes at a time. @ Any time we find a match in a word, we copy the address of @ the word to r0, and the found bits to r2. -3: sfi_breg r3, \ - ldrd r4, r5, [\B], #8 +3: ldrd r4, r5, [r3], #8 @ Subtracting (unsigned saturating) from 1 means result of 1 for @ any byte that was originally zero and 0 otherwise. Therefore @ we consider the lsb of each byte the "found" bit. 
diff --git a/sysdeps/arm/armv6t2/memchr.S b/sysdeps/arm/armv6t2/memchr.S index c012d73d38..fb4dc8efa3 100644 --- a/sysdeps/arm/armv6t2/memchr.S +++ b/sysdeps/arm/armv6t2/memchr.S @@ -65,8 +65,7 @@ ENTRY(memchr) @ Work up to an aligned point 5: - sfi_breg r0, \ - ldrb r3, [\B],#1 + ldrb r3, [r0],#1 subs r2, r2, #1 cmp r3, r1 beq 50f @ If it matches exit found @@ -91,8 +90,7 @@ ENTRY(memchr) movs r3, #0 15: - sfi_breg r0, \ - ldrd r4,r5, [\B],#8 + ldrd r4,r5, [r0],#8 #ifndef NO_THUMB subs r6, r6, #8 #endif @@ -130,8 +128,7 @@ ENTRY(memchr) #endif 21: @ Post aligned section, or just a short call - sfi_breg r0, \ - ldrb r3,[\B],#1 + ldrb r3,[r0],#1 #ifndef NO_THUMB subs r2,r2,#1 eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub diff --git a/sysdeps/arm/armv6t2/strlen.S b/sysdeps/arm/armv6t2/strlen.S index 4795c54ae5..c72a1e73be 100644 --- a/sysdeps/arm/armv6t2/strlen.S +++ b/sysdeps/arm/armv6t2/strlen.S @@ -67,7 +67,7 @@ .text .p2align 6 ENTRY(strlen) - sfi_pld srcin, #0 + pld [srcin, #0] strd r4, r5, [sp, #-8]! cfi_adjust_cfa_offset (8) cfi_rel_offset (r4, 0) @@ -76,15 +76,14 @@ ENTRY(strlen) bic src, srcin, #7 mvn const_m1, #0 ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */ - sfi_pld src, #32 + pld [src, #32] bne.w .Lmisaligned8 mov const_0, #0 mov result, #-8 .Lloop_aligned: /* Bytes 0-7. */ - sfi_breg src, \ - ldrd data1a, data1b, [\B] - sfi_pld src, #64 + ldrd data1a, data1b, [src] + pld [src, #64] add result, result, #8 .Lstart_realigned: uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ @@ -94,8 +93,7 @@ ENTRY(strlen) cbnz data1b, .Lnull_found /* Bytes 8-15. */ - sfi_breg src, \ - ldrd data1a, data1b, [\B, #8] + ldrd data1a, data1b, [src, #8] uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ add result, result, #8 sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ @@ -104,8 +102,7 @@ ENTRY(strlen) cbnz data1b, .Lnull_found /* Bytes 16-23. 
*/ - sfi_breg src, \ - ldrd data1a, data1b, [\B, #16] + ldrd data1a, data1b, [src, #16] uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ add result, result, #8 sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ @@ -114,8 +111,7 @@ ENTRY(strlen) cbnz data1b, .Lnull_found /* Bytes 24-31. */ - sfi_breg src, \ - ldrd data1a, data1b, [\B, #24] + ldrd data1a, data1b, [src, #24] add src, src, #32 uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ add result, result, #8 @@ -143,13 +139,12 @@ ENTRY(strlen) .Lmisaligned8: cfi_restore_state - sfi_breg src, \ - ldrd data1a, data1b, [\B] + ldrd data1a, data1b, [src] and tmp2, tmp1, #3 rsb result, tmp1, #0 lsl tmp2, tmp2, #3 /* Bytes -> bits. */ tst tmp1, #4 - sfi_pld src, #64 + pld [src, #64] S2HI tmp2, const_m1, tmp2 #ifdef NO_THUMB mvn tmp1, tmp2 diff --git a/sysdeps/arm/armv7/multiarch/memcpy_impl.S b/sysdeps/arm/armv7/multiarch/memcpy_impl.S index 5d5a3cefaa..c1b9fb0ab5 100644 --- a/sysdeps/arm/armv7/multiarch/memcpy_impl.S +++ b/sysdeps/arm/armv7/multiarch/memcpy_impl.S @@ -226,71 +226,40 @@ #ifdef USE_VFP .macro cpy_line_vfp vreg, base - sfi_breg dst, \ - vstr \vreg, [\B, #\base] - sfi_breg src, \ - vldr \vreg, [\B, #\base] - sfi_breg dst, \ - vstr d0, [\B, #\base + 8] - sfi_breg src, \ - vldr d0, [\B, #\base + 8] - sfi_breg dst, \ - vstr d1, [\B, #\base + 16] - sfi_breg src, \ - vldr d1, [\B, #\base + 16] - sfi_breg dst, \ - vstr d2, [\B, #\base + 24] - sfi_breg src, \ - vldr d2, [\B, #\base + 24] - sfi_breg dst, \ - vstr \vreg, [\B, #\base + 32] - sfi_breg src, \ - vldr \vreg, [\B, #\base + prefetch_lines * 64 - 32] - sfi_breg dst, \ - vstr d0, [\B, #\base + 40] - sfi_breg src, \ - vldr d0, [\B, #\base + 40] - sfi_breg dst, \ - vstr d1, [\B, #\base + 48] - sfi_breg src, \ - vldr d1, [\B, #\base + 48] - sfi_breg dst, \ - vstr d2, [\B, #\base + 56] - sfi_breg src, \ - vldr d2, [\B, #\base + 56] + vstr \vreg, [dst, #\base] + vldr \vreg, [src, #\base] + vstr d0, [dst, #\base + 8] + vldr d0, 
[src, #\base + 8] + vstr d1, [dst, #\base + 16] + vldr d1, [src, #\base + 16] + vstr d2, [dst, #\base + 24] + vldr d2, [src, #\base + 24] + vstr \vreg, [dst, #\base + 32] + vldr \vreg, [src, #\base + prefetch_lines * 64 - 32] + vstr d0, [dst, #\base + 40] + vldr d0, [src, #\base + 40] + vstr d1, [dst, #\base + 48] + vldr d1, [src, #\base + 48] + vstr d2, [dst, #\base + 56] + vldr d2, [src, #\base + 56] .endm .macro cpy_tail_vfp vreg, base - sfi_breg dst, \ - vstr \vreg, [\B, #\base] - sfi_breg src, \ - vldr \vreg, [\B, #\base] - sfi_breg dst, \ - vstr d0, [\B, #\base + 8] - sfi_breg src, \ - vldr d0, [\B, #\base + 8] - sfi_breg dst, \ - vstr d1, [\B, #\base + 16] - sfi_breg src, \ - vldr d1, [\B, #\base + 16] - sfi_breg dst, \ - vstr d2, [\B, #\base + 24] - sfi_breg src, \ - vldr d2, [\B, #\base + 24] - sfi_breg dst, \ - vstr \vreg, [\B, #\base + 32] - sfi_breg dst, \ - vstr d0, [\B, #\base + 40] - sfi_breg src, \ - vldr d0, [\B, #\base + 40] - sfi_breg dst, \ - vstr d1, [\B, #\base + 48] - sfi_breg src, \ - vldr d1, [\B, #\base + 48] - sfi_breg dst, \ - vstr d2, [\B, #\base + 56] - sfi_breg src, \ - vldr d2, [\B, #\base + 56] + vstr \vreg, [dst, #\base] + vldr \vreg, [src, #\base] + vstr d0, [dst, #\base + 8] + vldr d0, [src, #\base + 8] + vstr d1, [dst, #\base + 16] + vldr d1, [src, #\base + 16] + vstr d2, [dst, #\base + 24] + vldr d2, [src, #\base + 24] + vstr \vreg, [dst, #\base + 32] + vstr d0, [dst, #\base + 40] + vldr d0, [src, #\base + 40] + vstr d1, [dst, #\base + 48] + vldr d1, [src, #\base + 48] + vstr d2, [dst, #\base + 56] + vldr d2, [src, #\base + 56] .endm #endif @@ -316,26 +285,16 @@ ENTRY(memcpy) vst1.8 {d0}, [\reg]! .endm - /* These are used by the NaCl sfi_breg macro. 
*/ - .macro _sfi_breg_dmask_neon_load_d0 reg - _sfi_dmask \reg - .endm - .macro _sfi_breg_dmask_neon_store_d0 reg - _sfi_dmask \reg - .endm - and tmp1, count, #0x38 .macro dispatch_step i - sfi_breg src, neon_load_d0 \B - sfi_breg dst, neon_store_d0 \B + neon_load_d0 src + neon_store_d0 dst .endm dispatch_7_dword tst count, #4 - sfi_breg src, \ - ldrne tmp1, [\B], #4 - sfi_breg dst, \ - strne tmp1, [\B], #4 + ldrne tmp1, [src], #4 + strne tmp1, [dst], #4 #else /* Copy up to 15 full words of data. May not be aligned. */ /* Cannot use VFP for unaligned data. */ @@ -344,23 +303,17 @@ ENTRY(memcpy) add src, src, tmp1 /* Jump directly into the sequence below at the correct offset. */ .macro dispatch_step i - sfi_breg src, \ - ldr tmp1, [\B, #-(\i * 4)] - sfi_breg dst, \ - str tmp1, [\B, #-(\i * 4)] + ldr tmp1, [src, #-(\i * 4)] + str tmp1, [dst, #-(\i * 4)] .endm dispatch_15_word #endif lsls count, count, #31 - sfi_breg src, \ - ldrhcs tmp1, [\B], #2 - sfi_breg src, \ - ldrbne src, [\B] /* Src is dead, use as a scratch. */ - sfi_breg dst, \ - strhcs tmp1, [\B], #2 - sfi_breg dst, \ - strbne src, [\B] + ldrhcs tmp1, [src], #2 + ldrbne src, [src] /* Src is dead, use as a scratch. */ + strhcs tmp1, [dst], #2 + strbne src, [dst] bx lr .Lcpy_not_short: @@ -388,19 +341,13 @@ ENTRY(memcpy) beq 1f rsbs tmp2, tmp2, #0 sub count, count, tmp2, lsr #29 - sfi_breg src, \ - ldrmi tmp1, [\B], #4 - sfi_breg dst, \ - strmi tmp1, [\B], #4 + ldrmi tmp1, [src], #4 + strmi tmp1, [dst], #4 lsls tmp2, tmp2, #2 - sfi_breg src, \ - ldrhcs tmp1, [\B], #2 - sfi_breg src, \ - ldrbne tmp2, [\B], #1 - sfi_breg dst, \ - strhcs tmp1, [\B], #2 - sfi_breg dst, \ - strbne tmp2, [\B], #1 + ldrhcs tmp1, [src], #2 + ldrbne tmp2, [src], #1 + strhcs tmp1, [dst], #2 + strbne tmp2, [dst], #1 1: subs tmp2, count, #64 /* Use tmp2 for count. */ @@ -412,40 +359,24 @@ ENTRY(memcpy) .Lcpy_body_medium: /* Count in tmp2. 
*/ #ifdef USE_VFP 1: - sfi_breg src, \ - vldr d0, [\B, #0] + vldr d0, [src, #0] subs tmp2, tmp2, #64 - sfi_breg src, \ - vldr d1, [\B, #8] - sfi_breg dst, \ - vstr d0, [\B, #0] - sfi_breg src, \ - vldr d0, [\B, #16] - sfi_breg dst, \ - vstr d1, [\B, #8] - sfi_breg src, \ - vldr d1, [\B, #24] - sfi_breg dst, \ - vstr d0, [\B, #16] - sfi_breg src, \ - vldr d0, [\B, #32] - sfi_breg dst, \ - vstr d1, [\B, #24] - sfi_breg src, \ - vldr d1, [\B, #40] - sfi_breg dst, \ - vstr d0, [\B, #32] - sfi_breg src, \ - vldr d0, [\B, #48] - sfi_breg dst, \ - vstr d1, [\B, #40] - sfi_breg src, \ - vldr d1, [\B, #56] - sfi_breg dst, \ - vstr d0, [\B, #48] + vldr d1, [src, #8] + vstr d0, [dst, #0] + vldr d0, [src, #16] + vstr d1, [dst, #8] + vldr d1, [src, #24] + vstr d0, [dst, #16] + vldr d0, [src, #32] + vstr d1, [dst, #24] + vldr d1, [src, #40] + vstr d0, [dst, #32] + vldr d0, [src, #48] + vstr d1, [dst, #40] + vldr d1, [src, #56] + vstr d0, [dst, #48] add src, src, #64 - sfi_breg dst, \ - vstr d1, [\B, #56] + vstr d1, [dst, #56] add dst, dst, #64 bge 1b tst tmp2, #0x3f @@ -456,48 +387,30 @@ ENTRY(memcpy) add dst, dst, tmp1 add src, src, tmp1 .macro dispatch_step i - sfi_breg src, \ - vldr d0, [\B, #-(\i * 8)] - sfi_breg dst, \ - vstr d0, [\B, #-(\i * 8)] + vldr d0, [src, #-(\i * 8)] + vstr d0, [dst, #-(\i * 8)] .endm dispatch_7_dword #else sub src, src, #8 sub dst, dst, #8 1: - sfi_breg src, \ - ldrd A_l, A_h, [\B, #8] - sfi_breg dst, \ - strd A_l, A_h, [\B, #8] - sfi_breg src, \ - ldrd A_l, A_h, [\B, #16] - sfi_breg dst, \ - strd A_l, A_h, [\B, #16] - sfi_breg src, \ - ldrd A_l, A_h, [\B, #24] - sfi_breg dst, \ - strd A_l, A_h, [\B, #24] - sfi_breg src, \ - ldrd A_l, A_h, [\B, #32] - sfi_breg dst, \ - strd A_l, A_h, [\B, #32] - sfi_breg src, \ - ldrd A_l, A_h, [\B, #40] - sfi_breg dst, \ - strd A_l, A_h, [\B, #40] - sfi_breg src, \ - ldrd A_l, A_h, [\B, #48] - sfi_breg dst, \ - strd A_l, A_h, [\B, #48] - sfi_breg src, \ - ldrd A_l, A_h, [\B, #56] - sfi_breg dst, \ - strd A_l, A_h, 
[\B, #56] - sfi_breg src, \ - ldrd A_l, A_h, [\B, #64]! - sfi_breg dst, \ - strd A_l, A_h, [\B, #64]! + ldrd A_l, A_h, [src, #8] + strd A_l, A_h, [dst, #8] + ldrd A_l, A_h, [src, #16] + strd A_l, A_h, [dst, #16] + ldrd A_l, A_h, [src, #24] + strd A_l, A_h, [dst, #24] + ldrd A_l, A_h, [src, #32] + strd A_l, A_h, [dst, #32] + ldrd A_l, A_h, [src, #40] + strd A_l, A_h, [dst, #40] + ldrd A_l, A_h, [src, #48] + strd A_l, A_h, [dst, #48] + ldrd A_l, A_h, [src, #56] + strd A_l, A_h, [dst, #56] + ldrd A_l, A_h, [src, #64]! + strd A_l, A_h, [dst, #64]! subs tmp2, tmp2, #64 bge 1b tst tmp2, #0x3f @@ -524,28 +437,20 @@ ENTRY(memcpy) add dst, dst, tmp1 add src, src, tmp1 .macro dispatch_step i - sfi_breg src, \ - ldrd A_l, A_h, [\B, #-(\i * 8)] - sfi_breg dst, \ - strd A_l, A_h, [\B, #-(\i * 8)] + ldrd A_l, A_h, [src, #-(\i * 8)] + strd A_l, A_h, [dst, #-(\i * 8)] .endm dispatch_7_dword #endif tst tmp2, #4 - sfi_breg src, \ - ldrne tmp1, [\B], #4 - sfi_breg dst, \ - strne tmp1, [\B], #4 + ldrne tmp1, [src], #4 + strne tmp1, [dst], #4 lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */ - sfi_breg src, \ - ldrhcs tmp1, [\B], #2 - sfi_breg src, \ - ldrbne tmp2, [\B] - sfi_breg dst, \ - strhcs tmp1, [\B], #2 - sfi_breg dst, \ - strbne tmp2, [\B] + ldrhcs tmp1, [src], #2 + ldrbne tmp2, [src] + strhcs tmp1, [dst], #2 + strbne tmp2, [dst] .Ldone: ldr tmp2, [sp], #FRAME_SIZE @@ -565,23 +470,15 @@ ENTRY(memcpy) copy position into a register. This should act like a PLD operation but we won't have to repeat the transfer. 
*/ - sfi_breg src, \ - vldr d3, [\B, #0] - sfi_breg src, \ - vldr d4, [\B, #64] - sfi_breg src, \ - vldr d5, [\B, #128] - sfi_breg src, \ - vldr d6, [\B, #192] - sfi_breg src, \ - vldr d7, [\B, #256] + vldr d3, [src, #0] + vldr d4, [src, #64] + vldr d5, [src, #128] + vldr d6, [src, #192] + vldr d7, [src, #256] - sfi_breg src, \ - vldr d0, [\B, #8] - sfi_breg src, \ - vldr d1, [\B, #16] - sfi_breg src, \ - vldr d2, [\B, #24] + vldr d0, [src, #8] + vldr d1, [src, #16] + vldr d2, [src, #24] add src, src, #32 subs tmp2, tmp2, #prefetch_lines * 64 * 2 @@ -606,31 +503,19 @@ ENTRY(memcpy) add src, src, #3 * 64 add dst, dst, #3 * 64 cpy_tail_vfp d6, 0 - sfi_breg dst, \ - vstr d7, [\B, #64] - sfi_breg src, \ - vldr d7, [\B, #64] - sfi_breg dst, \ - vstr d0, [\B, #64 + 8] - sfi_breg src, \ - vldr d0, [\B, #64 + 8] - sfi_breg dst, \ - vstr d1, [\B, #64 + 16] - sfi_breg src, \ - vldr d1, [\B, #64 + 16] - sfi_breg dst, \ - vstr d2, [\B, #64 + 24] - sfi_breg src, \ - vldr d2, [\B, #64 + 24] - sfi_breg dst, \ - vstr d7, [\B, #64 + 32] + vstr d7, [dst, #64] + vldr d7, [src, #64] + vstr d0, [dst, #64 + 8] + vldr d0, [src, #64 + 8] + vstr d1, [dst, #64 + 16] + vldr d1, [src, #64 + 16] + vstr d2, [dst, #64 + 24] + vldr d2, [src, #64 + 24] + vstr d7, [dst, #64 + 32] add src, src, #96 - sfi_breg dst, \ - vstr d0, [\B, #64 + 40] - sfi_breg dst, \ - vstr d1, [\B, #64 + 48] - sfi_breg dst, \ - vstr d2, [\B, #64 + 56] + vstr d0, [dst, #64 + 40] + vstr d1, [dst, #64 + 48] + vstr d2, [dst, #64 + 56] add dst, dst, #128 add tmp2, tmp2, #prefetch_lines * 64 b .Lcpy_body_medium @@ -641,83 +526,59 @@ ENTRY(memcpy) /* Pre-bias src and dst. 
*/ sub src, src, #8 sub dst, dst, #8 - sfi_pld src, #8 - sfi_pld src, #72 + pld [src, #8] + pld [src, #72] subs tmp2, tmp2, #64 - sfi_pld src, #136 - sfi_breg src, \ - ldrd A_l, A_h, [\B, #8] + pld [src, #136] + ldrd A_l, A_h, [src, #8] strd B_l, B_h, [sp, #8] cfi_rel_offset (B_l, 8) cfi_rel_offset (B_h, 12) - sfi_breg src, \ - ldrd B_l, B_h, [\B, #16] + ldrd B_l, B_h, [src, #16] strd C_l, C_h, [sp, #16] cfi_rel_offset (C_l, 16) cfi_rel_offset (C_h, 20) - sfi_breg src, \ - ldrd C_l, C_h, [\B, #24] + ldrd C_l, C_h, [src, #24] strd D_l, D_h, [sp, #24] cfi_rel_offset (D_l, 24) cfi_rel_offset (D_h, 28) - sfi_pld src, #200 - sfi_breg src, \ - ldrd D_l, D_h, [\B, #32]! + pld [src, #200] + ldrd D_l, D_h, [src, #32]! b 1f .p2align 6 2: - sfi_pld src, #232 - sfi_breg dst, \ - strd A_l, A_h, [\B, #40] - sfi_breg src, \ - ldrd A_l, A_h, [\B, #40] - sfi_breg dst, \ - strd B_l, B_h, [\B, #48] - sfi_breg src, \ - ldrd B_l, B_h, [\B, #48] - sfi_breg dst, \ - strd C_l, C_h, [\B, #56] - sfi_breg src, \ - ldrd C_l, C_h, [\B, #56] - sfi_breg dst, \ - strd D_l, D_h, [\B, #64]! - sfi_breg src, \ - ldrd D_l, D_h, [\B, #64]! + pld [src, #232] + strd A_l, A_h, [dst, #40] + ldrd A_l, A_h, [src, #40] + strd B_l, B_h, [dst, #48] + ldrd B_l, B_h, [src, #48] + strd C_l, C_h, [dst, #56] + ldrd C_l, C_h, [src, #56] + strd D_l, D_h, [dst, #64]! + ldrd D_l, D_h, [src, #64]! 
subs tmp2, tmp2, #64 1: - sfi_breg dst, \ - strd A_l, A_h, [\B, #8] - sfi_breg src, \ - ldrd A_l, A_h, [\B, #8] - sfi_breg dst, \ - strd B_l, B_h, [\B, #16] - sfi_breg src, \ - ldrd B_l, B_h, [\B, #16] - sfi_breg dst, \ - strd C_l, C_h, [\B, #24] - sfi_breg src, \ - ldrd C_l, C_h, [\B, #24] - sfi_breg dst, \ - strd D_l, D_h, [\B, #32] - sfi_breg src, \ - ldrd D_l, D_h, [\B, #32] + strd A_l, A_h, [dst, #8] + ldrd A_l, A_h, [src, #8] + strd B_l, B_h, [dst, #16] + ldrd B_l, B_h, [src, #16] + strd C_l, C_h, [dst, #24] + ldrd C_l, C_h, [src, #24] + strd D_l, D_h, [dst, #32] + ldrd D_l, D_h, [src, #32] bcs 2b /* Save the remaining bytes and restore the callee-saved regs. */ - sfi_breg dst, \ - strd A_l, A_h, [\B, #40] + strd A_l, A_h, [dst, #40] add src, src, #40 - sfi_breg dst, \ - strd B_l, B_h, [\B, #48] + strd B_l, B_h, [dst, #48] ldrd B_l, B_h, [sp, #8] cfi_restore (B_l) cfi_restore (B_h) - sfi_breg dst, \ - strd C_l, C_h, [\B, #56] + strd C_l, C_h, [dst, #56] ldrd C_l, C_h, [sp, #16] cfi_restore (C_l) cfi_restore (C_h) - sfi_breg dst, \ - strd D_l, D_h, [\B, #64] + strd D_l, D_h, [dst, #64] ldrd D_l, D_h, [sp, #24] cfi_restore (D_l) cfi_restore (D_h) @@ -734,35 +595,29 @@ ENTRY(memcpy) cfi_remember_state .Lcpy_notaligned: - sfi_pld src - sfi_pld src, #64 + pld [src, #0] + pld [src, #64] /* There's at least 64 bytes to copy, but there is no mutual alignment. */ /* Bring DST to 64-bit alignment. 
*/ lsls tmp2, dst, #29 - sfi_pld src, #(2 * 64) + pld [src, #(2 * 64)] beq 1f rsbs tmp2, tmp2, #0 sub count, count, tmp2, lsr #29 - sfi_breg src, \ - ldrmi tmp1, [\B], #4 - sfi_breg dst, \ - strmi tmp1, [\B], #4 + ldrmi tmp1, [src], #4 + strmi tmp1, [dst], #4 lsls tmp2, tmp2, #2 - sfi_breg src, \ - ldrbne tmp1, [\B], #1 - sfi_breg src, \ - ldrhcs tmp2, [\B], #2 - sfi_breg dst, \ - strbne tmp1, [\B], #1 - sfi_breg dst, \ - strhcs tmp2, [\B], #2 + ldrbne tmp1, [src], #1 + ldrhcs tmp2, [src], #2 + strbne tmp1, [dst], #1 + strhcs tmp2, [dst], #2 1: - sfi_pld src, #(3 * 64) + pld [src, #(3 * 64)] subs count, count, #64 ldrmi tmp2, [sp], #FRAME_SIZE bmi .Ltail63unaligned - sfi_pld src, #(4 * 64) + pld [src, #(4 * 64)] #ifdef USE_NEON /* These need an extra layer of macro just to work around a @@ -775,132 +630,88 @@ ENTRY(memcpy) vst1.8 {\reglist}, [ALIGN (\basereg, 64)]! .endm - /* These are used by the NaCl sfi_breg macro. */ - .macro _sfi_breg_dmask_neon_load_multi reg - _sfi_dmask \reg - .endm - .macro _sfi_breg_dmask_neon_store_multi reg - _sfi_dmask \reg - .endm - - sfi_breg src, neon_load_multi d0-d3, \B - sfi_breg src, neon_load_multi d4-d7, \B + neon_load_multi d0-d3, src + neon_load_multi d4-d7, src subs count, count, #64 bmi 2f 1: - sfi_pld src, #(4 * 64) - sfi_breg dst, neon_store_multi d0-d3, \B - sfi_breg src, neon_load_multi d0-d3, \B - sfi_breg dst, neon_store_multi d4-d7, \B - sfi_breg src, neon_load_multi d4-d7, \B + pld [src, #(4 * 64)] + neon_store_multi d0-d3, dst + neon_load_multi d0-d3, src + neon_store_multi d4-d7, dst + neon_load_multi d4-d7, src subs count, count, #64 bpl 1b 2: - sfi_breg dst, neon_store_multi d0-d3, \B - sfi_breg dst, neon_store_multi d4-d7, \B + neon_store_multi d0-d3, dst + neon_store_multi d4-d7, dst ands count, count, #0x3f #else /* Use an SMS style loop to maximize the I/O bandwidth. */ sub src, src, #4 sub dst, dst, #8 subs tmp2, count, #64 /* Use tmp2 for count. 
*/ - sfi_breg src, \ - ldr A_l, [\B, #4] - sfi_breg src, \ - ldr A_h, [\B, #8] + ldr A_l, [src, #4] + ldr A_h, [src, #8] strd B_l, B_h, [sp, #8] cfi_rel_offset (B_l, 8) cfi_rel_offset (B_h, 12) - sfi_breg src, \ - ldr B_l, [\B, #12] - sfi_breg src, \ - ldr B_h, [\B, #16] + ldr B_l, [src, #12] + ldr B_h, [src, #16] strd C_l, C_h, [sp, #16] cfi_rel_offset (C_l, 16) cfi_rel_offset (C_h, 20) - sfi_breg src, \ - ldr C_l, [\B, #20] - sfi_breg src, \ - ldr C_h, [\B, #24] + ldr C_l, [src, #20] + ldr C_h, [src, #24] strd D_l, D_h, [sp, #24] cfi_rel_offset (D_l, 24) cfi_rel_offset (D_h, 28) - sfi_breg src, \ - ldr D_l, [\B, #28] - sfi_breg src, \ - ldr D_h, [\B, #32]! + ldr D_l, [src, #28] + ldr D_h, [src, #32]! b 1f .p2align 6 2: - sfi_pld src, #(5 * 64) - (32 - 4) - sfi_breg dst, \ - strd A_l, A_h, [\B, #40] - sfi_breg src, \ - ldr A_l, [\B, #36] - sfi_breg src, \ - ldr A_h, [\B, #40] - sfi_breg dst, \ - strd B_l, B_h, [\B, #48] - sfi_breg src, \ - ldr B_l, [\B, #44] - sfi_breg src, \ - ldr B_h, [\B, #48] - sfi_breg dst, \ - strd C_l, C_h, [\B, #56] - sfi_breg src, \ - ldr C_l, [\B, #52] - sfi_breg src, \ - ldr C_h, [\B, #56] - sfi_breg dst, \ - strd D_l, D_h, [\B, #64]! - sfi_breg src, \ - ldr D_l, [\B, #60] - sfi_breg src, \ - ldr D_h, [\B, #64]! + pld [src, #(5 * 64) - (32 - 4)] + strd A_l, A_h, [dst, #40] + ldr A_l, [src, #36] + ldr A_h, [src, #40] + strd B_l, B_h, [dst, #48] + ldr B_l, [src, #44] + ldr B_h, [src, #48] + strd C_l, C_h, [dst, #56] + ldr C_l, [src, #52] + ldr C_h, [src, #56] + strd D_l, D_h, [dst, #64]! + ldr D_l, [src, #60] + ldr D_h, [src, #64]! 
subs tmp2, tmp2, #64 1: - sfi_breg dst, \ - strd A_l, A_h, [\B, #8] - sfi_breg src, \ - ldr A_l, [\B, #4] - sfi_breg src, \ - ldr A_h, [\B, #8] - sfi_breg dst, \ - strd B_l, B_h, [\B, #16] - sfi_breg src, \ - ldr B_l, [\B, #12] - sfi_breg src, \ - ldr B_h, [\B, #16] - sfi_breg dst, \ - strd C_l, C_h, [\B, #24] - sfi_breg src, \ - ldr C_l, [\B, #20] - sfi_breg src, \ - ldr C_h, [\B, #24] - sfi_breg dst, \ - strd D_l, D_h, [\B, #32] - sfi_breg src, \ - ldr D_l, [\B, #28] - sfi_breg src, \ - ldr D_h, [\B, #32] + strd A_l, A_h, [dst, #8] + ldr A_l, [src, #4] + ldr A_h, [src, #8] + strd B_l, B_h, [dst, #16] + ldr B_l, [src, #12] + ldr B_h, [src, #16] + strd C_l, C_h, [dst, #24] + ldr C_l, [src, #20] + ldr C_h, [src, #24] + strd D_l, D_h, [dst, #32] + ldr D_l, [src, #28] + ldr D_h, [src, #32] bcs 2b /* Save the remaining bytes and restore the callee-saved regs. */ - sfi_breg dst, \ - strd A_l, A_h, [\B, #40] + strd A_l, A_h, [dst, #40] add src, src, #36 - sfi_breg dst, \ - strd B_l, B_h, [\B, #48] + strd B_l, B_h, [dst, #48] ldrd B_l, B_h, [sp, #8] cfi_restore (B_l) cfi_restore (B_h) - sfi_breg dst, \ - strd C_l, C_h, [\B, #56] + strd C_l, C_h, [dst, #56] ldrd C_l, C_h, [sp, #16] cfi_restore (C_l) cfi_restore (C_h) - sfi_breg dst, \ - strd D_l, D_h, [\B, #64] + strd D_l, D_h, [dst, #64] ldrd D_l, D_h, [sp, #24] cfi_restore (D_l) cfi_restore (D_h) diff --git a/sysdeps/arm/armv7/strcmp.S b/sysdeps/arm/armv7/strcmp.S index c8fab4ba0f..25d055754e 100644 --- a/sysdeps/arm/armv7/strcmp.S +++ b/sysdeps/arm/armv7/strcmp.S @@ -178,10 +178,8 @@ #endif ENTRY (strcmp) #if STRCMP_PRECHECK == 1 - sfi_breg src1, \ - ldrb r2, [\B] - sfi_breg src2, \ - ldrb r3, [\B] + ldrb r2, [src1] + ldrb r3, [src2] cmp r2, #1 it cs cmpcs r2, r3 @@ -211,11 +209,9 @@ ENTRY (strcmp) and tmp2, tmp1, #3 bic src2, src2, #7 lsl tmp2, tmp2, #3 /* Bytes -> bits. 
*/ - sfi_breg src1, \ - ldrd data1a, data1b, [\B], #16 + ldrd data1a, data1b, [src1], #16 tst tmp1, #4 - sfi_breg src2, \ - ldrd data2a, data2b, [\B], #16 + ldrd data2a, data2b, [src2], #16 prepare_mask tmp1, tmp2 apply_mask data1a, tmp1 apply_mask data2a, tmp1 @@ -231,10 +227,8 @@ ENTRY (strcmp) .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */ .p2align 2 /* Always word aligned. */ .Lloop_aligned8: - sfi_breg src1, \ - ldrd data1a, data1b, [\B], #16 - sfi_breg src2, \ - ldrd data2a, data2b, [\B], #16 + ldrd data1a, data1b, [src1], #16 + ldrd data2a, data2b, [src2], #16 .Lstart_realigned8: uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ eor syndrome_a, data1a, data2a @@ -245,10 +239,8 @@ ENTRY (strcmp) sel syndrome_b, syndrome_b, const_m1 cbnz syndrome_b, .Ldiff_in_b - sfi_breg src1, \ - ldrd data1a, data1b, [\B, #-8] - sfi_breg src2, \ - ldrd data2a, data2b, [\B, #-8] + ldrd data1a, data1b, [src1, #-8] + ldrd data2a, data2b, [src2, #-8] uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ eor syndrome_a, data1a, data2a sel syndrome_a, syndrome_a, const_m1 @@ -279,19 +271,15 @@ ENTRY (strcmp) /* Unrolled by a factor of 2, to reduce the number of post-increment operations. */ .Lloop_aligned4: - sfi_breg src1, \ - ldr data1, [\B], #8 - sfi_breg src2, \ - ldr data2, [\B], #8 + ldr data1, [src1], #8 + ldr data2, [src2], #8 .Lstart_realigned4: uadd8 syndrome, data1, const_m1 /* Only need GE bits. */ eor syndrome, data1, data2 sel syndrome, syndrome, const_m1 cbnz syndrome, .Laligned4_done - sfi_breg src1, \ - ldr data1, [\B, #-4] - sfi_breg src2, \ - ldr data2, [\B, #-4] + ldr data1, [src1, #-4] + ldr data2, [src2, #-4] uadd8 syndrome, data1, const_m1 eor syndrome, data1, data2 sel syndrome, syndrome, const_m1 @@ -307,11 +295,9 @@ ENTRY (strcmp) masking off the unwanted loaded data to prevent a difference. */ lsl tmp1, tmp1, #3 /* Bytes -> bits. 
*/ bic src1, src1, #3 - sfi_breg src1, \ - ldr data1, [\B], #8 + ldr data1, [src1], #8 bic src2, src2, #3 - sfi_breg src2, \ - ldr data2, [\B], #8 + ldr data2, [src2], #8 prepare_mask tmp1, tmp1 apply_mask data1, tmp1 @@ -324,30 +310,26 @@ ENTRY (strcmp) sub src2, src2, tmp1 bic src1, src1, #3 lsls tmp1, tmp1, #31 - sfi_breg src1, \ - ldr data1, [\B], #4 + ldr data1, [src1], #4 beq .Laligned_m2 bcs .Laligned_m1 #if STRCMP_PRECHECK == 0 - sfi_breg src2, \ - ldrb data2, [\B, #1] + ldrb data2, [src2, #1] uxtb tmp1, data1, ror #BYTE1_OFFSET subs tmp1, tmp1, data2 bne .Lmisaligned_exit cbz data2, .Lmisaligned_exit .Laligned_m2: - sfi_breg src2, \ - ldrb data2, [\B, #2] + ldrb data2, [src2, #2] uxtb tmp1, data1, ror #BYTE2_OFFSET subs tmp1, tmp1, data2 bne .Lmisaligned_exit cbz data2, .Lmisaligned_exit .Laligned_m1: - sfi_breg src2, \ - ldrb data2, [\B, #3] + ldrb data2, [src2, #3] uxtb tmp1, data1, ror #BYTE3_OFFSET subs tmp1, tmp1, data2 bne .Lmisaligned_exit @@ -356,16 +338,14 @@ ENTRY (strcmp) #else /* STRCMP_PRECHECK */ /* If we've done the pre-check, then we don't need to check the first byte again here. */ - sfi_breg src2, \ - ldrb data2, [\B, #2] + ldrb data2, [src2, #2] uxtb tmp1, data1, ror #BYTE2_OFFSET subs tmp1, tmp1, data2 bne .Lmisaligned_exit cbz data2, .Lmisaligned_exit .Laligned_m2: - sfi_breg src2, \ - ldrb data2, [\B, #3] + ldrb data2, [src2, #3] uxtb tmp1, data1, ror #BYTE3_OFFSET subs tmp1, tmp1, data2 bne .Lmisaligned_exit @@ -391,13 +371,11 @@ ENTRY (strcmp) cfi_restore_state /* src1 is word aligned, but src2 has no common alignment with it. */ - sfi_breg src1, \ - ldr data1, [\B], #4 + ldr data1, [src1], #4 lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */ bic src2, src2, #3 - sfi_breg src2, \ - ldr data2, [\B], #4 + ldr data2, [src2], #4 bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */ bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. 
*/ @@ -409,13 +387,11 @@ ENTRY (strcmp) sel syndrome, syndrome, const_m1 bne 4f cbnz syndrome, 5f - sfi_breg src2, \ - ldr data2, [\B], #4 + ldr data2, [src2], #4 eor tmp1, tmp1, data1 cmp tmp1, data2, S2HI #24 bne 6f - sfi_breg src1, \ - ldr data1, [\B], #4 + ldr data1, [src1], #4 b .Loverlap3 4: S2LO data2, data2, #8 @@ -427,8 +403,7 @@ ENTRY (strcmp) /* We can only get here if the MSB of data1 contains 0, so fast-path the exit. */ - sfi_breg src2, \ - ldrb result, [\B] + ldrb result, [src2] ldrd r4, r5, [sp], #16 cfi_remember_state cfi_def_cfa_offset (0) @@ -454,13 +429,11 @@ ENTRY (strcmp) sel syndrome, syndrome, const_m1 bne 4f cbnz syndrome, 5f - sfi_breg src2, \ - ldr data2, [\B], #4 + ldr data2, [src2], #4 eor tmp1, tmp1, data1 cmp tmp1, data2, S2HI #16 bne 6f - sfi_breg src1, \ - ldr data1, [\B], #4 + ldr data1, [src1], #4 b .Loverlap2 4: S2LO data2, data2, #16 @@ -469,8 +442,7 @@ ENTRY (strcmp) ands syndrome, syndrome, const_m1, S2LO #16 bne .Lstrcmp_done_equal - sfi_breg src2, \ - ldrh data2, [\B] + ldrh data2, [src2] S2LO data1, data1, #16 #ifdef __ARM_BIG_ENDIAN lsl data2, data2, #16 @@ -490,13 +462,11 @@ ENTRY (strcmp) sel syndrome, syndrome, const_m1 bne 4f cbnz syndrome, 5f - sfi_breg src2, \ - ldr data2, [\B], #4 + ldr data2, [src2], #4 eor tmp1, tmp1, data1 cmp tmp1, data2, S2HI #8 bne 6f - sfi_breg src1, \ - ldr data1, [\B], #4 + ldr data1, [src1], #4 b .Loverlap1 4: S2LO data2, data2, #24 @@ -504,8 +474,7 @@ ENTRY (strcmp) 5: tst syndrome, #LSB bne .Lstrcmp_done_equal - sfi_breg src2, \ - ldr data2, [\B] + ldr data2, [src2] 6: S2LO data1, data1, #8 bic data2, data2, #MSB diff --git a/sysdeps/arm/dl-tlsdesc.S b/sysdeps/arm/dl-tlsdesc.S index 5caf2c154d..e7bed02188 100644 --- a/sysdeps/arm/dl-tlsdesc.S +++ b/sysdeps/arm/dl-tlsdesc.S @@ -32,8 +32,7 @@ eabi_fnstart .align 2 _dl_tlsdesc_return: - sfi_breg r0, \ - ldr r0, [\B] + ldr r0, [r0] BX (lr) eabi_fnend cfi_endproc @@ -92,30 +91,23 @@ _dl_tlsdesc_dynamic: cfi_rel_offset (r3,4) cfi_rel_offset 
(r4,8) cfi_rel_offset (lr,12) - sfi_breg r0, \ - ldr r1, [\B] /* td */ + ldr r1, [r0] /* td */ GET_TLS (lr) mov r4, r0 /* r4 = tp */ - sfi_breg r0, \ - ldr r0, [\B] - sfi_breg r1, \ - ldr r2, [\B, #8] /* gen_count */ - sfi_breg r0, \ - ldr r3, [\B] + ldr r0, [r0] + ldr r2, [r1, #8] /* gen_count */ + ldr r3, [r0] cmp r2, r3 bhi 1f - sfi_breg r1, \ - ldr r3, [\B] + ldr r3, [r1] #ifndef ARM_NO_INDEX_REGISTER ldr r2, [r0, r3, lsl #3] #else add lr, r0, r3, lsl #3 - sfi_breg lr, \ - ldr r2, [\B] + ldr r2, [lr] #endif cmn r2, #1 ittt ne - sfi_breg r1, \ ldrne r3, [r1, #4] addne r3, r2, r3 rsbne r0, r4, r3 @@ -178,8 +170,7 @@ _dl_tlsdesc_lazy_resolver: pop {r2} cfi_adjust_cfa_offset (-4) cfi_restore (r2) - sfi_breg r0, \ - ldr r1, [\B, #4] + ldr r1, [r0, #4] BX (r1) eabi_fnend cfi_endproc @@ -220,8 +211,7 @@ _dl_tlsdesc_resolve_hold: cfi_restore (r2) cfi_restore (r1) cfi_restore (r0) - sfi_breg r0, \ - ldr r1, [\B, #4] + ldr r1, [r0, #4] BX (r1) eabi_fnend cfi_endproc diff --git a/sysdeps/arm/memcpy.S b/sysdeps/arm/memcpy.S index bdb830a8fe..62e48c3e4a 100644 --- a/sysdeps/arm/memcpy.S +++ b/sysdeps/arm/memcpy.S @@ -70,7 +70,7 @@ ENTRY(memcpy) subs r2, r2, #4 blt 8f ands ip, r0, #3 - PLD( sfi_pld r1, #0 ) + PLD( pld [r1, #0] ) bne 9f ands ip, r1, #3 bne 10f @@ -97,19 +97,17 @@ ENTRY(memcpy) CALGN( bx r4 ) #endif - PLD( sfi_pld r1, #0 ) + PLD( pld [r1, #0] ) 2: PLD( subs r2, r2, #96 ) - PLD( sfi_pld r1, #28 ) + PLD( pld [r1, #28] ) PLD( blt 4f ) - PLD( sfi_pld r1, #60 ) - PLD( sfi_pld r1, #92 ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) -3: PLD( sfi_pld r1, #124 ) -4: sfi_breg r1, \ - ldmia \B!, {r3, r4, r5, r6, r7, r8, ip, lr} +3: PLD( pld [r1, #124] ) +4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} subs r2, r2, #32 - sfi_breg r0, \ - stmia \B!, {r3, r4, r5, r6, r7, r8, ip, lr} + stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} bge 3b PLD( cmn r2, #96 ) PLD( bge 4b ) @@ -136,26 +134,19 @@ ENTRY(memcpy) .p2align ARM_BX_ALIGN_LOG2 6: nop .p2align ARM_BX_ALIGN_LOG2 - sfi_breg 
r1, \ - ldr r3, [\B], #4 + ldr r3, [r1], #4 .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr r4, [\B], #4 + ldr r4, [r1], #4 .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr r5, [\B], #4 + ldr r5, [r1], #4 .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr r6, [\B], #4 + ldr r6, [r1], #4 .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr r7, [\B], #4 + ldr r7, [r1], #4 .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr r8, [\B], #4 + ldr r8, [r1], #4 .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr lr, [\B], #4 + ldr lr, [r1], #4 #ifndef ARM_ALWAYS_BX add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) @@ -172,26 +163,19 @@ ENTRY(memcpy) .p2align ARM_BX_ALIGN_LOG2 66: nop .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str r3, [\B], #4 + str r3, [r0], #4 .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str r4, [\B], #4 + str r4, [r0], #4 .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str r5, [\B], #4 + str r5, [r0], #4 .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str r6, [\B], #4 + str r6, [r0], #4 .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str r7, [\B], #4 + str r7, [r0], #4 .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str r8, [\B], #4 + str r8, [r0], #4 .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str lr, [\B], #4 + str lr, [r0], #4 #ifdef ARM_ALWAYS_BX pop {r10} @@ -209,18 +193,12 @@ ENTRY(memcpy) cfi_restore (r8) 8: movs r2, r2, lsl #31 - sfi_breg r1, \ - ldrbne r3, [\B], #1 - sfi_breg r1, \ - ldrbcs r4, [\B], #1 - sfi_breg r1, \ - ldrbcs ip, [\B] - sfi_breg r0, \ - strbne r3, [\B], #1 - sfi_breg r0, \ - strbcs r4, [\B], #1 - sfi_breg r0, \ - strbcs ip, [\B] + ldrbne r3, [r1], #1 + ldrbcs r4, [r1], #1 + ldrbcs ip, [r1] + strbne r3, [r0], #1 + strbcs r4, [r0], #1 + strbcs ip, [r0] #if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \ || defined (ARM_ALWAYS_BX)) @@ -237,27 +215,20 @@ ENTRY(memcpy) 9: rsb ip, ip, #4 cmp ip, #2 - sfi_breg r1, \ - ldrbgt r3, [\B], #1 - sfi_breg r1, \ - ldrbge r4, [\B], #1 - sfi_breg r1, \ - ldrb lr, [\B], #1 - sfi_breg r0, \ - 
strbgt r3, [\B], #1 - sfi_breg r0, \ - strbge r4, [\B], #1 + ldrbgt r3, [r1], #1 + ldrbge r4, [r1], #1 + ldrb lr, [r1], #1 + strbgt r3, [r0], #1 + strbge r4, [r0], #1 subs r2, r2, ip - sfi_breg r0, \ - strb lr, [\B], #1 + strb lr, [r0], #1 blt 8b ands ip, r1, #3 beq 1b 10: bic r1, r1, #3 cmp ip, #2 - sfi_breg r1, \ - ldr lr, [\B], #4 + ldr lr, [r1], #4 beq 17f bgt 18f @@ -281,20 +252,18 @@ ENTRY(memcpy) cfi_rel_offset (r8, 12) cfi_rel_offset (r10, 16) - PLD( sfi_pld r1, #0 ) + PLD( pld [r1, #0] ) PLD( subs r2, r2, #96 ) - PLD( sfi_pld r1, #28 ) + PLD( pld [r1, #28] ) PLD( blt 13f ) - PLD( sfi_pld r1, #60 ) - PLD( sfi_pld r1, #92 ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) -12: PLD( sfi_pld r1, #124 ) -13: sfi_breg r1, \ - ldmia \B!, {r4, r5, r6, r7} +12: PLD( pld [r1, #124] ) +13: ldmia r1!, {r4, r5, r6, r7} mov r3, lr, PULL #\pull subs r2, r2, #32 - sfi_breg r1, \ - ldmia \B!, {r8, r10, ip, lr} + ldmia r1!, {r8, r10, ip, lr} orr r3, r3, r4, PUSH #\push mov r4, r4, PULL #\pull orr r4, r4, r5, PUSH #\push @@ -310,8 +279,7 @@ ENTRY(memcpy) orr r10, r10, ip, PUSH #\push mov ip, ip, PULL #\pull orr ip, ip, lr, PUSH #\push - sfi_breg r0, \ - stmia \B!, {r3, r4, r5, r6, r7, r8, r10, ip} + stmia r0!, {r3, r4, r5, r6, r7, r8, r10, ip} bge 12b PLD( cmn r2, #96 ) PLD( bge 13b ) @@ -328,12 +296,10 @@ ENTRY(memcpy) beq 16f 15: mov r3, lr, PULL #\pull - sfi_breg r1, \ - ldr lr, [\B], #4 + ldr lr, [r1], #4 subs ip, ip, #4 orr r3, r3, lr, PUSH #\push - sfi_breg r0, \ - str r3, [\B], #4 + str r3, [r0], #4 bgt 15b CALGN( cmp r2, #0 ) CALGN( bge 11b ) diff --git a/sysdeps/arm/memmove.S b/sysdeps/arm/memmove.S index f5247f46d8..b18aa329d3 100644 --- a/sysdeps/arm/memmove.S +++ b/sysdeps/arm/memmove.S @@ -87,7 +87,7 @@ ENTRY(memmove) subs r2, r2, #4 blt 8f ands ip, r0, #3 - PLD( sfi_pld r1, #-4 ) + PLD( pld [r1, #-4] ) bne 9f ands ip, r1, #3 bne 10f @@ -113,19 +113,17 @@ ENTRY(memmove) CALGN( bx r4 ) #endif - PLD( sfi_pld r1, #-4 ) + PLD( pld [r1, #-4] ) 2: PLD( subs r2, r2, #96 ) 
- PLD( sfi_pld r1, #-32 ) + PLD( pld [r1, #-32] ) PLD( blt 4f ) - PLD( sfi_pld r1, #-64 ) - PLD( sfi_pld r1, #-96 ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) -3: PLD( sfi_pld r1, #-128 ) -4: sfi_breg r1, \ - ldmdb \B!, {r3, r4, r5, r6, r7, r8, ip, lr} +3: PLD( pld [r1, #-128] ) +4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} subs r2, r2, #32 - sfi_breg r0, \ - stmdb \B!, {r3, r4, r5, r6, r7, r8, ip, lr} + stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} bge 3b PLD( cmn r2, #96 ) PLD( bge 4b ) @@ -152,26 +150,19 @@ ENTRY(memmove) .p2align ARM_BX_ALIGN_LOG2 6: nop .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr r3, [\B, #-4]! + ldr r3, [r1, #-4]! .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr r4, [\B, #-4]! + ldr r4, [r1, #-4]! .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr r5, [\B, #-4]! + ldr r5, [r1, #-4]! .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr r6, [\B, #-4]! + ldr r6, [r1, #-4]! .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr r7, [\B, #-4]! + ldr r7, [r1, #-4]! .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr r8, [\B, #-4]! + ldr r8, [r1, #-4]! .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r1, \ - ldr lr, [\B, #-4]! + ldr lr, [r1, #-4]! #ifndef ARM_ALWAYS_BX add pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2) @@ -188,26 +179,19 @@ ENTRY(memmove) .p2align ARM_BX_ALIGN_LOG2 66: nop .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str r3, [\B, #-4]! + str r3, [r0, #-4]! .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str r4, [\B, #-4]! + str r4, [r0, #-4]! .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str r5, [\B, #-4]! + str r5, [r0, #-4]! .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str r6, [\B, #-4]! + str r6, [r0, #-4]! .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str r7, [\B, #-4]! + str r7, [r0, #-4]! .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str r8, [\B, #-4]! + str r8, [r0, #-4]! .p2align ARM_BX_ALIGN_LOG2 - sfi_breg r0, \ - str lr, [\B, #-4]! + str lr, [r0, #-4]! 
#ifdef ARM_ALWAYS_BX pop {r10} @@ -225,18 +209,12 @@ ENTRY(memmove) cfi_restore (r8) 8: movs r2, r2, lsl #31 - sfi_breg r1, \ - ldrbne r3, [\B, #-1]! - sfi_breg r1, \ - ldrbcs r4, [\B, #-1]! - sfi_breg r1, \ - ldrbcs ip, [\B, #-1] - sfi_breg r0, \ - strbne r3, [\B, #-1]! - sfi_breg r0, \ - strbcs r4, [\B, #-1]! - sfi_breg r0, \ - strbcs ip, [\B, #-1] + ldrbne r3, [r1, #-1]! + ldrbcs r4, [r1, #-1]! + ldrbcs ip, [r1, #-1] + strbne r3, [r0, #-1]! + strbcs r4, [r0, #-1]! + strbcs ip, [r0, #-1] #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \ || defined (ARM_ALWAYS_BX)) @@ -252,27 +230,20 @@ ENTRY(memmove) cfi_restore_state 9: cmp ip, #2 - sfi_breg r1, \ - ldrbgt r3, [\B, #-1]! - sfi_breg r1, \ - ldrbge r4, [\B, #-1]! - sfi_breg r1, \ - ldrb lr, [\B, #-1]! - sfi_breg r0, \ - strbgt r3, [\B, #-1]! - sfi_breg r0, \ - strbge r4, [\B, #-1]! + ldrbgt r3, [r1, #-1]! + ldrbge r4, [r1, #-1]! + ldrb lr, [r1, #-1]! + strbgt r3, [r0, #-1]! + strbge r4, [r0, #-1]! subs r2, r2, ip - sfi_breg r0, \ - strb lr, [\B, #-1]! + strb lr, [r0, #-1]! 
blt 8b ands ip, r1, #3 beq 1b 10: bic r1, r1, #3 cmp ip, #2 - sfi_breg r1, \ - ldr r3, [\B, #0] + ldr r3, [r1, #0] beq 17f blt 18f @@ -296,20 +267,18 @@ ENTRY(memmove) cfi_rel_offset (r8, 12) cfi_rel_offset (r10, 16) - PLD( sfi_pld r1, #-4 ) + PLD( pld [r1, #-4] ) PLD( subs r2, r2, #96 ) - PLD( sfi_pld r1, #-32 ) + PLD( pld [r1, #-32] ) PLD( blt 13f ) - PLD( sfi_pld r1, #-64 ) - PLD( sfi_pld r1, #-96 ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) -12: PLD( sfi_pld r1, #-128 ) -13: sfi_breg r1, \ - ldmdb \B!, {r7, r8, r10, ip} +12: PLD( pld [r1, #-128] ) +13: ldmdb r1!, {r7, r8, r10, ip} mov lr, r3, PUSH #\push subs r2, r2, #32 - sfi_breg r1, \ - ldmdb \B!, {r3, r4, r5, r6} + ldmdb r1!, {r3, r4, r5, r6} orr lr, lr, ip, PULL #\pull mov ip, ip, PUSH #\push orr ip, ip, r10, PULL #\pull @@ -325,8 +294,7 @@ ENTRY(memmove) orr r5, r5, r4, PULL #\pull mov r4, r4, PUSH #\push orr r4, r4, r3, PULL #\pull - sfi_breg r0, \ - stmdb \B!, {r4 - r8, r10, ip, lr} + stmdb r0!, {r4 - r8, r10, ip, lr} bge 12b PLD( cmn r2, #96 ) PLD( bge 13b ) @@ -343,12 +311,10 @@ ENTRY(memmove) beq 16f 15: mov lr, r3, PUSH #\push - sfi_breg r1, \ - ldr r3, [\B, #-4]! + ldr r3, [r1, #-4]! subs ip, ip, #4 orr lr, lr, r3, PULL #\pull - sfi_breg r0, \ - str lr, [\B, #-4]! + str lr, [r0, #-4]! bgt 15b CALGN( cmp r2, #0 ) CALGN( bge 11b ) diff --git a/sysdeps/arm/memset.S b/sysdeps/arm/memset.S index d369c20a80..95946360bf 100644 --- a/sysdeps/arm/memset.S +++ b/sysdeps/arm/memset.S @@ -32,8 +32,7 @@ ENTRY(memset) 1: tst r3, #3 @ aligned yet? 
- sfi_breg r3, \ - strbne r1, [\B], #1 + strbne r1, [r3], #1 subne r2, r2, #1 bne 1b @@ -44,33 +43,25 @@ ENTRY(memset) 1: subs r2, r2, #8 - sfi_breg r3, \ - stmiacs \B!, {r1, ip} @ store up to 32 bytes per loop iteration + stmiacs r3!, {r1, ip} @ store up to 32 bytes per loop iteration subscs r2, r2, #8 - sfi_breg r3, \ - stmiacs \B!, {r1, ip} + stmiacs r3!, {r1, ip} subscs r2, r2, #8 - sfi_breg r3, \ - stmiacs \B!, {r1, ip} + stmiacs r3!, {r1, ip} subscs r2, r2, #8 - sfi_breg r3, \ - stmiacs \B!, {r1, ip} + stmiacs r3!, {r1, ip} bcs 1b and r2, r2, #7 2: subs r2, r2, #1 @ store up to 4 bytes per loop iteration - sfi_breg r3, \ - strbcs r1, [\B], #1 + strbcs r1, [r3], #1 subscs r2, r2, #1 - sfi_breg r3, \ - strbcs r1, [\B], #1 + strbcs r1, [r3], #1 subscs r2, r2, #1 - sfi_breg r3, \ - strbcs r1, [\B], #1 + strbcs r1, [r3], #1 subscs r2, r2, #1 - sfi_breg r3, \ - strbcs r1, [\B], #1 + strbcs r1, [r3], #1 bcs 2b DO_RET(lr) diff --git a/sysdeps/arm/setjmp.S b/sysdeps/arm/setjmp.S index c809253d1b..2235890936 100644 --- a/sysdeps/arm/setjmp.S +++ b/sysdeps/arm/setjmp.S @@ -36,20 +36,15 @@ ENTRY (__sigsetjmp) #ifdef PTR_MANGLE mov a4, sp PTR_MANGLE2 (a4, a4, a3) - sfi_breg ip, \ - str a4, [\B], #4 + str a4, [ip], #4 PTR_MANGLE2 (a4, lr, a3) - sfi_breg ip, \ - str a4, [\B], #4 + str a4, [ip], #4 #else - sfi_breg ip, \ - str sp, [\B], #4 - sfi_breg ip, \ - str lr, [\B], #4 + str sp, [ip], #4 + str lr, [ip], #4 #endif /* Save registers */ - sfi_breg ip, \ - stmia \B!, JMP_BUF_REGLIST + stmia ip!, JMP_BUF_REGLIST #if !defined ARM_ASSUME_NO_IWMMXT || defined __SOFTFP__ # define NEED_HWCAP 1 @@ -80,8 +75,7 @@ ENTRY (__sigsetjmp) Don't use VFP instructions directly because this code is used in non-VFP multilibs. */ /* Following instruction is vstmia ip!, {d8-d15}. */ - sfi_breg ip, \ - stc p11, cr8, [\B], #64 + stc p11, cr8, [ip], #64 .Lno_vfp: #ifndef ARM_ASSUME_NO_IWMMXT @@ -90,18 +84,12 @@ ENTRY (__sigsetjmp) /* Save the call-preserved iWMMXt registers. 
*/ /* Following instructions are wstrd wr10, [ip], #8 (etc.) */ - sfi_breg r12, \ - stcl p1, cr10, [\B], #8 - sfi_breg r12, \ - stcl p1, cr11, [\B], #8 - sfi_breg r12, \ - stcl p1, cr12, [\B], #8 - sfi_breg r12, \ - stcl p1, cr13, [\B], #8 - sfi_breg r12, \ - stcl p1, cr14, [\B], #8 - sfi_breg r12, \ - stcl p1, cr15, [\B], #8 + stcl p1, cr10, [r12], #8 + stcl p1, cr11, [r12], #8 + stcl p1, cr12, [r12], #8 + stcl p1, cr13, [r12], #8 + stcl p1, cr14, [r12], #8 + stcl p1, cr15, [r12], #8 .Lno_iwmmxt: #endif diff --git a/sysdeps/arm/strlen.S b/sysdeps/arm/strlen.S index 009142be70..fb1589bbe6 100644 --- a/sysdeps/arm/strlen.S +++ b/sysdeps/arm/strlen.S @@ -30,8 +30,7 @@ ENTRY(strlen) bic r1, r0, $3 @ addr of word containing first byte - sfi_breg r1, \ - ldr r2, [\B], $4 @ get the first word + ldr r2, [r1], $4 @ get the first word ands r3, r0, $3 @ how many bytes are duff? rsb r0, r3, $0 @ get - that number into counter. beq Laligned @ skip into main check routine if no @@ -55,8 +54,7 @@ Laligned: @ here, we have a word in r2. 
Does it tstne r2, $0x00ff0000 @ tstne r2, $0xff000000 @ addne r0, r0, $4 @ if not, the string is 4 bytes longer - sfi_breg r1, \ - ldrne r2, [\B], $4 @ and we continue to the next word + ldrne r2, [r1], $4 @ and we continue to the next word bne Laligned @ Llastword: @ drop through to here once we find a #ifdef __ARMEB__ diff --git a/sysdeps/arm/submul_1.S b/sysdeps/arm/submul_1.S index 34606dde51..24d39d93b8 100644 --- a/sysdeps/arm/submul_1.S +++ b/sysdeps/arm/submul_1.S @@ -37,24 +37,19 @@ ENTRY (__mpn_submul_1) cfi_rel_offset (r6, 8) cfi_rel_offset (r7, 12) - sfi_breg r1, \ - ldr r6, [\B], #4 - sfi_breg r0, \ - ldr r7, [\B] + ldr r6, [r1], #4 + ldr r7, [r0] mov r4, #0 /* init carry in */ b 1f 0: - sfi_breg r1, \ - ldr r6, [\B], #4 /* load next ul */ + ldr r6, [r1], #4 /* load next ul */ adds r5, r5, r4 /* (lpl, c) = lpl + cl */ adc r4, ip, #0 /* cl = hpl + c */ subs r5, r7, r5 /* (lpl, !c) = rl - lpl */ - sfi_breg r0, \ - ldr r7, [\B, #4] /* load next rl */ + ldr r7, [r0, #4] /* load next rl */ it cc addcc r4, r4, #1 /* cl += !c */ - sfi_breg r0, \ - str r5, [\B], #4 + str r5, [r0], #4 1: umull r5, ip, r6, r3 /* (hpl, lpl) = ul * vl */ subs r2, r2, #1 @@ -63,8 +58,7 @@ ENTRY (__mpn_submul_1) adds r5, r5, r4 /* (lpl, c) = lpl + cl */ adc r4, ip, #0 /* cl = hpl + c */ subs r5, r7, r5 /* (lpl, !c) = rl - lpl */ - sfi_breg r0, \ - str r5, [\B], #4 + str r5, [r0], #4 it cc addcc r4, r4, #1 /* cl += !c */ mov r0, r4 /* return carry */ diff --git a/sysdeps/arm/sysdep.h b/sysdeps/arm/sysdep.h index 990e636892..6d60c34df1 100644 --- a/sysdeps/arm/sysdep.h +++ b/sysdeps/arm/sysdep.h @@ -150,7 +150,6 @@ second version uses it. 
*/ # define LDST_INDEXED_NOINDEX(OP, R, T, X, Y) \ add T, X, Y; \ - sfi_breg T, \ OP R, [T] # define LDST_INDEXED_INDEX(OP, R, X, Y) \ OP R, [X, Y] @@ -198,7 +197,7 @@ # define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ movw T, #:lower16:SYMBOL; \ movt T, #:upper16:SYMBOL; \ - sfi_breg T, ldr R, [\B, $CONSTANT] + ldr R, [T, $CONSTANT] # elif defined (ARCH_HAS_T2) && defined (PIC) && ARM_PCREL_MOVW_OK # define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ movw R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \ @@ -212,7 +211,7 @@ 97: add R, R, pc; \ 98: LDST_PC_INDEXED (ldr, T, T, T); \ LDST_INDEXED (ldr, R, T, R, T); \ - sfi_breg R, ldr R, [\B, $CONSTANT] + ldr R, [R, $CONSTANT] # else # define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ ldr T, 99f; \ @@ -277,53 +276,6 @@ cfi_restore_state # endif /* ARCH_HAS_HARD_TP */ -# ifndef ARM_SFI_MACROS -# define ARM_SFI_MACROS 1 -/* This assembly macro is prepended to any load/store instruction, - pulling the base register out of the addressing mode syntax and - making it the first operand of the macro. For example: - ldr r0, [r1] - becomes: - sfi_breg r1, ldr r0, [\B] - The \B stands in for the base register that is the first operand - to the macro, so we can avoid error-prone repetition of the base - register in two places on the line. - - This is used for all memory access through a base register other - than PC or SP. It's intended to support SFI schemes such as - Native Client, where the OS will enforce that all load/store - instructions use a special form. In any such configuration, - another sysdep.h file will have defined ARM_SFI_MACROS and - provided its own assembly macros with the same interface. */ - - .macro sfi_breg basereg, insn, operands:vararg - .macro _sfi_breg_doit B - \insn \operands - .endm - _sfi_breg_doit \basereg - .purgem _sfi_breg_doit - .endm - -/* This assembly macro replaces the "pld" instruction. 
- The syntax: - sfi_pld REGISTER, #OFFSET - is exactly equivalent to: - sfi_breg REGISTER, pld [\B, #OFFSET] - (and ", #OFFSET" is optional). We have a separate macro - only to work around a bug in GAS versions prior to 2.23.2, - that misparses the sfi_breg macro expansion in this case. */ - - .macro sfi_pld basereg, offset=#0 - pld [\basereg, \offset] - .endm - -/* This macro precedes any instruction that directly changes the SP. - It's not needed for push/pop or for any kind of load or store that - implicitly changes the SP via the ! syntax. */ -# define sfi_sp /* Nothing to do. */ - -# endif - /* These are the directives used for EABI unwind info. Wrap them in macros so another configuration's sysdep.h file can define them away if it doesn't use EABI unwind info. */