Patchwork arm: Remove unused ARM code in optimized implementation

login
register
mail settings
Submitter Adhemerval Zanella Netto
Date April 16, 2018, 11:22 a.m.
Message ID <1523877749-8457-1-git-send-email-adhemerval.zanella@linaro.org>
Download mbox | patch
Permalink /patch/26751/
State New
Headers show

Comments

Adhemerval Zanella Netto - April 16, 2018, 11:22 a.m.
This patch removes the unused ARM code path for armv6t2 memchr and
strlen and armv7 memchr and strcmp.  In all implementations, the ARM
code is not used in any possible build (unless glibc is explicitly
built with the undocumented NO_THUMB define), and for armv7
the resulting code either produces wrong results (memchr) or throws a
build error (strcmp).

Checked on arm-linux-gnueabihf built targeting both armv6 and
armv7.

	* sysdeps/arm/armv6t2/memchr.S (memchr): Remove ARM code path.
	* sysdeps/arm/armv6t2/strlen.S (strlen): Likewise.
	* sysdeps/arm/armv7/multiarch/memchr_neon.S (memchr): Likewise.
	* sysdeps/arm/armv7/strcmp.S (strcmp): Likewise.
---
 ChangeLog                                 |  7 +++++++
 sysdeps/arm/armv6t2/memchr.S              | 23 -----------------------
 sysdeps/arm/armv6t2/strlen.S              | 25 +------------------------
 sysdeps/arm/armv7/multiarch/memchr_neon.S | 16 ----------------
 sysdeps/arm/armv7/strcmp.S                | 23 -----------------------
 5 files changed, 8 insertions(+), 86 deletions(-)
Phil Blundell - April 16, 2018, 2:55 p.m.
On Mon, 2018-04-16 at 08:22 -0300, Adhemerval Zanella wrote:
> This patch removes the ununsed ARM code path for armv6t2 memchr and
> strlen and armv7 memch and strcmp.  In all implementations, the ARM
> code is not used in any possible build (unless glibc is explicit
> build with the non-documented NO_THUMB define) and for armv7
> the resulting code either produces wrong results (memchr) and throw
> build error (strcmp).

Thanks, looks good to me.

> Checked on arm-linux-gnueabihf built targeting both armv6 and
> armv7.

Can you confirm there were no changes in the generated binaries?

p.
Adhemerval Zanella Netto - April 16, 2018, 6:07 p.m.
On 16/04/2018 11:55, Phil Blundell wrote:
> On Mon, 2018-04-16 at 08:22 -0300, Adhemerval Zanella wrote:
>> This patch removes the ununsed ARM code path for armv6t2 memchr and
>> strlen and armv7 memch and strcmp.  In all implementations, the ARM
>> code is not used in any possible build (unless glibc is explicit
>> build with the non-documented NO_THUMB define) and for armv7
>> the resulting code either produces wrong results (memchr) and throw
>> build error (strcmp).
> 
> Thanks, looks good to me.
> 
>> Checked on arm-linux-gnueabihf built targeting both armv6 and
>> armv7.
> 
> Can you confirm there were no changes in the generated binaries?
> 
> p.
I saw no difference in shared libraries built with and without this
patch. I will apply this patch shortly.

Patch

diff --git a/sysdeps/arm/armv6t2/memchr.S b/sysdeps/arm/armv6t2/memchr.S
index bdd385b..1d6eee0 100644
--- a/sysdeps/arm/armv6t2/memchr.S
+++ b/sysdeps/arm/armv6t2/memchr.S
@@ -42,12 +42,8 @@ 
 	.syntax unified
 
 	.text
-#ifdef NO_THUMB
-	.arm
-#else
 	.thumb
 	.thumb_func
-#endif
 	.global memchr
 	.type memchr,%function
 ENTRY(memchr)
@@ -91,22 +87,14 @@  ENTRY(memchr)
 
 15:
 	ldrd 	r4,r5, [r0],#8
-#ifndef NO_THUMB
 	subs	r6, r6, #8
-#endif
 	eor	r4,r4, r1	@ Get it so that r4,r5 have 00's where the bytes match the target
 	eor	r5,r5, r1
 	uadd8	r4, r4, r7	@ Parallel add 0xff - sets the GE bits for anything that wasn't 0
 	sel	r4, r3, r7	@ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
 	uadd8	r5, r5, r7	@ Parallel add 0xff - sets the GE bits for anything that wasn't 0
 	sel	r5, r4, r7	@ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
-#ifndef NO_THUMB
 	cbnz	r5, 60f
-#else
-	cmp	r5, #0
-	bne	60f
-	subs	r6, r6, #8
-#endif
 	bne	15b		@ (Flags from the subs above) If not run out of bytes then go around again
 
 	pop	{r4,r5,r6,r7}
@@ -120,24 +108,13 @@  ENTRY(memchr)
 	and	r2,r2,#7	@ Leave the count remaining as the number after the double words have been done
 
 20:
-#ifndef NO_THUMB
 	cbz	r2, 40f		@ 0 length or hit the end already then not found
-#else
-	cmp	r2, #0
-	beq	40f
-#endif
 
 21:  @ Post aligned section, or just a short call
 	ldrb	r3,[r0],#1
-#ifndef NO_THUMB
 	subs	r2,r2,#1
 	eor	r3,r3,r1	@ r3 = 0 if match - doesn't break flags from sub
 	cbz	r3, 50f
-#else
-	eors	r3, r3, r1
-	beq	50f
-	subs	r2, r2, #1
-#endif
 	bne	21b		@ on r2 flags
 
 40:
diff --git a/sysdeps/arm/armv6t2/strlen.S b/sysdeps/arm/armv6t2/strlen.S
index 6988183..a34ef20 100644
--- a/sysdeps/arm/armv6t2/strlen.S
+++ b/sysdeps/arm/armv6t2/strlen.S
@@ -21,7 +21,7 @@ 
 
  */
 
-#include <arm-features.h>               /* This might #define NO_THUMB.  */
+#include <arm-features.h>
 #include <sysdep.h>
 
 #ifdef __ARMEB__
@@ -32,24 +32,8 @@ 
 #define S2HI		lsl
 #endif
 
-#ifndef NO_THUMB
 /* This code is best on Thumb.  */
 	.thumb
-#else
-/* Using bne.w explicitly is desirable in Thumb mode because it helps
-   align the following label without a nop.  In ARM mode there is no
-   such difference.  */
-.macro bne.w label
-	bne \label
-.endm
-
-/* This clobbers the condition codes, which the real Thumb cbnz instruction
-   does not do.  But it doesn't matter for any of the uses here.  */
-.macro cbnz reg, label
-	cmp \reg, #0
-	bne \label
-.endm
-#endif
 
 /* Parameters and result.  */
 #define srcin		r0
@@ -146,16 +130,9 @@  ENTRY(strlen)
 	tst	tmp1, #4
 	pld	[src, #64]
 	S2HI	tmp2, const_m1, tmp2
-#ifdef NO_THUMB
-	mvn	tmp1, tmp2
-	orr	data1a, data1a, tmp1
-	itt	ne
-	orrne	data1b, data1b, tmp1
-#else
 	orn	data1a, data1a, tmp2
 	itt	ne
 	ornne	data1b, data1b, tmp2
-#endif
 	movne	data1a, const_m1
 	mov	const_0, #0
 	b	.Lstart_realigned
diff --git a/sysdeps/arm/armv7/multiarch/memchr_neon.S b/sysdeps/arm/armv7/multiarch/memchr_neon.S
index 1b2ae75..6fbf9b8 100644
--- a/sysdeps/arm/armv7/multiarch/memchr_neon.S
+++ b/sysdeps/arm/armv7/multiarch/memchr_neon.S
@@ -68,11 +68,7 @@ 
  * allows to identify exactly which byte has matched.
  */
 
-#ifndef NO_THUMB
 	.thumb_func
-#else
-	.arm
-#endif
 	.p2align 4,,15
 
 ENTRY(memchr)
@@ -132,12 +128,7 @@  ENTRY(memchr)
 	/* The first block can also be the last */
 	bls		.Lmasklast
 	/* Have we found something already? */
-#ifndef NO_THUMB
 	cbnz		synd, .Ltail
-#else
-	cmp		synd, #0
-	bne		.Ltail
-#endif
 
 
 .Lloopintro:
@@ -176,16 +167,9 @@  ENTRY(memchr)
 	vpadd.i8	vdata0_0, vdata0_0, vdata1_0
 	vpadd.i8	vdata0_0, vdata0_0, vdata0_0
 	vmov		synd, vdata0_0[0]
-#ifndef NO_THUMB
 	cbz		synd, .Lnotfound
 	bhi		.Ltail	/* Uses the condition code from
 				   subs cntin, cntin, #32 above.  */
-#else
-	cmp		synd, #0
-	beq		.Lnotfound
-	cmp		cntin, #0
-	bhi		.Ltail
-#endif
 
 
 .Lmasklast:
diff --git a/sysdeps/arm/armv7/strcmp.S b/sysdeps/arm/armv7/strcmp.S
index 060b865..2626fdf 100644
--- a/sysdeps/arm/armv7/strcmp.S
+++ b/sysdeps/arm/armv7/strcmp.S
@@ -83,8 +83,6 @@ 
 #define syndrome	tmp2
 
 
-#ifndef NO_THUMB
-/* This code is best on Thumb.  */
 	.thumb
 
 /* In Thumb code we can't use MVN with a register shift, but we do have ORN.  */
@@ -94,27 +92,6 @@ 
 .macro apply_mask data_reg, mask_reg
 	orn \data_reg, \data_reg, \mask_reg
 .endm
-#else
-/* In ARM code we don't have ORN, but we can use MVN with a register shift.  */
-.macro prepare_mask mask_reg, nbits_reg
-	mvn \mask_reg, const_m1, S2HI \nbits_reg
-.endm
-.macro apply_mask data_reg, mask_reg
-	orr \data_reg, \data_reg, \mask_reg
-.endm
-
-/* These clobber the condition codes, which the real Thumb cbz/cbnz
-   instructions do not.  But it doesn't matter for any of the uses here.  */
-.macro cbz reg, label
-	cmp \reg, #0
-	beq \label
-.endm
-.macro cbnz reg, label
-	cmp \reg, #0
-	bne \label
-.endm
-#endif
-
 
 	/* Macro to compute and return the result value for word-aligned
 	   cases.  */