[SPARC] supporting more than 2g in niagara4 memcpy and memset

Message ID 8761ellth2.fsf@oracle.com
State Changes Requested, archived
Headers

Commit Message

Jose E. Marchesi Nov. 11, 2014, 2:09 p.m. UTC
  Hi.

The niagara4-specific versions of memcpy and memset do signed 32bit
compares with the size of the buffer to copy/set.  Anything larger than
2G will have the 32st bit set and will be treated as negative.

The problem is easy to reproduce in a T4, T5 or T7 machine:

p1 = (char*) mmap (NULL, 0xe0000000,
                   PROT_READ | PROT_WRITE, MAP_PRIVATE |
                   MAP_ANONYMOUS, -1, 0);
p2 = (char*) mmap (NULL, 0xe0000000,
                   PROT_READ | PROT_WRITE, MAP_PRIVATE |
                   MAP_ANONYMOUS, -1, 0);
memset (p1, 0xc, 0xe0000000);      /* Only 3 bytes are set.  */
dst = memcpy (p2, p1, 0xe0000000); /* Only 3 bytes are copied.  */

The patch below fixes the issue by using the XCC condition codes in the
proper branch instructions.

Tested in sparc64-unknown-linux-gnu.
No regressions observed.

Thanks.

2014-11-11  Jose E. Marchesi  <jose.marchesi@oracle.com>

	* sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S: Use the XCC
	condition codes when comparing buffer sizes.
	* sysdeps/sparc/sparc64/multiarch/memset-niagara4.S: Likewise.
  

Comments

David Miller Nov. 11, 2014, 11:17 p.m. UTC | #1
From: jose.marchesi@oracle.com (Jose E. Marchesi)
Date: Tue, 11 Nov 2014 15:09:13 +0100

> Tested in sparc64-unknown-linux-gnu.
> No regressions observed.

Can I please convince you guys to test 32-bit stuff more strongly than
64-bit?  Please?

Both targets are important, but 32-bit sparcv9 multiarch is used more
than 64-bit is.

Becuase you didn't tag the sparc32/sparcv9/multiarch/foo-niagara4.S
files with the proper defines, 32-bit sparcv9 multiarch builds will
use %xcc and BPR too, which we absolutely do not want.

In the future if I don't see it mentioned that you tested 32-bit
targets, I'm going to ask you to resubmit after you have doen so.

I've made the same polite request with your sparc binutils submissions
so it is very disappointing to see you fall into the same bad habit
with your GLIBC work.

Thanks.
  
Jose E. Marchesi Nov. 12, 2014, 7:52 a.m. UTC | #2
Hi David.
    
    Becuase you didn't tag the sparc32/sparcv9/multiarch/foo-niagara4.S
    files with the proper defines, 32-bit sparcv9 multiarch builds will
    use %xcc and BPR too, which we absolutely do not want.

Oops, I missed that.
    
    In the future if I don't see it mentioned that you tested 32-bit
    targets, I'm going to ask you to resubmit after you have doen so.
    
    I've made the same polite request with your sparc binutils submissions
    so it is very disappointing to see you fall into the same bad habit
    with your GLIBC work.

Since I got your request I am testing everything in sparc-*-linux-*
targets as well as sparc64-*-linux-*... but for this patch, which was
written long ago.  Sorry about that :(
  
Ondrej Bilka Nov. 27, 2014, 4:47 p.m. UTC | #3
David, could you say if this fix is ok?

On Wed, Nov 12, 2014 at 08:52:35AM +0100, Jose E. Marchesi wrote:
> 
> Hi David.
>     
>     Becuase you didn't tag the sparc32/sparcv9/multiarch/foo-niagara4.S
>     files with the proper defines, 32-bit sparcv9 multiarch builds will
>     use %xcc and BPR too, which we absolutely do not want.
> 
> Oops, I missed that.
>     
>     In the future if I don't see it mentioned that you tested 32-bit
>     targets, I'm going to ask you to resubmit after you have doen so.
>     
>     I've made the same polite request with your sparc binutils submissions
>     so it is very disappointing to see you fall into the same bad habit
>     with your GLIBC work.
> 
> Since I got your request I am testing everything in sparc-*-linux-*
> targets as well as sparc64-*-linux-*... but for this patch, which was
> written long ago.  Sorry about that :(
  
David Miller Nov. 28, 2014, 2:50 a.m. UTC | #4
From: Ondřej Bílka <neleai@seznam.cz>
Date: Thu, 27 Nov 2014 17:47:10 +0100

> David, could you say if this fix is ok?

It's not ok, it needs to either remove the conditional XCC
stuff, or define something appropriate in the sparcv9 32-bit
case.
  

Patch

diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S
index 63e0d83..d183dcd 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S
@@ -46,6 +46,11 @@ 
 #define STORE(type,src,addr)	type src, [addr]
 #define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI
 
+#ifndef XCC
+#define USE_BPR
+#define XCC xcc
+#endif
+
 #if !defined NOT_IN_libc
 
 	.register	%g2,#scratch
@@ -69,12 +74,12 @@  ENTRY(__memcpy_niagara4)
 #endif
 	brz,pn		%o2, .Lexit
 	 cmp		%o2, 3
-	ble,pn		%icc, .Ltiny
+	ble,pn		%XCC, .Ltiny
 	 cmp		%o2, 19
-	ble,pn		%icc, .Lsmall
+	ble,pn		%XCC, .Lsmall
 	 or		%o0, %o1, %g2
 	cmp		%o2, 128
-	bl,pn		%icc, .Lmedium
+	bl,pn		%XCC, .Lmedium
 	 nop
 
 .Llarge:/* len >= 0x80 */
@@ -152,14 +157,14 @@  ENTRY(__memcpy_niagara4)
 	add		%o0, 0x08, %o0
 	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
 	add		%o0, 0x08, %o0
-	bne,pt		%icc, 1b
+	bne,pt		%XCC, 1b
 	 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
 
 	membar		#StoreLoad | #StoreStore
 
 	brz,pn		%o2, .Lexit
 	 cmp		%o2, 19
-	ble,pn		%icc, .Lsmall_unaligned
+	ble,pn		%XCC, .Lsmall_unaligned
 	 nop
 	ba,a,pt		%icc, .Lmedium_noprefetch
 
@@ -200,7 +205,7 @@  ENTRY(__memcpy_niagara4)
 	EX_ST(STORE(std, %f28, %o0 + 0x30))
 	EX_ST(STORE(std, %f30, %o0 + 0x38))
 	add		%o0, 0x40, %o0
-	bne,pt		%icc, 1b
+	bne,pt		%XCC, 1b
 	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
 	VISExitHalf
 
diff --git a/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S
index c090c50..15c931a 100644
--- a/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S
+++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S
@@ -21,6 +21,11 @@ 
 
 #define ASI_BLK_INIT_QUAD_LDD_P	0xe2
 
+#ifndef XCC
+#define USE_BPR
+#define XCC xcc
+#endif
+	
 #if !defined NOT_IN_libc
 
 	.register	%g2, #scratch
@@ -46,7 +51,7 @@  END(__memset_niagara4)
 ENTRY(__bzero_niagara4)
 	clr		%o4
 1:	cmp		%o1, 16
-	ble		%icc, .Ltiny
+	ble		%XCC, .Ltiny
 	 mov		%o0, %o3
 	sub		%g0, %o0, %g1
 	and		%g1, 0x7, %g1
@@ -58,7 +63,7 @@  ENTRY(__bzero_niagara4)
 	 add		%o0, 1, %o0
 .Laligned8:
 	cmp		%o1, 64 + (64 - 8)
-	ble		.Lmedium
+	ble		%XCC, .Lmedium
 	 sub		%g0, %o0, %g1
 	andcc		%g1, (64 - 1), %g1
 	brz,pn		%g1, .Laligned64
@@ -75,7 +80,7 @@  ENTRY(__bzero_niagara4)
 1:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
 	subcc		%g1, 0x40, %g1
 	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
-	bne,pt		%icc, 1b
+	bne,pt		%XCC, 1b
 	 add		%o0, 0x40, %o0
 .Lpostloop:
 	cmp		%o1, 8
@@ -116,7 +121,7 @@  ENTRY(__bzero_niagara4)
 	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
 	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
 	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
-	bne,pt		%icc, 1b
+	bne,pt		%XCC, 1b
 	 add		%o0, 0x30, %o0
 	ba,a,pt		%icc, .Lpostloop
 END(__bzero_niagara4)