From patchwork Sun Feb 19 21:30:01 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Jim Wilson <jim.wilson@linaro.org>
X-Patchwork-Id: 19307
Received: (qmail 125180 invoked by alias); 19 Feb 2017 21:30:31 -0000
Mailing-List: contact gdb-patches-help@sourceware.org; run by ezmlm
Precedence: bulk
List-Id: <gdb-patches.sourceware.org>
List-Unsubscribe: <mailto:gdb-patches-unsubscribe-##L=##H@sourceware.org>
List-Subscribe: <mailto:gdb-patches-subscribe@sourceware.org>
List-Archive: <http://sourceware.org/ml/gdb-patches/>
List-Post: <mailto:gdb-patches@sourceware.org>
List-Help: <mailto:gdb-patches-help@sourceware.org>,
	<http://sourceware.org/ml/#faqs>
Sender: gdb-patches-owner@sourceware.org
Delivered-To: mailing list gdb-patches@sourceware.org
Received: (qmail 121670 invoked by uid 89); 19 Feb 2017 21:30:19 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-26.9 required=5.0 tests=BAYES_00, GIT_PATCH_0,
	GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE,
	SPF_PASS autolearn=ham version=3.3.2 spammy=2039, xtls
X-HELO: mail-wm0-f44.google.com
Received: from mail-wm0-f44.google.com (HELO mail-wm0-f44.google.com)
	(74.125.82.44) by sourceware.org
	(qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP;
	Sun, 19 Feb 2017 21:30:05 +0000
Received: by mail-wm0-f44.google.com with SMTP id c85so61911594wmi.1 for
	<gdb-patches@sourceware.org>; Sun, 19 Feb 2017 13:30:04 -0800 (PST)
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net;
	s=20161025;
	h=x-gm-message-state:mime-version:from:date:message-id:subject:to;
	bh=tnWZ3A3XjLBiGSjWsfAlZ7THo5oUCf9zgdlcTu3uXwc=;
	b=NxsD/uvwd9lu7SlDEZsX6nJv4JBjx53zStYGAiHwu5sYW/ZkrOeoQqC9Z1nyMcJnV5
	QnYpI8+f30wd2S0gyDZypa+Pj/QMu6h8MCgh+U3da8bZ/QgUWXBkr4k9fmbcMVScgbwO
	p23olzyPu2lavCo1C/avsm+UifCU188Xu5MJspykhju1Bf8/AGb8FI7E1o4ntTlme2zE
	IxYBQM/+cjSepVrky/LI9FTdln9+8NNyr9lirMjcONMK2hwERhMohxzWn4R42cfci+5Y
	I+TcIyU1EZK5huQ/a7uVJ2QB+OHZK7Eah3Kha2pizA5ippzJMVdizHhx6A6n8JBBf9Ss
	xMmg==
X-Gm-Message-State: 
 AMke39li3S0QK3103/zONqSjDb1ojoXPKsxFSSpOHb6PQabhWRddg775Arg8hC/ptij4BAPLNI3T6Z1cgnjphuo/
X-Received: by 10.28.14.65 with SMTP id 62mr15428915wmo.46.1487539802650;
	Sun, 19 Feb 2017 13:30:02 -0800 (PST)
MIME-Version: 1.0
Received: by 10.223.135.80 with HTTP; Sun, 19 Feb 2017 13:30:01 -0800 (PST)
From: Jim Wilson <jim.wilson@linaro.org>
Date: Sun, 19 Feb 2017 13:30:01 -0800
Message-ID: 
 <CABXYE2WciJXO08+r8L+0gEGp=jUP6vtQmWV_pfYbkRJ=jNr30Q@mail.gmail.com>
Subject: [PATCH] aarch64 sim sxtl/uxtl fix, plus another addv fix
To: gdb-patches@sourceware.org

This started with the observation that sxtl/uxtl were broken for the
8b/16b cases.  The code is multiplying bias (which is equal to 0 or 2)
by 3, when it should be by 4.  3 would have been correct for
exponentiation, but not for multiply.

The new testcase fails without the patch, and works with the patch.
This patch reduces gcc C testsuite failures from 1558 to 1510 (-48).

When writing the testcase, and verifying on hardware, I noticed that
the addv instruction was still broken.  I had fixed a bug in it
before, but I found another one.  It isn't truncating results to the
input size, e.g. the 8b case should produce a result truncated to 8 bits,
but writes 64 bits to the dest register.  This was fixed by moving the
val variable declaration inside each case to give it an appropriate
type size.

Since I had used addv in a number of testcases, this required fixing 4
existing testcases to check for the correct addv result.  This fix
unfortunately doesn't help reduce the number of gcc C testsuite
failures, which remain unchanged by this addv fix at 1510.  This is
probably why I didn't notice it before.

Jim

2017-02-19  Jim Wilson  <jim.wilson@linaro.org>

	sim/aarch64/
	* simulator.c (do_vec_ADDV): Move val declaration inside each case,
	with type set to input type size.
	(do_vec_xtl): Change bias multiplier from 3 to 4 for byte case.

	sim/testsuite/sim/aarch64/
	* bit.s: Change cmp immediates to account for addv bug fix.
	* cmtst.s, ldn_single.s, stn_single.s: Likewise.
	* xtl.s: New.

diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c
index 7c28219..d31cb10 100644
--- a/sim/aarch64/simulator.c
+++ b/sim/aarch64/simulator.c
@@ -3433,7 +3433,6 @@ do_vec_ADDV (sim_cpu *cpu)
   unsigned vm = INSTR (9, 5);
   unsigned rd = INSTR (4, 0);
   unsigned i;
-  uint64_t val = 0;
   int      full = INSTR (30, 30);
 
   NYI_assert (29, 24, 0x0E);
@@ -3443,24 +3442,33 @@ do_vec_ADDV (sim_cpu *cpu)
   switch (INSTR (23, 22))
     {
     case 0:
-      for (i = 0; i < (full ? 16 : 8); i++)
-	val += aarch64_get_vec_u8 (cpu, vm, i);
-      aarch64_set_vec_u64 (cpu, rd, 0, val);
-      return;
+      {
+	uint8_t val = 0;
+	for (i = 0; i < (full ? 16 : 8); i++)
+	  val += aarch64_get_vec_u8 (cpu, vm, i);
+	aarch64_set_vec_u64 (cpu, rd, 0, val);
+	return;
+      }
 
     case 1:
-      for (i = 0; i < (full ? 8 : 4); i++)
-	val += aarch64_get_vec_u16 (cpu, vm, i);
-      aarch64_set_vec_u64 (cpu, rd, 0, val);
-      return;
+      {
+	uint16_t val = 0;
+	for (i = 0; i < (full ? 8 : 4); i++)
+	  val += aarch64_get_vec_u16 (cpu, vm, i);
+	aarch64_set_vec_u64 (cpu, rd, 0, val);
+	return;
+      }
 
     case 2:
-      if (! full)
-	HALT_UNALLOC;
-      for (i = 0; i < 4; i++)
-	val += aarch64_get_vec_u32 (cpu, vm, i);
-      aarch64_set_vec_u64 (cpu, rd, 0, val);
-      return;
+      {
+	uint32_t val = 0;
+	if (! full)
+	  HALT_UNALLOC;
+	for (i = 0; i < 4; i++)
+	  val += aarch64_get_vec_u32 (cpu, vm, i);
+	aarch64_set_vec_u64 (cpu, rd, 0, val);
+	return;
+      }
 
     case 3:
       HALT_UNALLOC;
@@ -5694,7 +5702,7 @@ do_vec_xtl (sim_cpu *cpu)
 	  NYI_assert (19, 19, 1);
 
 	  shift = INSTR (18, 16);
-	  bias *= 3;
+	  bias *= 4;
 	  for (i = 0; i < 8; i++)
 	    v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
 	  for (i = 0; i < 8; i++)
@@ -5730,7 +5738,7 @@ do_vec_xtl (sim_cpu *cpu)
 	  NYI_assert (19, 19, 1);
 
 	  shift = INSTR (18, 16);
-	  bias *= 3;
+	  bias *= 4;
 	  for (i = 0; i < 8; i++)
 	    v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
 	  for (i = 0; i < 8; i++)
diff --git a/sim/testsuite/sim/aarch64/bit.s b/sim/testsuite/sim/aarch64/bit.s
index 650d317..01a1d4e 100644
--- a/sim/testsuite/sim/aarch64/bit.s
+++ b/sim/testsuite/sim/aarch64/bit.s
@@ -34,56 +34,56 @@ mask:
 	bif v3.8b, v1.8b, v2.8b
 	addv b4, v3.8b
 	mov x1, v4.d[0]
-	cmp x1, #306
+	cmp x1, #50
 	bne .Lfailure
 
 	mov v3.16b, v0.16b
 	bif v3.16b, v1.16b, v2.16b
 	addv b4, v3.16b
 	mov x1, v4.d[0]
-	cmp x1, #1020
+	cmp x1, #252
 	bne .Lfailure
 
 	mov v3.8b, v0.8b
 	bit v3.8b, v1.8b, v2.8b
 	addv b4, v3.8b
 	mov x1, v4.d[0]
-	cmp x1, #306
+	cmp x1, #50
 	bne .Lfailure
 
 	mov v3.16b, v0.16b
 	bit v3.16b, v1.16b, v2.16b
 	addv b4, v3.16b
 	mov x1, v4.d[0]
-	cmp x1, #1037
+	cmp x1, #13
 	bne .Lfailure
 
 	mov v3.8b, v2.8b
 	bsl v3.8b, v0.8b, v1.8b
 	addv b4, v3.8b
 	mov x1, v4.d[0]
-	cmp x1, #306
+	cmp x1, #50
 	bne .Lfailure
 
 	mov v3.16b, v2.16b
 	bsl v3.16b, v0.16b, v1.16b
 	addv b4, v3.16b
 	mov x1, v4.d[0]
-	cmp x1, #1020
+	cmp x1, #252
 	bne .Lfailure
 
 	mov v3.8b, v0.8b
 	eor v3.8b, v1.8b, v2.8b
 	addv b4, v3.8b
 	mov x1, v4.d[0]
-	cmp x1, #1020
+	cmp x1, #252
 	bne .Lfailure
 
 	mov v3.16b, v0.16b
 	eor v3.16b, v1.16b, v2.16b
 	addv b4, v3.16b
 	mov x1, v4.d[0]
-	cmp x1, #2039
+	cmp x1, #247
 	bne .Lfailure
 
 	pass
diff --git a/sim/testsuite/sim/aarch64/cmtst.s b/sim/testsuite/sim/aarch64/cmtst.s
index 64c8b27..7e6a4c3 100644
--- a/sim/testsuite/sim/aarch64/cmtst.s
+++ b/sim/testsuite/sim/aarch64/cmtst.s
@@ -40,13 +40,13 @@ inputd2:
 	cmtst v2.8b, v0.8b, v1.8b
 	addv b3, v2.8b
 	mov x1, v3.d[0]
-	cmp x1, #0x5fa
+	cmp x1, #0xfa
 	bne .Lfailure
 
 	cmtst v2.16b, v0.16b, v1.16b
 	addv b3, v2.16b
 	mov x1, v3.d[0]
-	cmp x1, #0xbf4
+	cmp x1, #0xf4
 	bne .Lfailure
 
 	adrp x0, inputh
@@ -56,14 +56,14 @@ inputd2:
 	cmtst v2.4h, v0.4h, v1.4h
 	addv h3, v2.4h
 	mov x1, v3.d[0]
-	mov x2, #0x1fffe
+	mov x2, #0xfffe
 	cmp x1, x2
 	bne .Lfailure
 
 	cmtst v2.8h, v0.8h, v1.8h
 	addv h3, v2.8h
 	mov x1, v3.d[0]
-	mov x2, #0x3fffc
+	mov x2, #0xfffc
 	cmp x1, x2
 	bne .Lfailure
 
@@ -82,7 +82,7 @@ inputd2:
 	cmtst v2.4s, v0.4s, v1.4s
 	addv s3, v2.4s
 	mov x1, v3.d[0]
-	mov x2, #0x1fffffffe
+	mov x2, #0xfffffffe
 	cmp x1, x2
 	bne .Lfailure
 
diff --git a/sim/testsuite/sim/aarch64/ldn_single.s b/sim/testsuite/sim/aarch64/ldn_single.s
index 3102e9e..4c460fb 100644
--- a/sim/testsuite/sim/aarch64/ldn_single.s
+++ b/sim/testsuite/sim/aarch64/ldn_single.s
@@ -48,7 +48,7 @@ input:
 	mov x6, v3.d[0]
 	cmp x5, #221
 	bne .Lfailure
-	cmp x6, #307
+	cmp x6, #51
 	bne .Lfailure
 
 	mov x2, x0
@@ -68,7 +68,7 @@ input:
 	bne .Lfailure
 	cmp x5, #200
 	bne .Lfailure
-	cmp x6, #264
+	cmp x6, #8
 	bne .Lfailure
 
 	mov x2, x0
@@ -90,9 +90,9 @@ input:
 	bne .Lfailure
 	cmp x5, #232
 	bne .Lfailure
-	cmp x6, #296
+	cmp x6, #40
 	bne .Lfailure
-	cmp x7, #360
+	cmp x7, #104
 	bne .Lfailure
 
 	pass
diff --git a/sim/testsuite/sim/aarch64/stn_single.s b/sim/testsuite/sim/aarch64/stn_single.s
index 5527c84..2bd19cf 100644
--- a/sim/testsuite/sim/aarch64/stn_single.s
+++ b/sim/testsuite/sim/aarch64/stn_single.s
@@ -63,7 +63,7 @@ output:
 	mov x6, v5.d[0]
 	cmp x5, #136
 	bne .Lfailure
-	cmp x6, #264
+	cmp x6, #8
 	bne .Lfailure
 
 	mov x2, x1
@@ -114,7 +114,7 @@ output:
 	bne .Lfailure
 	cmp x6, #232
 	bne .Lfailure
-	cmp x7, #296
+	cmp x7, #40
 	bne .Lfailure
 
 	pass
diff --git a/sim/testsuite/sim/aarch64/xtl.s b/sim/testsuite/sim/aarch64/xtl.s
new file mode 100644
index 0000000..16ef892
--- /dev/null
+++ b/sim/testsuite/sim/aarch64/xtl.s
@@ -0,0 +1,110 @@
+#mach: aarch64
+
+# Check the extend long instructions: sxtl, sxtl2, uxtl, uxtl2.
+# Each result vector is reduced with addv and compared with the expected sum.
+# addv has no 2d form, so the 2d results are summed as four 32-bit lanes.
+
+.include "testutils.inc"
+
+	.data
+	.align 4
+input:
+	.word 0x04030201
+	.word 0x08070605
+	.word 0xfcfdfeff
+	.word 0xf8f9fafb
+
+	start
+	adrp x0, input
+	ldr q0, [x0, #:lo12:input]
+
+# Zero extend bytes to halfwords.
+	uxtl v1.8h, v0.8b
+	uxtl2 v2.8h, v0.16b
+	addv h3, v1.8h
+	addv h4, v2.8h
+	mov x1, v3.d[0]
+	mov x2, v4.d[0]
+	cmp x1, #36
+	bne .Lfailure
+	cmp x2, #2012
+	bne .Lfailure
+
+# Zero extend halfwords to words.
+	uxtl v1.4s, v0.4h
+	uxtl2 v2.4s, v0.8h
+	addv s3, v1.4s
+	addv s4, v2.4s
+	mov x1, v3.d[0]
+	mov x2, v4.d[0]
+	mov x3, #5136
+	cmp x1, x3
+	bne .Lfailure
+	mov x4, #0xeff0
+	movk x4, 0x3, lsl #16
+	cmp x2, x4
+	bne .Lfailure
+
+# Zero extend words to doublewords.
+	uxtl v1.2d, v0.2s
+	uxtl2 v2.2d, v0.4s
+	addv s3, v1.4s
+	addv s4, v2.4s
+	mov x1, v3.d[0]
+	mov x2, v4.d[0]
+	mov x3, #0x0806
+	movk x3, #0x0c0a, lsl #16
+	cmp x1, x3
+	bne .Lfailure
+	mov x4, #0xf9fa
+	movk x4, #0xf5f7, lsl #16
+	cmp x2, x4
+	bne .Lfailure
+
+# Sign extend bytes to halfwords.
+	sxtl v1.8h, v0.8b
+	sxtl2 v2.8h, v0.16b
+	addv h3, v1.8h
+	addv h4, v2.8h
+	mov x1, v3.d[0]
+	mov x2, v4.d[0]
+	cmp x1, #36
+	bne .Lfailure
+	mov x3, #0xffdc
+	cmp x2, x3
+	bne .Lfailure
+
+# Sign extend halfwords to words.
+	sxtl v1.4s, v0.4h
+	sxtl2 v2.4s, v0.8h
+	addv s3, v1.4s
+	addv s4, v2.4s
+	mov x1, v3.d[0]
+	mov x2, v4.d[0]
+	mov x3, #5136
+	cmp x1, x3
+	bne .Lfailure
+	mov x4, #0xeff0
+	movk x4, 0xffff, lsl #16
+	cmp x2, x4
+	bne .Lfailure
+
+# Sign extend words to doublewords.
+	sxtl v1.2d, v0.2s
+	sxtl2 v2.2d, v0.4s
+	addv s3, v1.4s
+	addv s4, v2.4s
+	mov x1, v3.d[0]
+	mov x2, v4.d[0]
+	mov x3, #0x0806
+	movk x3, #0x0c0a, lsl #16
+	cmp x1, x3
+	bne .Lfailure
+	mov x4, #0xf9f8
+	movk x4, #0xf5f7, lsl #16
+	cmp x2, x4
+	bne .Lfailure
+
+	pass
+.Lfailure:
+	fail