aarch64 sim umulh/smulh fixes

Message ID CABXYE2W-FipDhWq9Cg5F0GKhkakuVGHR_S+egKvwbsD7mKsJxA@mail.gmail.com

Commit Message

Jim Wilson March 3, 2017, 9:25 p.m. UTC
  Umulh gets the wrong result when the middle cross-products generate a
carry out, because the carry is added at the wrong bit position.  This
is fixed by shifting it left by 32 before adding it in.  Smulh gets
the wrong result whenever the low 64 bits of the 128-bit product are
nonzero: in that case the high part must be inverted rather than
negated.  It fails most visibly when the high part ends up 0 and the
low part doesn't, since negating 0 changes nothing.  Fixed by using
the ~ operator instead, and then adding 1 only when the low part is 0,
which is exactly two's-complement negation of the full 128-bit result.
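
For reference, here is a standalone sketch of the corrected arithmetic
(illustrative only, not the patched simulator code; the *_sketch names
are made up):

  #include <stdint.h>

  /* High 64 bits of a 64x64->128 unsigned multiply, built from 32-bit
     halves.  A carry out of the middle-column sum has weight 2^96 in
     the 128-bit product, i.e. bit 32 of the high half, hence the
     shift left by 32.  */
  static uint64_t
  mul64hi_sketch (uint64_t v1, uint64_t v2)
  {
    uint64_t v1_lo = v1 & 0xffffffffULL, v1_hi = v1 >> 32;
    uint64_t v2_lo = v2 & 0xffffffffULL, v2_hi = v2 >> 32;
    uint64_t xproductlo   = v1_lo * v2_lo;
    uint64_t xproductmid1 = v1_lo * v2_hi;
    uint64_t xproductmid2 = v1_hi * v2_lo;
    uint64_t xproducthi   = v1_hi * v2_hi;
    uint64_t carry = 0;

    /* Accumulate the middle column, counting carries out of 64 bits.  */
    uint64_t mid = (xproductlo >> 32) + xproductmid1;
    if (mid < xproductmid1)
      carry++;
    mid += xproductmid2;
    if (mid < xproductmid2)
      carry++;

    return xproducthi + (mid >> 32) + (carry << 32);
  }

  /* Signed high part: multiply the magnitudes, then negate the
     128-bit result if the operand signs differ.  -x is ~x + 1, and
     the +1 only propagates into the high half when the low half is
     all zeros.  */
  static int64_t
  smulh_sketch (int64_t a, int64_t b)
  {
    int negate = (a < 0) != (b < 0);
    uint64_t ua = a < 0 ? - (uint64_t) a : (uint64_t) a;
    uint64_t ub = b < 0 ? - (uint64_t) b : (uint64_t) b;
    uint64_t hi = mul64hi_sketch (ua, ub);

    if (negate)
      {
	hi = ~hi;
	if (ua * ub == 0)	/* Low 64 bits of the product.  */
	  hi += 1;
      }
    return (int64_t) hi;
  }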

The new testcase passes with the patch and fails without it.  The
patch reduces the gcc C testsuite unexpected failures from 1473 to
1440 (-33).

This also fixes bugs in last week's sumov.s testcase.  I had
accidentally committed an unfinished version of it; the corrected
version passes after this patch.

Jim
  

Patch

2017-03-03  Jim Wilson  <jim.wilson@linaro.org>

	sim/aarch64/
	* simulator.c (mul64hi): Shift carry left by 32.
	(smulh): Change signum to negate.  If negate, invert result, and add
	carry bit if low part of multiply result is zero.

	sim/testsuite/sim/aarch64/
	* sumov.s: Correct compare test values.
	* sumulh.s: New.

diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c
index 1756ba1..8a8df7a 100644
--- a/sim/aarch64/simulator.c
+++ b/sim/aarch64/simulator.c
@@ -13020,6 +13020,8 @@  mul64hi (uint64_t value1, uint64_t value2)
 
   /* Drop lowest 32 bits of middle cross-product.  */
   result = resultmid1 >> 32;
+  /* Move carry bit to just above middle cross-product highest bit.  */
+  carry = carry << 32;
 
   /* Add top cross-product plus and any carry.  */
   result += xproducthi + carry;
@@ -13042,7 +13044,7 @@  smulh (sim_cpu *cpu)
   int64_t  value2 = aarch64_get_reg_u64 (cpu, rm, NO_SP);
   uint64_t uvalue1;
   uint64_t uvalue2;
-  int64_t  signum = 1;
+  int  negate = 0;
 
   if (ra != R31)
     HALT_UNALLOC;
@@ -13051,7 +13053,7 @@  smulh (sim_cpu *cpu)
      the fix the sign up afterwards.  */
   if (value1 < 0)
     {
-      signum *= -1L;
+      negate = !negate;
       uvalue1 = -value1;
     }
   else
@@ -13061,7 +13063,7 @@  smulh (sim_cpu *cpu)
 
   if (value2 < 0)
     {
-      signum *= -1L;
+      negate = !negate;
       uvalue2 = -value2;
     }
   else
@@ -13070,9 +13072,18 @@  smulh (sim_cpu *cpu)
     }
 
   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
+
   uresult = mul64hi (uvalue1, uvalue2);
   result = uresult;
-  result *= signum;
+
+  if (negate)
+    {
+      /* Multiply 128-bit result by -1, which means highpart gets inverted,
+	 and has carry in added only if low part is 0.  */
+      result = ~result;
+      if ((uvalue1 * uvalue2) == 0)
+	result += 1;
+    }
 
   aarch64_set_reg_s64 (cpu, rd, NO_SP, result);
 }
diff --git a/sim/testsuite/sim/aarch64/sumov.s b/sim/testsuite/sim/aarch64/sumov.s
index 69021cb..7180c6a 100644
--- a/sim/testsuite/sim/aarch64/sumov.s
+++ b/sim/testsuite/sim/aarch64/sumov.s
@@ -34,7 +34,7 @@  input:
 	smov w1, v0.h[4]
 	cmp w0, #0x0201
 	bne .Lfailure
-	cmp w1, #-2315
+	cmp w1, #-3343
 	bne .Lfailure
 
 	smov x0, v0.h[1]
@@ -50,8 +50,9 @@  input:
 	movk x2, #0x0807, lsl #16
 	cmp x0, x2
 	bne .Lfailure
-	mov x3, #0xf6f5
-	movk x3, #0xf8f7, lsl #16
+	mov w3, #0xf6f5
+	movk w3, #0xf8f7, lsl #16
+	sxtw x3, w3
 	cmp x1, x3
 	bne .Lfailure
 
@@ -64,9 +65,10 @@  input:
 
 	umov w0, v0.h[0]
 	umov w1, v0.h[4]
-	cmp w0, #0201
+	cmp w0, #0x0201
 	bne .Lfailure
-	cmp w1, #0xf2f1
+	mov w2, #0xf2f1
+	cmp w1, w2
 	bne .Lfailure
 
 	umov w0, v0.s[0]
diff --git a/sim/testsuite/sim/aarch64/sumulh.s b/sim/testsuite/sim/aarch64/sumulh.s
new file mode 100644
index 0000000..17f1ecd
--- /dev/null
+++ b/sim/testsuite/sim/aarch64/sumulh.s
@@ -0,0 +1,56 @@ 
+# mach: aarch64
+
+# Check the multiply highpart instructions: smulh, umulh.
+
+# Test -2*2, -1<<32*-1<<32, -2*-2, and 2*2.
+
+.include "testutils.inc"
+
+	.data
+	.align 4
+
+	start
+
+	mov x0, #-2
+	mov x1, #2
+	smulh x2, x0, x1
+	cmp x2, #-1
+	bne .Lfailure
+	umulh x3, x0, x1
+	cmp x3, #1
+	bne .Lfailure
+
+	mov w0, #-1
+	lsl x0, x0, #32 // 0xffffffff00000000
+	mov x1, x0
+	smulh x2, x0, x1
+	cmp x2, #1
+	bne .Lfailure
+	umulh x3, x0, x1
+	mov w4, #-2
+	lsl x4, x4, #32
+	add x4, x4, #1  // 0xfffffffe00000001
+	cmp x3, x4
+	bne .Lfailure
+
+	mov x0, #-2
+	mov x1, #-2
+	smulh x2, x0, x1
+	cmp x2, #0
+	bne .Lfailure
+	umulh x3, x0, x1
+	cmp x3, #-4
+	bne .Lfailure
+
+	mov x0, #2
+	mov x1, #2
+	smulh x2, x0, x1
+	cmp x2, #0
+	bne .Lfailure
+	umulh x3, x0, x1
+	cmp x3, #0
+	bne .Lfailure
+
+	pass
+.Lfailure:
+	fail
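
For reference, the expected values in sumulh.s can be cross-checked on
any 64-bit host with GCC's or Clang's __int128 (a quick verification
sketch, not part of the patch):

  #include <stdint.h>
  #include <stdio.h>

  /* Print the signed and unsigned high parts of a * b, computed the
     easy way with a 128-bit type.  */
  static void
  check (int64_t a, int64_t b)
  {
    int64_t  shi = (int64_t) (((__int128) a * b) >> 64);
    uint64_t uhi = (uint64_t) (((unsigned __int128) (uint64_t) a
				* (uint64_t) b) >> 64);
    printf ("%#018llx * %#018llx: smulh %#018llx umulh %#018llx\n",
	    (unsigned long long) a, (unsigned long long) b,
	    (unsigned long long) shi, (unsigned long long) uhi);
  }

  int
  main (void)
  {
    int64_t m = (int64_t) 0xffffffff00000000ULL;	/* -1 << 32 */
    check (-2, 2);	/* smulh -1, umulh 1 */
    check (m, m);	/* smulh 1, umulh 0xfffffffe00000001 */
    check (-2, -2);	/* smulh 0, umulh -4 (0xfffffffffffffffc) */
    check (2, 2);	/* smulh 0, umulh 0 */
    return 0;
  }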