aarch64 sim bit/bif bug fix

Message ID CABXYE2UVXL+xv3qK9YzGeqNhtxDnSOs0FV1qjNc99okrvhGceg@mail.gmail.com
State New, archived
Headers

Commit Message

Jim Wilson Feb. 5, 2017, 2:03 a.m. UTC
  This fixes the bit and bif instructions.  The code is using u32
set/get, but 16/8 loop bounds, which gives out of range errors.  Also,
it is computing the value wrong, as it is a bitwise operation, not an
operation checking byte/word values for zero/non-zero.  I rewrote the
code a little to make the operation more obvious.  It now looks more
like bsl which is a closely related instruction.

The testcase fails without the patch and works with the patch.  GCC C
testsuite failures drop from 1701 to 1590 (-111).

Jim
  

Comments

Nick Clifton Feb. 13, 2017, 3:36 p.m. UTC | #1
Hi Jim,

> This fixes the bit and bif instructions.  The code is using u32
> set/get, but 16/8 loop bounds, which gives out of range errors.  Also,
> it is computing the value wrong, as it is a bitwise operation, not an
> operation checking byte/word values for zero/non-zero.  I rewrote the
> code a little to make the operation more obvious.  It now looks more
> like bsl which is a closely related instruction.
> 
> The testcase fails without the patch and works with the patch.  GCC C
> testsuite failures drop from 1701 to 1590 (-111).

Approved - please apply.

Cheers
  Nick
  

Patch

	sim/aarch64/
	* simulator.c (do_vec_bit): Change loop limits from 16 and 8 to 4 and
	2.  Move test_false if inside loop.  Fix logic for computing result
	stored to vd.

	sim/testsuite/sim/aarch64
	* bit.s: New.

diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c
index a44e70a..13a2b1f 100644
--- a/sim/aarch64/simulator.c
+++ b/sim/aarch64/simulator.c
@@ -4085,17 +4085,17 @@  do_vec_bit (sim_cpu *cpu)
   NYI_assert (15, 10, 0x07);
 
   TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
-  if (test_false)
-    {
-      for (i = 0; i < (full ? 16 : 8); i++)
-	if (aarch64_get_vec_u32 (cpu, vn, i) == 0)
-	  aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i));
-    }
-  else
+  for (i = 0; i < (full ? 4 : 2); i++)
     {
-      for (i = 0; i < (full ? 16 : 8); i++)
-	if (aarch64_get_vec_u32 (cpu, vn, i) != 0)
-	  aarch64_set_vec_u32 (cpu, vd, i, aarch64_get_vec_u32 (cpu, vm, i));
+      uint32_t vd_val = aarch64_get_vec_u32 (cpu, vd, i);
+      uint32_t vn_val = aarch64_get_vec_u32 (cpu, vn, i);
+      uint32_t vm_val = aarch64_get_vec_u32 (cpu, vm, i);
+      if (test_false)
+	aarch64_set_vec_u32 (cpu, vd, i,
+			     (vd_val & vm_val) | (vn_val & ~vm_val));
+      else
+	aarch64_set_vec_u32 (cpu, vd, i,
+			     (vd_val & ~vm_val) | (vn_val & vm_val));
     }
 }
 
diff --git a/sim/testsuite/sim/aarch64/bit.s b/sim/testsuite/sim/aarch64/bit.s
new file mode 100644
index 0000000..650d317
--- /dev/null
+++ b/sim/testsuite/sim/aarch64/bit.s
@@ -0,0 +1,99 @@
+# mach: aarch64
+
+# Check the bitwise vector instructions: bif, bit, bsl, eor.
+#
+# Each check applies one instruction to the vectors loaded below, sums the
+# bytes of the result with addv, and compares the sum with a precomputed
+# constant.  addv with a byte destination produces an 8-bit sum, so only
+# byte 0 of v4 is read back and each expected value is the full byte sum
+# reduced modulo 256 (the full sum is noted beside each comparison).
+
+.include "testutils.inc"
+
+	.data
+	.align 4
+inputa:
+	.word 0x04030201
+	.word 0x08070605
+	.word 0x0c0b0a09
+	.word 0x100f0e0d
+inputb:
+	.word 0x40302010
+	.word 0x80706050
+	.word 0xc0b0a090
+	.word 0x01f0e0d0
+mask:
+	.word 0xFF00FF00
+	.word 0x00FF00FF
+	.word 0xF0F0F0F0
+	.word 0x0F0F0F0F
+
+	start
+	adrp x0, inputa
+	ldr q0, [x0, #:lo12:inputa]
+	adrp x0, inputb
+	ldr q1, [x0, #:lo12:inputb]
+	adrp x0, mask
+	ldr q2, [x0, #:lo12:mask]
+
+# bif (insert if false): v3 = (v3 & v2) | (v1 & ~v2).
+	mov v3.8b, v0.8b
+	bif v3.8b, v1.8b, v2.8b
+	addv b4, v3.8b
+	umov w1, v4.b[0]
+	cmp w1, #50		// 306 mod 256
+	bne .Lfailure
+
+	mov v3.16b, v0.16b
+	bif v3.16b, v1.16b, v2.16b
+	addv b4, v3.16b
+	umov w1, v4.b[0]
+	cmp w1, #252		// 1020 mod 256
+	bne .Lfailure
+
+# bit (insert if true): v3 = (v3 & ~v2) | (v1 & v2).
+	mov v3.8b, v0.8b
+	bit v3.8b, v1.8b, v2.8b
+	addv b4, v3.8b
+	umov w1, v4.b[0]
+	cmp w1, #50		// 306 mod 256
+	bne .Lfailure
+
+	mov v3.16b, v0.16b
+	bit v3.16b, v1.16b, v2.16b
+	addv b4, v3.16b
+	umov w1, v4.b[0]
+	cmp w1, #13		// 1037 mod 256
+	bne .Lfailure
+
+# bsl (select): v3 holds the mask on entry; v3 = (v0 & v3) | (v1 & ~v3).
+	mov v3.8b, v2.8b
+	bsl v3.8b, v0.8b, v1.8b
+	addv b4, v3.8b
+	umov w1, v4.b[0]
+	cmp w1, #50		// 306 mod 256
+	bne .Lfailure
+
+	mov v3.16b, v2.16b
+	bsl v3.16b, v0.16b, v1.16b
+	addv b4, v3.16b
+	umov w1, v4.b[0]
+	cmp w1, #252		// 1020 mod 256
+	bne .Lfailure
+
+# eor: v3 = v1 ^ v2.  The destination is write-only, so v3 is not preloaded.
+	eor v3.8b, v1.8b, v2.8b
+	addv b4, v3.8b
+	umov w1, v4.b[0]
+	cmp w1, #252		// 1020 mod 256
+	bne .Lfailure
+
+	eor v3.16b, v1.16b, v2.16b
+	addv b4, v3.16b
+	umov w1, v4.b[0]
+	cmp w1, #247		// 2039 mod 256
+	bne .Lfailure
+
+	pass
+.Lfailure:
+	fail