@@ -4816,6 +4816,39 @@ i386_record_vex (struct i386_record_s *ir, uint8_t vex_w, uint8_t vex_r,
switch (opcode)
{
+ case 0x10: /* VMOVS[S|D] XMM, mem. */
+ /* VMOVUP[S|D] XMM, mem. */
+ case 0x28: /* VMOVAP[S|D] XMM, mem. */
+ /* Moving from memory region or XMM registers into an XMM register. */
+ i386_record_modrm (ir);
+ record_full_arch_list_add_reg (ir->regcache,
+ ir->regmap[X86_RECORD_XMM0_REGNUM]
+ + ir->reg + vex_r * 8);
+ break;
+ case 0x11: /* VMOVS[S|D] mem, XMM. */
+ /* VMOVUP[S|D] mem, XMM. */
+ case 0x29: /* VMOVAP[S|D] mem, XMM. */
+ /* Moving from an XMM register into a memory region. */
+ /* This can also be used for XMM -> XMM in some scenarios. */
+ i386_record_modrm (ir);
+ if (ir->mod == 3)
+ {
+ /* In this case the destination register is encoded differently
+ to any other AVX instruction I've seen so far. In this one,
+ VEX.B is the most significant bit of the destination. */
+ record_full_arch_list_add_reg (ir->regcache,
+ ir->regmap[X86_RECORD_XMM0_REGNUM]
+ + ir->rm + ir->rex_b * 8);
+ }
+ else
+ {
+ /* VEX.pp stores whether we're moving a single or double precision
+ float, and is used directly as the operand size (ir->ot) here.
+ NOTE(review): confirm this also holds for VMOVUP/VMOVAP stores. */
+ ir->ot = ir->pp;
+ i386_record_lea_modrm (ir);
+ }
+ break;
case 0x6e: /* VMOVD XMM, reg/mem */
/* This is moving from a regular register or memory region into an
XMM register. */
@@ -107,6 +107,67 @@ vmov_test ()
asm volatile ("vmovdqu %ymm2, %ymm15");
asm volatile ("vmovdqa %ymm15, %ymm0");
+ /* Testing vmov[ss|sd] instructions. */
+ /* Note, vmovss only works with XMM registers, not YMM registers,
+ according to the Intel manual. Also, initializing the variables
+ uses xmm0 on my machine, so we can't test with it; use xmm1
+ instead. */
+
+ /* Move single precision floats to and from memory. */
+ float f1 = 1.5, f2 = 4.2;
+ asm volatile ("vmovss %0, %%xmm1" : : "m"(f1));
+ asm volatile ("vmovss %0, %%xmm15": : "m"(f2));
+ asm volatile ("vmovss %%xmm1, %0" : "=m"(f2));
+ asm volatile ("vmovss %%xmm15, %0": "=m"(f1));
+
+ asm volatile ("vmovss %xmm15, %xmm1, %xmm2");
+ asm volatile ("vmovss %xmm15, %xmm1, %xmm8");
+ asm volatile ("vmovss %xmm1, %xmm2, %xmm15");
+ asm volatile ("vmovss %xmm2, %xmm15, %xmm1");
+
+ /* Testing double precision floats. */
+ double d1 = -1.5, d2 = -2.5;
+ asm volatile ("vmovsd %0, %%xmm1" : : "m"(d1));
+ asm volatile ("vmovsd %0, %%xmm15": : "m"(d2));
+ asm volatile ("vmovsd %%xmm1, %0" : "=m"(d2));
+ asm volatile ("vmovsd %%xmm15, %0": "=m"(d1));
+
+ asm volatile ("vmovsd %xmm15, %xmm1, %xmm2");
+ asm volatile ("vmovsd %xmm15, %xmm1, %xmm8");
+ asm volatile ("vmovsd %xmm1, %xmm2, %xmm15");
+ asm volatile ("vmovsd %xmm2, %xmm15, %xmm1");
+
+ /* "reset" all the buffers. This doesn't zero them all, but
+ it zeroes the start which lets us ensure the tests see
+ some changes. */
+ asm volatile ("vmovq %%xmm3, %0": "=m" (buf1));
+ asm volatile ("vmovq %%xmm3, %0": "=m" (global_buf1));
+ asm volatile ("vmovq %%xmm3, %0": "=m" (*dyn_buf1));
+
+ /* Testing vmovu[ps|pd] instructions. Even though there are aligned
+ versions of these instructions like vmovdq[u|a], they have different
+ opcodes, meaning they'll need to be tested separately. */
+
+ asm volatile ("vmovups %0, %%xmm0" : : "m"(buf0));
+ asm volatile ("vmovupd %0, %%xmm15" : : "m"(buf1));
+ asm volatile ("vmovupd %%xmm0, %0" : : "m"(buf1));
+ asm volatile ("vmovups %%xmm15, %0" : : "m"(buf1));
+
+ asm volatile ("vmovups %0, %%xmm0" : : "m"(global_buf0));
+ asm volatile ("vmovupd %0, %%xmm15" : : "m"(global_buf1));
+ asm volatile ("vmovupd %%xmm0, %0" : : "m"(global_buf1));
+ asm volatile ("vmovups %%xmm15, %0" : : "m"(global_buf1));
+
+ asm volatile ("vmovups %0, %%xmm0" : : "m"(*dyn_buf0));
+ asm volatile ("vmovupd %0, %%xmm15" : : "m"(*dyn_buf1));
+ asm volatile ("vmovupd %%xmm0, %0" : : "m"(*dyn_buf1));
+ asm volatile ("vmovups %%xmm15, %0" : : "m"(*dyn_buf1));
+
+ asm volatile ("vmovaps %0, %%xmm0" : : "m"(*dyn_buf0));
+ asm volatile ("vmovapd %0, %%xmm15" : : "m"(*dyn_buf1));
+ asm volatile ("vmovapd %%xmm0, %0" : : "m"(*dyn_buf1));
+ asm volatile ("vmovaps %%xmm15, %0" : : "m"(*dyn_buf1));
+
/* We have a return statement to deal with
epilogue in different compilers. */
return 0; /* end vmov_test */
@@ -145,6 +145,63 @@ global decimal
if {[record_full_function "vmov"] == true} {
# Now execute backwards, checking all instructions.
+
+ test_one_memory "vmovaps" "dyn_buf1" \
+ "0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28" true
+ test_one_memory "vmovapd" "dyn_buf1" \
+ "0x54, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x28" true
+ test_one_register "vmovapd" "xmm15" ".*" "dynamic buffer: "
+ test_one_register "vmovaps" "xmm0" ".*" "dynamic buffer: "
+
+ test_one_memory "vmovups" "dyn_buf1" \
+ "0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28" true
+ test_one_memory "vmovupd" "dyn_buf1" \
+ "0x54, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x28" true
+ test_one_register "vmovupd" "xmm15" ".*" "dynamic buffer: "
+ test_one_register "vmovups" "xmm0" ".*" "dynamic buffer: "
+
+ test_one_memory "vmovups" "global_buf1" \
+ "0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18"
+ test_one_memory "vmovupd" "global_buf1" \
+ "0x54, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x18"
+ test_one_register "vmovupd" "xmm15" ".*" "global buffer: "
+ test_one_register "vmovups" "xmm0" ".*" "global buffer: "
+
+ test_one_memory "vmovups" "buf1" \
+ "0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38"
+ test_one_memory "vmovupd" "buf1" \
+ "0x54, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x38"
+ test_one_register "vmovupd" "xmm15" "0xbff8000000000000" "local buffer: "
+ test_one_register "vmovups" "xmm0" "0xc004000000000000" "local buffer: "
+
+ gdb_test "rs 3" "vmovq %%xmm3.*"
+
+ test_one_register "vmovsd" "xmm1" "0xbff8000000000000" "from register: "
+ test_one_register "vmovsd" "xmm15" "0xc004000000000000" "from register: "
+ test_one_register "vmovsd" "xmm8" "0x40866666" "from register: "
+ test_one_register "vmovsd" "xmm2" "0x40866666" "from register: "
+
+ test_one_memory "vmovsd" "d1" "0xbff8000000000000"
+ test_one_memory "vmovsd" "d2" "0xc004000000000000"
+ test_one_register "vmovsd" "xmm15" "0x3fc00000"
+ test_one_register "vmovsd" "xmm1" "0x40866666"
+
+ # Reverse step over the line initializing the doubles.
+ gdb_test "rs" "double .*" "step over double initialization"
+
+ test_one_register "vmovss" "xmm1" "0x3fc00000" "from register: "
+ test_one_register "vmovss" "xmm15" "0x40866666" "from register: "
+ test_one_register "vmovss" "xmm8" "0" "from register: "
+ test_one_register "vmovss" "xmm2" "0" "from register: "
+
+ test_one_memory "vmovss" "f1" "0x3fc00000"
+ test_one_memory "vmovss" "f2" "0x40866666"
+ test_one_register "vmovss" "xmm15" "0x0"
+ test_one_register "vmovss" "xmm1" "0x0"
+
+ # Reverse step over the line initializing the floats.
+ gdb_test "rs" "float .*" "step over float initialization"
+
test_one_register "vmovdqa" "ymm0" \
"0x2f2e2d2c2b2a29282726252423222120, 0x2f2e2d2c2b2a29282726252423222120" \
"from register: "