[v4,6/7] gdb/record: support AVX instructions VMOVDQ(U|A) when recording

Message ID 20240923151541.616723-8-guinevere@redhat.com
State New
Series Support for recording some AVX instructions

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gdb_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Test passed

Commit Message

Guinevere Larsen Sept. 23, 2024, 3:15 p.m. UTC
This commit adds support for the VMOVDQU and VMOVDQA instructions, which
are used to move values to and from 256-bit registers.  Unfortunately, the
programmer's manual is very incomplete (if not outright wrong) about these
instructions, so the logic had to be reverse engineered from how gcc
actually encodes them.

This commit also enlarges the memory regions used by the test to store 256
bits, making it easier to exercise these instructions and to verify that
we're recording the ymm registers correctly.
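
As an illustration (not part of the change itself), here is a minimal
sketch of the two directions the recorder has to tell apart, written in
the same GCC inline-asm style as the testcase; the function and buffer
names below (avx_record_sketch, src, dst) are made up for this example:

/* Opcode 0x6f moves memory/register into a ymm register, so only the
   destination register has to be recorded.  Opcode 0x7f moves a ymm
   register into memory/register, so the 32 bytes being overwritten have
   to be recorded instead.  */
static char src[32];
static char dst[32];

void
avx_record_sketch (void)
{
  /* 0x6f form: load from memory into ymm0; the old ymm0 value must be
     saved.  */
  asm volatile ("vmovdqu %0, %%ymm0" : : "m" (src));

  /* 0x7f form: store ymm0 to memory; the old 32 bytes of dst must be
     saved.  */
  asm volatile ("vmovdqu %%ymm0, %0" : "=m" (dst));
}

Loads only clobber the destination register while stores clobber memory,
which is why the new case in i386_record_vex handles the two opcodes
differently.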
---
 gdb/i386-tdep.c                               | 49 +++++++++++++++++++
 gdb/testsuite/gdb.reverse/i386-avx-reverse.c  | 42 ++++++++++++++--
 .../gdb.reverse/i386-avx-reverse.exp          | 28 +++++++++++
 3 files changed, 115 insertions(+), 4 deletions(-)
  

Patch

diff --git a/gdb/i386-tdep.c b/gdb/i386-tdep.c
index e4e808cf4b0..6d3b98dc302 100644
--- a/gdb/i386-tdep.c
+++ b/gdb/i386-tdep.c
@@ -5071,6 +5071,55 @@  i386_record_vex (struct i386_record_s *ir, uint8_t vex_w, uint8_t vex_r,
 	}
       break;
 
+    case 0x6f: /* VMOVDQ (U|A)  */
+    case 0x7f: /* VMOVDQ (U|A)  */
+      /* vmovdq instructions have information about source/destination
+	 spread over many places, so this code ended up messier than
+	 I'd like.  */
+      /* The VEX.pp bits identify if the move is aligned or not, but this
+	 doesn't influence the recording so we can ignore it.  */
+      i386_record_modrm (ir);
+      /* The mod field of ModRM identifies whether both operands of the
+	 instruction are registers (mod == 3) or one of them is memory.  */
+      if (ir->mod == 3)
+	{
+	  if (opcode == 0x6f)
+	    {
+	      /* vex_r will identify the high bit of the destination
+		 register.  Source is identified by ir->rex_b, but that
+		 doesn't matter for recording.  */
+	      record_full_arch_list_add_reg (ir->regcache,
+					     tdep->ymm0_regnum + 8*vex_r + ir->reg);
+	    }
+	  else
+	    {
+	      /* The source operand is > 7 and the destination operand
+		 is <= 7.  This is special-cased because here vex_r
+		 identifies the high bit of the SOURCE operand, not the
+		 destination, which would break the expression above.  */
+	      record_full_arch_list_add_reg (ir->regcache,
+					     tdep->ymm0_regnum + ir->rm);
+	    }
+	}
+      else
+	{
+	  /* This is the easy branch.  We just need to check the opcode
+	     to see if the source or destination is memory.  */
+	  if (opcode == 0x6f)
+	    {
+	      record_full_arch_list_add_reg (ir->regcache,
+					     tdep->ymm0_regnum
+					      + ir->reg + vex_r * 8);
+	    }
+	  else
+	    {
+	      /* We're writing 32 bytes (256 bits), so 1 << 5.  */
+	      ir->ot = 5;
+	      i386_record_lea_modrm (ir);
+	    }
+	}
+      break;
+
     case 0x60:	/* VPUNPCKLBW  */
     case 0x61:	/* VPUNPCKLWD  */
     case 0x62:	/* VPUNPCKLDQ  */
diff --git a/gdb/testsuite/gdb.reverse/i386-avx-reverse.c b/gdb/testsuite/gdb.reverse/i386-avx-reverse.c
index 16303a42248..87574983c8a 100644
--- a/gdb/testsuite/gdb.reverse/i386-avx-reverse.c
+++ b/gdb/testsuite/gdb.reverse/i386-avx-reverse.c
@@ -20,8 +20,12 @@ 
 #include <stdlib.h>
 
 char global_buf0[] = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+		      0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+		      0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
 		      0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
 char global_buf1[] = {0, 0, 0, 0, 0, 0, 0, 0,
+		      0, 0, 0, 0, 0, 0, 0, 0,
+		      0, 0, 0, 0, 0, 0, 0, 0,
 		      0, 0, 0, 0, 0, 0, 0, 0};
 char *dyn_buf0;
 char *dyn_buf1;
@@ -30,8 +34,12 @@  int
 vmov_test ()
 {
   char buf0[] = {0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+		 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+		 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
 		 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f};
   char buf1[] = {0, 0, 0, 0, 0, 0, 0, 0,
+		 0, 0, 0, 0, 0, 0, 0, 0,
+		 0, 0, 0, 0, 0, 0, 0, 0,
 		 0, 0, 0, 0, 0, 0, 0, 0};
 
   /*start vmov_test.  */
@@ -73,6 +81,32 @@  vmov_test ()
   asm volatile ("vmovq %0, %%xmm15": "=m" (buf0));
   asm volatile ("vmovq %0, %%xmm15": "=m" (buf1));
 
+  /* Test vmovdq style instructions.  */
+  /* For local and dynamic buffers, we can't guarantee they will be aligned.
+     However, aligned and unaligned variants differ only in bits the
+     recorder ignores, so testing one is enough to validate both.  */
+
+  /* Operations based on local buffers.  */
+  asm volatile ("vmovdqu %0, %%ymm0": : "m"(buf0));
+  asm volatile ("vmovdqu %%ymm0, %0": "=m"(buf1));
+
+  /* Operations based on global buffers.  */
+  /* Global buffers seem to always be aligned, let's sanity check vmovdqa.  */
+  asm volatile ("vmovdqa %0, %%ymm15": : "m"(global_buf0));
+  asm volatile ("vmovdqa %%ymm15, %0": "=m"(global_buf1));
+  asm volatile ("vmovdqu %0, %%ymm0": : "m"(global_buf0));
+  asm volatile ("vmovdqu %%ymm0, %0": "=m"(global_buf1));
+
+  /* Operations based on dynamic buffers.  */
+  /* The dynamic buffers are not aligned, so we skip vmovdqa.  */
+  asm volatile ("vmovdqu %0, %%ymm0": : "m"(*dyn_buf0));
+  asm volatile ("vmovdqu %%ymm0, %0": "=m"(*dyn_buf1));
+
+  /* Operations between 2 registers.  */
+  asm volatile ("vmovdqu %ymm15, %ymm0");
+  asm volatile ("vmovdqu %ymm2, %ymm15");
+  asm volatile ("vmovdqa %ymm15, %ymm0");
+
   /* We have a return statement to deal with
      epilogue in different compilers.  */
   return 0; /* end vmov_test */
@@ -161,11 +195,11 @@  vpbroadcast_test ()
 int
 main ()
 {
-  dyn_buf0 = (char *) malloc(sizeof(char) * 16);
-  dyn_buf1 = (char *) malloc(sizeof(char) * 16);
-  for (int i =0; i < 16; i++)
+  dyn_buf0 = (char *) malloc(sizeof(char) * 32);
+  dyn_buf1 = (char *) malloc(sizeof(char) * 32);
+  for (int i =0; i < 32; i++)
     {
-      dyn_buf0[i] = 0x20 + i;
+      dyn_buf0[i] = 0x20 + (i % 16);
       dyn_buf1[i] = 0;
     }
   /* Zero relevant xmm registers, se we know what to look for.  */
diff --git a/gdb/testsuite/gdb.reverse/i386-avx-reverse.exp b/gdb/testsuite/gdb.reverse/i386-avx-reverse.exp
index 75c313c2225..aea5e395cf8 100644
--- a/gdb/testsuite/gdb.reverse/i386-avx-reverse.exp
+++ b/gdb/testsuite/gdb.reverse/i386-avx-reverse.exp
@@ -134,6 +134,34 @@  global decimal
 
 if {[record_full_function "vmov"] == true} {
     # Now execute backwards, checking all instructions.
+    test_one_register "vmovdqa" "ymm0" \
+	"0x1f1e1d1c1b1a19181716151413121110, 0x1f1e1d1c1b1a19181716151413121110" \
+	"from register: "
+    test_one_register "vmovdqu" "ymm15" \
+	"0x1f1e1d1c1b1a19181716151413121110, 0x1f1e1d1c1b1a19181716151413121110" \
+	"from register: "
+    test_one_register "vmovdqu" "ymm0" \
+	"0x2f2e2d2c2b2a29282726252423222120, 0x2f2e2d2c2b2a29282726252423222120" \
+	"from register: "
+
+    test_one_memory "vmovdqu" "dyn_buf1" "0x0 .repeats 32 times" \
+	true "dynamic buffer: "
+    test_one_register "vmovdqu" "ymm0" \
+	"0x1f1e1d1c1b1a19181716151413121110, 0x1f1e1d1c1b1a19181716151413121110" \
+	"dynamic buffer: "
+
+    # Don't check the full buffer because that'd be too long
+    test_one_memory "vmovdqu" "global_buf1" \
+	"0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19" \
+	"global buffer: "
+    test_one_register "vmovdqu" "ymm0" \
+	"0x3f3e3d3c3b3a39383736353433323130, 0x3f3e3d3c3b3a39383736353433323130" \
+	"global buffer: "
+    test_one_memory "vmovdqa" "global_buf1" "0x0 .repeats 32 times"
+    test_one_register "vmovdqa" "ymm15" "0x0, 0x0"
+
+    test_one_memory "vmovdqu" "buf1" "0x0 .repeats 32 times"
+    test_one_register "vmovdqu" "ymm0" "0x2726252423222120, 0x0" "local buffer: "
 
     test_one_register "vmovq" "xmm15" "0x3736353433323130" "reg_reset: "
     test_one_register "vmovq" "xmm15" "0x0"