[1/3] gdb: Start supporting AVX instruction

Message ID 20240521202800.2865871-2-blarsen@redhat.com
State New
Headers
Series Small step in supporting AVX instructions |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Testing passed

Commit Message

Guinevere Larsen May 21, 2024, 8:27 p.m. UTC
  This patch introduces the information needed to properly identify the
VEX prefix, used to signal an AVX and AVX2 instruction, and introduces
a helper function to handle all AVX instruction, instead of adding to
the 3000 line long recording function.

The new helper also handles unsupported instructions so that the largest
part of the i386_process_record doesn't have to be shifted by 2 spaces,
which made an unreadably big patch file.

The only expected difference added by this patch is a small change to
the unsupported message.

As a note for the future, we don't handle xmm16-31 and ymm16-31 because
those require the EVEX prefix, meaning avx512 support.
---
 gdb/amd64-tdep.c |  3 +-
 gdb/i386-tdep.c  | 91 ++++++++++++++++++++++++++++++++++++++++++++++--
 gdb/i386-tdep.h  |  2 ++
 3 files changed, 93 insertions(+), 3 deletions(-)
  

Comments

Tom Tromey June 5, 2024, 4:11 p.m. UTC | #1
>>>>> "Guinevere" == Guinevere Larsen <blarsen@redhat.com> writes:

Guinevere> +  /* These are used by VEX and XOP prefixes.  */
Guinevere> +  uint8_t map_select;
Guinevere> +  uint8_t vvvv;
Guinevere> +  uint8_t pp;
Guinevere> +  uint8_t l;

Are these the names used by the ISA?  Since if not they are pretty ugly.

Guinevere> +static bool
Guinevere> +i386_record_vex (struct i386_record_s *ir, uint8_t rex_w, uint8_t rex_r,
Guinevere> +		 int opcode, struct gdbarch *gdbarch)
Guinevere> +{
Guinevere> +  switch (opcode)
Guinevere> +    {
Guinevere> +    default:
Guinevere> +      gdb_printf (gdb_stderr,
Guinevere> +		  _("Process record does not support VEX instruction 0x%02x "
Guinevere> +		    "at address %s.\n"),
Guinevere> +		  (unsigned int) (opcode),
Guinevere> +		  paddress (gdbarch, ir->orig_addr));
Guinevere> +      return -1;

This should use true or false, not -1, because the function returns
bool.

I wouldn't be surprised to get some kind of dead code warning here,
since the switch only has a single branch.

Tom
  
Guinevere Larsen June 5, 2024, 5:33 p.m. UTC | #2
On 6/5/24 13:11, Tom Tromey wrote:
>>>>>> "Guinevere" == Guinevere Larsen <blarsen@redhat.com> writes:
> Guinevere> +  /* These are used by VEX and XOP prefixes.  */
> Guinevere> +  uint8_t map_select;
> Guinevere> +  uint8_t vvvv;
> Guinevere> +  uint8_t pp;
> Guinevere> +  uint8_t l;
>
> Are these the names used by the ISA?  Since if not they are pretty ugly.
Yes, these are the ISA names for those bits. I would use more 
descriptive names instead of ISA ones, but I have no idea what they are 
supposed to mean.
>
> Guinevere> +static bool
> Guinevere> +i386_record_vex (struct i386_record_s *ir, uint8_t rex_w, uint8_t rex_r,
> Guinevere> +		 int opcode, struct gdbarch *gdbarch)
> Guinevere> +{
> Guinevere> +  switch (opcode)
> Guinevere> +    {
> Guinevere> +    default:
> Guinevere> +      gdb_printf (gdb_stderr,
> Guinevere> +		  _("Process record does not support VEX instruction 0x%02x "
> Guinevere> +		    "at address %s.\n"),
> Guinevere> +		  (unsigned int) (opcode),
> Guinevere> +		  paddress (gdbarch, ir->orig_addr));
> Guinevere> +      return -1;
>
> This should use true or false, not -1, because the function returns
> bool.
nice catch! the function should be returning int instead.
>
> I wouldn't be surprised to get some kind of dead code warning here,
> since the switch only has a single branch.
no warnings in my system... but that is possible, I guess. Do you think 
it would be better to merge this commit with the following one?
  
Tom Tromey June 6, 2024, 5:01 p.m. UTC | #3
>>>>> "Guinevere" == Guinevere Larsen <blarsen@redhat.com> writes:

>> I wouldn't be surprised to get some kind of dead code warning here,
>> since the switch only has a single branch.

Guinevere> no warnings in my system... but that is possible, I guess. Do you
Guinevere> think it would be better to merge this commit with the following one?

I wouldn't worry about it.

Tom
  

Patch

diff --git a/gdb/amd64-tdep.c b/gdb/amd64-tdep.c
index df6b882a3fb..27d6e20f90c 100644
--- a/gdb/amd64-tdep.c
+++ b/gdb/amd64-tdep.c
@@ -3134,7 +3134,8 @@  static const int amd64_record_regmap[] =
   AMD64_R8_REGNUM, AMD64_R9_REGNUM, AMD64_R10_REGNUM, AMD64_R11_REGNUM,
   AMD64_R12_REGNUM, AMD64_R13_REGNUM, AMD64_R14_REGNUM, AMD64_R15_REGNUM,
   AMD64_RIP_REGNUM, AMD64_EFLAGS_REGNUM, AMD64_CS_REGNUM, AMD64_SS_REGNUM,
-  AMD64_DS_REGNUM, AMD64_ES_REGNUM, AMD64_FS_REGNUM, AMD64_GS_REGNUM
+  AMD64_DS_REGNUM, AMD64_ES_REGNUM, AMD64_FS_REGNUM, AMD64_GS_REGNUM,
+  AMD64_XMM0_REGNUM, AMD64_YMM0H_REGNUM
 };
 
 /* Implement the "in_indirect_branch_thunk" gdbarch function.  */
diff --git a/gdb/i386-tdep.c b/gdb/i386-tdep.c
index f1f909e1616..93a0926c4bc 100644
--- a/gdb/i386-tdep.c
+++ b/gdb/i386-tdep.c
@@ -4637,6 +4637,12 @@  struct i386_record_s
   int rip_offset;
   int popl_esp_hack;
   const int *regmap;
+
+  /* These are used by VEX and XOP prefixes.  */
+  uint8_t map_select;
+  uint8_t vvvv;
+  uint8_t pp;
+  uint8_t l;
 };
 
 /* Parse the "modrm" part of the memory address irp->addr points at.
@@ -4975,6 +4981,31 @@  static int i386_record_floats (struct gdbarch *gdbarch,
   return 0;
 }
 
+/* i386_process_record helper to deal with instructions that start
+   with VEX prefix.  */
+
+static bool
+i386_record_vex (struct i386_record_s *ir, uint8_t rex_w, uint8_t rex_r,
+		 int opcode, struct gdbarch *gdbarch)
+{
+  switch (opcode)
+    {
+    default:
+      gdb_printf (gdb_stderr,
+		  _("Process record does not support VEX instruction 0x%02x "
+		    "at address %s.\n"),
+		  (unsigned int) (opcode),
+		  paddress (gdbarch, ir->orig_addr));
+      return -1;
+    }
+
+  record_full_arch_list_add_reg (ir->regcache, ir->regmap[X86_RECORD_REIP_REGNUM]);
+  if (record_full_arch_list_add_end ())
+    return -1;
+
+  return 0;
+}
+
 /* Parse the current instruction, and record the values of the
    registers and memory that will be changed by the current
    instruction.  Returns -1 if something goes wrong, 0 otherwise.  */
@@ -4997,6 +5028,7 @@  i386_process_record (struct gdbarch *gdbarch, struct regcache *regcache,
   i386_gdbarch_tdep *tdep = gdbarch_tdep<i386_gdbarch_tdep> (gdbarch);
   uint8_t rex_w = -1;
   uint8_t rex_r = 0;
+  bool vex_prefix = false;
 
   memset (&ir, 0, sizeof (struct i386_record_s));
   ir.regcache = regcache;
@@ -5082,6 +5114,53 @@  i386_process_record (struct gdbarch *gdbarch, struct regcache *regcache,
 	  else					/* 32 bit target */
 	    goto out_prefixes;
 	  break;
+	case 0xc4:	/* 3-byte VEX prefixes (for AVX/AVX2 instructions).  */
+	  {
+	    /* The first byte just identifies the VEX prefix.  Data is stored
+	       on the following 2 bytes.  */
+	    uint8_t byte;
+	    if (record_read_memory (gdbarch, ir.addr, &byte, 1))
+	      return -1;
+	    ir.addr++;
+
+	    rex_r = !((byte >> 7) & 0x1);
+	    ir.rex_x = !((byte >> 6) & 0x1);
+	    ir.rex_b = !((byte >> 5) & 0x1);
+	    ir.map_select = byte & 0x1f;
+	    /* Collect the last byte of the prefix.  */
+	    if (record_read_memory (gdbarch, ir.addr, &byte, 1))
+	      return -1;
+	    ir.addr++;
+	    rex_w = (byte >> 7) & 0x1;
+	    ir.vvvv = (~(byte >> 3) & 0xf);
+	    ir.l = (byte >> 2) & 0x1;
+	    ir.pp = byte & 0x3;
+	    vex_prefix = true;
+
+	    break;
+	  }
+	case 0xc5:	/* 2-byte VEX prefix for AVX/AVX2 instructions.  */
+	  {
+	    /* The first byte just identifies the VEX prefix.  Data is stored
+	       on the following 2 bytes.  */
+	    uint8_t byte;
+	    if (record_read_memory (gdbarch, ir.addr, &byte, 1))
+	      return -1;
+	    ir.addr++;
+
+	    /* On the 2-byte versions, these are pre-defined.  */
+	    ir.rex_x = 0;
+	    ir.rex_b = 0;
+	    rex_w = 0;
+	    ir.map_select = 1;
+
+	    rex_r = !((byte >> 7) & 0x1);
+	    ir.vvvv = (~(byte >> 3) & 0xf);
+	    ir.l = (byte >> 2) & 0x1;
+	    ir.pp = byte & 0x3;
+	    vex_prefix = true;
+	    break;
+	  }
 	default:
 	  goto out_prefixes;
 	  break;
@@ -5104,6 +5183,12 @@  i386_process_record (struct gdbarch *gdbarch, struct regcache *regcache,
 
   /* Now check op code.  */
   opcode = (uint32_t) opcode8;
+  if (vex_prefix)
+    {
+      /* If we found the VEX prefix, i386 will either record or warn that
+	 the instruction isn't supported, so we can return the VEX result.  */
+      return i386_record_vex (&ir, rex_w, rex_r, opcode, gdbarch);
+    }
  reswitch:
   switch (opcode)
     {
@@ -8088,9 +8173,11 @@  static const int i386_record_regmap[] =
 {
   I386_EAX_REGNUM, I386_ECX_REGNUM, I386_EDX_REGNUM, I386_EBX_REGNUM,
   I386_ESP_REGNUM, I386_EBP_REGNUM, I386_ESI_REGNUM, I386_EDI_REGNUM,
-  0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0,
+  0, 0, 0, 0,
   I386_EIP_REGNUM, I386_EFLAGS_REGNUM, I386_CS_REGNUM, I386_SS_REGNUM,
-  I386_DS_REGNUM, I386_ES_REGNUM, I386_FS_REGNUM, I386_GS_REGNUM
+  I386_DS_REGNUM, I386_ES_REGNUM, I386_FS_REGNUM, I386_GS_REGNUM,
+  /* xmm0_regnum */ 0, I386_YMM0H_REGNUM
 };
 
 /* Check that the given address appears suitable for a fast
diff --git a/gdb/i386-tdep.h b/gdb/i386-tdep.h
index a85e0a984a0..1f21f8de06c 100644
--- a/gdb/i386-tdep.h
+++ b/gdb/i386-tdep.h
@@ -339,6 +339,8 @@  enum record_i386_regnum
   X86_RECORD_ES_REGNUM,
   X86_RECORD_FS_REGNUM,
   X86_RECORD_GS_REGNUM,
+  X86_RECORD_XMM0_REGNUM,
+  X86_RECORD_YMM0H_REGNUM,
 };
 
 #define I386_NUM_GREGS	16