ld/pdb: Handle DEBUG_S_INLINEELINES data

Message ID a7bfd2c6-ff49-40a7-bc00-ac50f4c31427@harmstone.com
State New
Headers
Series ld/pdb: Handle DEBUG_S_INLINEELINES data |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 fail Patch failed to apply

Commit Message

Mark Harmstone Sept. 22, 2024, 7:52 p.m. UTC
  Jan, am I okay to commit this? It's needed for some gcc work I've been doing.

This is, I think, the last major PDB-related thing missing from ld.

Thanks

Mark

-------- Forwarded Message --------
Subject: [PATCH] ld/pdb: Handle DEBUG_S_INLINEELINES data
Date: Tue, 10 Sep 2024 01:27:11 +0100
From: Mark Harmstone <mark@harmstone.com>
To: binutils@sourceware.org
CC: Mark Harmstone <mark@harmstone.com>

The DEBUG_S_INLINEELINES block in the .debug$S section records the line
numbers in a source file covered by inlined functions. It's similar to
the DEBUG_S_LINES block, but as it references LF_FUNC_ID types we also
need to parse it to remap the type numbers.
---
  ld/pdb.c                                      |  84 +++++++++
  ld/pdb.h                                      |   3 +
  .../ld-pe/pdb-inlineelines1-c13-info2.d       |  10 ++
  ld/testsuite/ld-pe/pdb-inlineelines1a.s       |  20 +++
  ld/testsuite/ld-pe/pdb-inlineelines1b.s       | 160 ++++++++++++++++++
  ld/testsuite/ld-pe/pdb.exp                    |  94 ++++++++++
  6 files changed, 371 insertions(+)
  create mode 100644 ld/testsuite/ld-pe/pdb-inlineelines1-c13-info2.d
  create mode 100644 ld/testsuite/ld-pe/pdb-inlineelines1a.s
  create mode 100644 ld/testsuite/ld-pe/pdb-inlineelines1b.s

  test7
  test8
  test9
+test10
  

Comments

Jan Beulich Sept. 23, 2024, 10:47 a.m. UTC | #1
On 22.09.2024 21:52, Mark Harmstone wrote:
> Jan, am I okay to commit this? It's needed for some gcc work I've been doing.
> 
> This is, I think, the last major PDB-related thing missing from ld.

I'm sorry, this one slipped my attention. You're okay to put it in. One
remark though (albeit looking at existing ld-pe/pdb-*.s I'm apparently
ages too late): It would be nice if testcases were universally well-
formed assembly. That is (what I specifically notice here): directives
never start in the very first column. Gas accepts that (and probably
always will) on most targets (and apparently all where PDB presently
matters), but at least some specific targets permit only labels to live
at the very beginning of a statement.

Jan
  

Patch

diff --git a/ld/pdb.c b/ld/pdb.c
index a15da9db73d..5fd55fbb063 100644
--- a/ld/pdb.c
+++ b/ld/pdb.c
@@ -161,6 +161,9 @@  static const uint32_t crc_table[] =
    0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
  };
  +static bool remap_type (void *data, struct type_entry **map,
+			uint32_t type_num, uint32_t num_types);
+
  /* Add a new stream to the PDB archive, and return its BFD.  */
  static bfd *
  add_stream (bfd *pdb, const char *name, uint16_t *stream_num)
@@ -1836,6 +1839,76 @@  calculate_symbols_size (uint8_t *data, uint32_t size, uint32_t *sym_size)
    return true;
  }
  +/* Parse the DEBUG_S_INLINEELINES data, which records the line numbers that
+   correspond to inlined functions.  This is similar to DEBUG_S_LINES (see
+   handle_debugs_section), but rather than just copying we also need to remap
+   the numbers of the referenced LF_FUNC_ID types.  */
+
+static bool
+parse_inlinee_lines (uint8_t *data, uint32_t size, uint8_t **bufptr,
+		     struct type_entry **map, uint32_t num_types)
+{
+  uint32_t version;
+  uint8_t *ptr;
+  unsigned int num_entries;
+
+  bfd_putl32 (DEBUG_S_INLINEELINES, *bufptr);
+  *bufptr += sizeof (uint32_t);
+
+  bfd_putl32 (size, *bufptr);
+  *bufptr += sizeof (uint32_t);
+
+  /* The inlinee lines data consists of a version uint32_t (0), followed by an
+     array of struct inlinee_source_line:
+
+     struct inlinee_source_line
+     {
+	uint32_t function_id;
+	uint32_t file_id;
+	uint32_t line_no;
+     };
+
+     (see InlineeSourceLine in cvinfo.h)
+
+     We're only interested here in the function_id, as we need to remap its
+     type number.
+  */
+
+  if (size < sizeof (uint32_t))
+    {
+      einfo (_("%P: warning: truncated DEBUG_S_INLINEELINES data\n"));
+      return false;
+    }
+
+  version = bfd_getl32 (data + sizeof (uint32_t) + sizeof (uint32_t));
+  if (version != CV_INLINEE_SOURCE_LINE_SIGNATURE)
+    {
+      einfo (_("%P: warning: unexpected DEBUG_S_INLINEELINES version %u\n"),
+	     version);
+      return false;
+    }
+
+  memcpy (*bufptr, data, size);
+  ptr = *bufptr + sizeof (uint32_t);
+  *bufptr += size;
+
+  num_entries = (size - sizeof (uint32_t)) / (3 * sizeof (uint32_t));
+
+  for (unsigned int i = 0; i < num_entries; i++)
+    {
+      uint32_t func_id;
+
+      func_id = bfd_getl32 (ptr);
+
+      if (!remap_type (ptr, map, func_id, num_types))
+	return false;
+
+      ptr += 3 * sizeof (uint32_t);
+    }
+
+  return true;
+}
+
  /* Parse the .debug$S section within an object file.  */
  static bool
  handle_debugs_section (asection *s, bfd *mod, struct string_table *strings,
@@ -1951,6 +2024,7 @@  handle_debugs_section (asection *s, bfd *mod, struct string_table *strings,
        switch (type)
  	{
  	case DEBUG_S_FILECHKSMS:
+	case DEBUG_S_INLINEELINES:
  	  c13_size += sizeof (uint32_t) + sizeof (uint32_t) + size;
   	  if (c13_size % sizeof (uint32_t))
@@ -2088,6 +2162,16 @@  handle_debugs_section (asection *s, bfd *mod, struct string_table *strings,
  	    }
   	  break;
+
+	case DEBUG_S_INLINEELINES:
+	  if (!parse_inlinee_lines (data + off, size, &bufptr, map, num_types))
+	    {
+	      free (data);
+	      free (symbuf);
+	      return false;
+	    }
+
+	  break;
  	}
         off += size;
diff --git a/ld/pdb.h b/ld/pdb.h
index a9b518a9f99..06a58bbb0ac 100644
--- a/ld/pdb.h
+++ b/ld/pdb.h
@@ -241,10 +241,13 @@  struct optional_dbg_header
  #define DEBUG_S_LINES			0xf2
  #define DEBUG_S_STRINGTABLE		0xf3
  #define DEBUG_S_FILECHKSMS		0xf4
+#define DEBUG_S_INLINEELINES		0xf6
   #define STRING_TABLE_SIGNATURE		0xeffeeffe
  #define STRING_TABLE_VERSION		1
  +#define CV_INLINEE_SOURCE_LINE_SIGNATURE	0
+
  /* VHdr in nmt.h */
  struct string_table_header
  {
diff --git a/ld/testsuite/ld-pe/pdb-inlineelines1-c13-info2.d b/ld/testsuite/ld-pe/pdb-inlineelines1-c13-info2.d
new file mode 100644
index 00000000000..ac4e49bbe46
--- /dev/null
+++ b/ld/testsuite/ld-pe/pdb-inlineelines1-c13-info2.d
@@ -0,0 +1,10 @@ 
+
+tmpdir/pdb-inlineelines1-c13-info2:     file format binary
+
+Contents of section .data:
+ 0000 f4000000 30000000 02000000 10016745  ....0.........gE
+ 0010 2301efcd ab8998ba dcfe1023 45670000  #..........#Eg..
+ 0020 08000000 100198ba dcfe1023 45676745  ...........#EggE
+ 0030 2301efcd ab890000 f6000000 1c000000  #...............
+ 0040 00000000 02100000 00000000 2a000000  ............*...
+ 0050 03100000 18000000 1c000000           ............    diff --git a/ld/testsuite/ld-pe/pdb-inlineelines1a.s b/ld/testsuite/ld-pe/pdb-inlineelines1a.s
new file mode 100644
index 00000000000..1ab2617d80e
--- /dev/null
+++ b/ld/testsuite/ld-pe/pdb-inlineelines1a.s
@@ -0,0 +1,20 @@ 
+.equ CV_SIGNATURE_C13, 4
+
+.equ LF_STRING_ID, 0x1605
+
+.equ CV_INLINEE_SOURCE_LINE_SIGNATURE, 0
+
+.section ".debug$T", "rn"
+
+.long CV_SIGNATURE_C13
+
+/* Type 1000, string "hello" */
+.string1:
+.short .types_end - .string1 - 2
+.short LF_STRING_ID
+.long 0 /* sub-string */
+.asciz "hello"
+.byte 0xf2 /* padding */
+.byte 0xf1 /* padding */
+
+.types_end:
diff --git a/ld/testsuite/ld-pe/pdb-inlineelines1b.s b/ld/testsuite/ld-pe/pdb-inlineelines1b.s
new file mode 100644
index 00000000000..d8d917068f3
--- /dev/null
+++ b/ld/testsuite/ld-pe/pdb-inlineelines1b.s
@@ -0,0 +1,160 @@ 
+.equ CV_SIGNATURE_C13, 4
+
+.equ DEBUG_S_STRINGTABLE, 0xf3
+.equ DEBUG_S_FILECHKSMS, 0xf4
+.equ DEBUG_S_INLINEELINES, 0xf6
+.equ CHKSUM_TYPE_MD5, 1
+
+.equ NUM_MD5_BYTES, 16
+
+.equ T_VOID, 0x0003
+.equ T_UINT4, 0x0075
+
+.equ LF_ARGLIST, 0x1201
+.equ LF_PROCEDURE, 0x1008
+.equ LF_FUNC_ID, 0x1601
+.equ LF_STRING_ID, 0x1605
+
+.equ CV_INLINEE_SOURCE_LINE_SIGNATURE, 0
+
+.section ".debug$T", "rn"
+
+.long CV_SIGNATURE_C13
+
+/* Type 1000, string "world" */
+.string1:
+.short .arglist1 - .string1 - 2
+.short LF_STRING_ID
+.long 0 /* sub-string */
+.asciz "world"
+.byte 0xf2 /* padding */
+.byte 0xf1 /* padding */
+
+/* Type 1001, arglist (uint32_t) */
+.arglist1:
+.short .proctype1 - .arglist1 - 2
+.short LF_ARGLIST
+.long 1 /* no. entries */
+.long T_UINT4
+
+/* Type 1002, procedure (return type T_VOID, arglist 1001) */
+.proctype1:
+.short .funcid1 - .proctype1 - 2
+.short LF_PROCEDURE
+.long T_VOID
+.byte 0 /* calling convention */
+.byte 0 /* attributes */
+.short 1 /* no. parameters */
+.long 0x1001
+
+/* Type 1003, func ID for proc1 */
+.funcid1:
+.short .funcid2 - .funcid1 - 2
+.short LF_FUNC_ID
+.long 0 /* parent scope */
+.long 0x1002 /* type */
+.asciz "proc1"
+.byte 0xf2 /* padding */
+.byte 0xf1 /* padding */
+
+/* Type 1004, func ID for proc2 */
+.funcid2:
+.short .types_end - .funcid2 - 2
+.short LF_FUNC_ID
+.long 0 /* parent scope */
+.long 0x1002 /* type */
+.asciz "proc2"
+.byte 0xf2 /* padding */
+.byte 0xf1 /* padding */
+
+.types_end:
+
+.section ".debug$S", "rn"
+.long CV_SIGNATURE_C13
+
+/*
+  *** STRINGTABLE
+
+  00000000
+  00000001 foo.c
+  00000007 bar.c
+*/
+
+.long DEBUG_S_STRINGTABLE
+.long .strings_end - .strings_start
+
+.strings_start:
+
+.asciz ""
+
+.src1:
+.asciz "foo.c"
+
+.src2:
+.asciz "bar.c"
+
+.strings_end:
+
+.balign 4
+
+/*
+  *** FILECHKSUMS
+
+  FileId  St.Offset  Cb  Type  ChksumBytes
+      0   00000001   10  MD5   67452301EFCDAB8998BADCFE10234567
+      18  00000007   10  MD5   98BADCFE1023456767452301EFCDAB89
+*/
+
+.long DEBUG_S_FILECHKSMS
+.long .chksms_end - .chksms_start
+
+.chksms_start:
+
+.file1:
+.long .src1 - .strings_start
+.byte NUM_MD5_BYTES
+.byte CHKSUM_TYPE_MD5
+.long 0x01234567
+.long 0x89abcdef
+.long 0xfedcba98
+.long 0x67452310
+.short 0 /* padding */
+
+.file2:
+.long .src2 - .strings_start
+.byte NUM_MD5_BYTES
+.byte CHKSUM_TYPE_MD5
+.long 0xfedcba98
+.long 0x67452310
+.long 0x01234567
+.long 0x89abcdef
+.short 0 /* padding */
+
+.chksms_end:
+
+.balign 4
+
+/*
+  *** INLINEE LINES
+
+  InlineeId  FileId  StaringLine
+       1003       0           42
+       1004      18           28
+*/
+
+.long DEBUG_S_INLINEELINES
+.long .lines_end - .lines_start
+
+.lines_start:
+
+.long CV_INLINEE_SOURCE_LINE_SIGNATURE
+
+.long 0x1003
+.long .file1 - .chksms_start
+.long 42
+
+.long 0x1004
+.long .file2 - .chksms_start
+.long 28
+
+.lines_end:
diff --git a/ld/testsuite/ld-pe/pdb.exp b/ld/testsuite/ld-pe/pdb.exp
index b530e0a639d..803ec8a2463 100644
--- a/ld/testsuite/ld-pe/pdb.exp
+++ b/ld/testsuite/ld-pe/pdb.exp
@@ -1744,6 +1744,99 @@  proc test9 { } {
      }
  }
  +proc test10 { } {
+    global as
+    global ar
+    global ld
+    global objdump
+    global srcdir
+    global subdir
+
+    if ![ld_assemble $as $srcdir/$subdir/pdb-inlineelines1a.s tmpdir/pdb-inlineelines1a.o] {
+	unsupported "Build pdb-inlineelines1a.o"
+	return
+    }
+
+    if ![ld_assemble $as $srcdir/$subdir/pdb-inlineelines1b.s tmpdir/pdb-inlineelines1b.o] {
+      unsupported "Build pdb-inlineelines1a.o"
+      return
+    }
+
+    if ![ld_link $ld "tmpdir/pdb-inlineelines1.exe" "--pdb=tmpdir/pdb-inlineelines1.pdb tmpdir/pdb-inlineelines1a.o tmpdir/pdb-inlineelines1b.o"] {
+	unsupported "Create PE image with PDB file"
+	return
+    }
+
+    # read relevant bits from DBI stream
+
+    set exec_output [run_host_cmd "$ar" "x --output tmpdir tmpdir/pdb-inlineelines1.pdb 0003"]
+
+    if ![string match "" $exec_output] {
+	fail "Could not extract DBI stream"
+	return
+    } else {
+	pass "Extracted DBI stream"
+    }
+
+    set fi [open tmpdir/0003]
+    fconfigure $fi -translation binary
+
+    seek $fi 24
+
+    # read substream sizes
+
+    set data [read $fi 4]
+    binary scan $data i mod_info_size
+
+    seek $fi 36 current
+
+    set mod_info [read $fi $mod_info_size]
+
+    close $fi
+
+    # check C13 info in second module
+
+    # We're interested here that the inlinee function IDs get rewritten:
+    # 1003 -> 1002, 1004 -> 1003.  The numbers are lower because linking splits
+    # the types into two separate streams, numbered individually.
+
+    # This is what cvdump.exe -inll pdb-inlineelines1.pdb should look like:
+
+    # *** INLINEE LINES
+    #
+    # InlineeId  FileId  StaringLine
+    #      1002       0           42
+    #      1003       1           28
+
+    # For some reason it numbers file IDs in bytes for object files but as an
+    # index for PDBs, but they're stored on disk the same way.
+
+    set fn1_end [string first \000 $mod_info 64]
+    set fn2_end [string first \000 $mod_info [expr $fn1_end + 1]]
+
+    set off [expr $fn2_end + 1]
+
+    if { [expr $off % 4] != 0 } {
+	set off [expr $off + 4 - ($off % 4)]
+    }
+
+    set c13_info [extract_c13_info "tmpdir/pdb-inlineelines1.pdb" [string range $mod_info $off [expr $off + 63]]]
+
+    set fi [open tmpdir/pdb-inlineelines1-c13-info2 w]
+    fconfigure $fi -translation binary
+    puts -nonewline $fi $c13_info
+    close $fi
+
+    set exp [file_contents "$srcdir/$subdir/pdb-inlineelines1-c13-info2.d"]
+    set got [run_host_cmd "$objdump" "-s --target=binary tmpdir/pdb-inlineelines1-c13-info2"]
+
+    if [string match $exp $got] {
+	pass "Correct C13 info for second module"
+    } else {
+	fail "Incorrect C13 info for second module"
+    }
+}
+
  test1
  test2
  test3
@@ -1753,3 +1846,4 @@  test6