[v0,06/15] readelf: add support to dump build attributes v2

Message ID 20250310175131.1217374-7-matthieu.longo@arm.com
State New
Headers
Series AArch64 AEABI build attributes (a.k.a. object attributes v2) |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 fail Patch failed to apply

Commit Message

Matthieu Longo March 10, 2025, 5:51 p.m. UTC
  From: Richard Ball <richard.ball@arm.com>

Co-Authored-By: Matthieu Longo <matthieu.longo@arm.com>
---
 bfd/elf-attrs.h       |  18 +++
 binutils/readelf.c    | 261 ++++++++++++++++++++++++++++++++++++++++++
 include/elf/aarch64.h |  13 +++
 3 files changed, 292 insertions(+)
  

Comments

Jan Beulich March 11, 2025, 8:12 a.m. UTC | #1
On 10.03.2025 18:51, Matthieu Longo wrote:
> --- a/binutils/readelf.c
> +++ b/binutils/readelf.c
> @@ -58,6 +58,7 @@
>  #define BFD64
>  
>  #include "bfd.h"
> +#include "elf-attrs.h"
>  #include "bucomm.h"
>  #include "elfcomm.h"
>  #include "demanguse.h"
> @@ -19456,6 +19457,263 @@ free_data:
>    return res;
>  }
>  
> +static unsigned char *
> +display_attr_v2 (unsigned char * cursor,
> +		 const unsigned char * const end,
> +		 const char *subsec_name,
> +		 obj_attr_encoding_v2 value_encoding)
> +{
> +  static const known_tag_v2 known_tags_aeabi_feature_and_bits [] =
> +  {
> +    {Tag_Feature_BTI, "Feature_BTI", {0}},
> +    {Tag_Feature_PAC, "Feature_PAC", {0}},
> +    {Tag_Feature_GCS, "Feature_GCS", {0}},
> +  };
> +  static const known_tag_v2 known_tags_aeabi_pauthabi [] =
> +  {
> +    {Tag_PAuth_Platform, "PAuth_Platform", {0}},
> +    {Tag_PAuth_Schema, "PAuth_Schema", {0}},
> +  };
> +  static const known_subsection_v2 known_subsections[] =
> +  {
> +    {
> +      .subsec_name = "aeabi_feature_and_bits",
> +      .known_tags = known_tags_aeabi_feature_and_bits,
> +      .optional = true,
> +      .encoding = ULEB128,
> +      .len = sizeof (known_tags_aeabi_feature_and_bits),
> +    },
> +    {
> +      .subsec_name = "aeabi_pauthabi",
> +      .known_tags = known_tags_aeabi_pauthabi,
> +      .optional = false,
> +      .encoding = ULEB128,
> +      .len = sizeof (known_tags_aeabi_pauthabi),
> +    },
> +  };
> +
> +  const known_subsection_v2 *
> +  identify_subsection_ (const char* name)
> +  {
> +    for (unsigned i = 0; i < ARRAY_SIZE (known_subsections); ++i)
> +      if (strcmp (name, known_subsections[i].subsec_name) == 0)
> +	return &known_subsections[i];
> +    return NULL;
> +  }
> +
> +  const known_tag_v2 *
> +  identify_tag_ (const known_subsection_v2* subsec,
> +		 uint32_t tag)
> +  {
> +    for (unsigned i = 0; i < subsec->len; ++i)
> +      {
> +	const known_tag_v2 *known_tag = &subsec->known_tags[i];
> +	if (known_tag->tag == tag)
> +	  return known_tag;
> +      }
> +    return NULL;
> +  }
> +
> +  uint32_t tag;
> +  READ_ULEB (tag, cursor, end);
> +
> +  const known_tag_v2 *tag_info = NULL;
> +  const known_subsection_v2 *subsec_info = identify_subsection_ (subsec_name);
> +  if (subsec_info != NULL)
> +    tag_info = identify_tag_ (subsec_info, tag);
> +
> +  if (tag_info != NULL)
> +    printf ("    Tag_%s:	", tag_info->name);
> +  else
> +    printf ("    Tag_unknown_%u:	", tag);
> +
> +  switch (value_encoding)
> +    {
> +    case NTBS:
> +      cursor = display_tag_value (-1, cursor, end);
> +      break;
> +    case ULEB128:
> +      cursor = display_tag_value (0, cursor, end);
> +      break;
> +    }
> +
> +  return cursor;
> +}
> +
> +typedef struct {
> +  bool err;
> +  uint64_t read;
> +} BufferReadOp_t ;
> +
> +static BufferReadOp_t
> +elf_parse_attrs_subsection_v2 (unsigned char* cursor,
> +			       const uint64_t max_read,
> +			       const char* public_name)
> +{
> +  BufferReadOp_t op = { .err = false, .read = 0 };
> +
> +  const uint32_t F_SUBSECTION_LEN = sizeof(uint32_t);
> +  /* The minimum subsection length is 5: 4 bytes for the length itself, and 1
> +     byte for an empty NUL-terminated string, and no vendor-data.  */
> +  const uint32_t F_MIN_SUBSECTION_DATA_LEN = F_SUBSECTION_LEN + 1;
> +
> +  if (max_read <= F_SUBSECTION_LEN)
> +    {
> +      error (_("Build attributes section ends prematurely\n"));
> +      return op;
> +    }
> +  uint32_t subsection_len = byte_get (cursor, F_SUBSECTION_LEN);
> +  op.read += F_SUBSECTION_LEN;
> +  cursor += F_SUBSECTION_LEN;
> +  if (subsection_len > max_read)
> +    {
> +      error (_("Bad subsection length (%u > max=%lu)\n"),
> +	     subsection_len, max_read);
> +      // error, but still try to display the content until meeting a more serious error.
> +      subsection_len = max_read;
> +      op.err = true;
> +    }
> +  /* PR 17531: file: 001-101425-0.004  */

A comment like this, referencing a really old PR, suggests code is being copied
when - if at all possible - code would better be re-used (with refactoring as
necessary).

> +  else if (subsection_len < F_MIN_SUBSECTION_DATA_LEN)
> +    {
> +      error (_("Subsection length of %u is too small\n"), subsection_len);
> +      op.err = true;
> +      return op;
> +    }
> +
> +  size_t subsection_name_len = strnlen ((char *) cursor, subsection_len) + 1;
> +  if (subsection_name_len >= subsection_len)
> +    {
> +      error (_("Subsection name seems corrupted (missing '\\0')\n"));
> +      op.err = true;
> +      return op;
> +    }
> +  //if (subsection_name_len == 0)
> +  //  // do something here when the string is '\0'

What's this? Was this meant to be removed before submitting?

Jan
  
Matthieu Longo March 20, 2025, 2:45 p.m. UTC | #2
On 2025-03-11 08:12, Jan Beulich wrote:
> On 10.03.2025 18:51, Matthieu Longo wrote:
>> --- a/binutils/readelf.c
>> +++ b/binutils/readelf.c
>> @@ -58,6 +58,7 @@
>>   #define BFD64
>>   
>>   #include "bfd.h"
>> +#include "elf-attrs.h"
>>   #include "bucomm.h"
>>   #include "elfcomm.h"
>>   #include "demanguse.h"
>> @@ -19456,6 +19457,263 @@ free_data:
>>     return res;
>>   }
>>   
>> +static unsigned char *
>> +display_attr_v2 (unsigned char * cursor,
>> +		 const unsigned char * const end,
>> +		 const char *subsec_name,
>> +		 obj_attr_encoding_v2 value_encoding)
>> +{
>> +  static const known_tag_v2 known_tags_aeabi_feature_and_bits [] =
>> +  {
>> +    {Tag_Feature_BTI, "Feature_BTI", {0}},
>> +    {Tag_Feature_PAC, "Feature_PAC", {0}},
>> +    {Tag_Feature_GCS, "Feature_GCS", {0}},
>> +  };
>> +  static const known_tag_v2 known_tags_aeabi_pauthabi [] =
>> +  {
>> +    {Tag_PAuth_Platform, "PAuth_Platform", {0}},
>> +    {Tag_PAuth_Schema, "PAuth_Schema", {0}},
>> +  };
>> +  static const known_subsection_v2 known_subsections[] =
>> +  {
>> +    {
>> +      .subsec_name = "aeabi_feature_and_bits",
>> +      .known_tags = known_tags_aeabi_feature_and_bits,
>> +      .optional = true,
>> +      .encoding = ULEB128,
>> +      .len = sizeof (known_tags_aeabi_feature_and_bits),
>> +    },
>> +    {
>> +      .subsec_name = "aeabi_pauthabi",
>> +      .known_tags = known_tags_aeabi_pauthabi,
>> +      .optional = false,
>> +      .encoding = ULEB128,
>> +      .len = sizeof (known_tags_aeabi_pauthabi),
>> +    },
>> +  };
>> +
>> +  const known_subsection_v2 *
>> +  identify_subsection_ (const char* name)
>> +  {
>> +    for (unsigned i = 0; i < ARRAY_SIZE (known_subsections); ++i)
>> +      if (strcmp (name, known_subsections[i].subsec_name) == 0)
>> +	return &known_subsections[i];
>> +    return NULL;
>> +  }
>> +
>> +  const known_tag_v2 *
>> +  identify_tag_ (const known_subsection_v2* subsec,
>> +		 uint32_t tag)
>> +  {
>> +    for (unsigned i = 0; i < subsec->len; ++i)
>> +      {
>> +	const known_tag_v2 *known_tag = &subsec->known_tags[i];
>> +	if (known_tag->tag == tag)
>> +	  return known_tag;
>> +      }
>> +    return NULL;
>> +  }
>> +
>> +  uint32_t tag;
>> +  READ_ULEB (tag, cursor, end);
>> +
>> +  const known_tag_v2 *tag_info = NULL;
>> +  const known_subsection_v2 *subsec_info = identify_subsection_ (subsec_name);
>> +  if (subsec_info != NULL)
>> +    tag_info = identify_tag_ (subsec_info, tag);
>> +
>> +  if (tag_info != NULL)
>> +    printf ("    Tag_%s:	", tag_info->name);
>> +  else
>> +    printf ("    Tag_unknown_%u:	", tag);
>> +
>> +  switch (value_encoding)
>> +    {
>> +    case NTBS:
>> +      cursor = display_tag_value (-1, cursor, end);
>> +      break;
>> +    case ULEB128:
>> +      cursor = display_tag_value (0, cursor, end);
>> +      break;
>> +    }
>> +
>> +  return cursor;
>> +}
>> +
>> +typedef struct {
>> +  bool err;
>> +  uint64_t read;
>> +} BufferReadOp_t ;
>> +
>> +static BufferReadOp_t
>> +elf_parse_attrs_subsection_v2 (unsigned char* cursor,
>> +			       const uint64_t max_read,
>> +			       const char* public_name)
>> +{
>> +  BufferReadOp_t op = { .err = false, .read = 0 };
>> +
>> +  const uint32_t F_SUBSECTION_LEN = sizeof(uint32_t);
>> +  /* The minimum subsection length is 5: 4 bytes for the length itself, and 1
>> +     byte for an empty NUL-terminated string, and no vendor-data.  */
>> +  const uint32_t F_MIN_SUBSECTION_DATA_LEN = F_SUBSECTION_LEN + 1;
>> +
>> +  if (max_read <= F_SUBSECTION_LEN)
>> +    {
>> +      error (_("Build attributes section ends prematurely\n"));
>> +      return op;
>> +    }
>> +  uint32_t subsection_len = byte_get (cursor, F_SUBSECTION_LEN);
>> +  op.read += F_SUBSECTION_LEN;
>> +  cursor += F_SUBSECTION_LEN;
>> +  if (subsection_len > max_read)
>> +    {
>> +      error (_("Bad subsection length (%u > max=%lu)\n"),
>> +	     subsection_len, max_read);
>> +      // error, but still try to display the content until meeting a more serious error.
>> +      subsection_len = max_read;
>> +      op.err = true;
>> +    }
>> +  /* PR 17531: file: 001-101425-0.004  */
> 
> A comment like this, referencing a really old PR, suggests code is being copied
> when - if at all possible - code would better be re-used (with refactoring as
> necessary).
> 

Richard Ball, the initial author of this patch, tried to refactor the 
code of Object Attributes v1 but it was making things more difficult to 
understand, and introduced a risk of regressions.

This code was not copied; the reference to PR 17531 was added because 
the check guards against essentially the same issue, i.e. the specified 
length of the subsection is greater than the size of the data remaining 
in the section.

>> +  else if (subsection_len < F_MIN_SUBSECTION_DATA_LEN)
>> +    {
>> +      error (_("Subsection length of %u is too small\n"), subsection_len);
>> +      op.err = true;
>> +      return op;
>> +    }
>> +
>> +  size_t subsection_name_len = strnlen ((char *) cursor, subsection_len) + 1;
>> +  if (subsection_name_len >= subsection_len)
>> +    {
>> +      error (_("Subsection name seems corrupted (missing '\\0')\n"));
>> +      op.err = true;
>> +      return op;
>> +    }
>> +  //if (subsection_name_len == 0)
>> +  //  // do something here when the string is '\0'
> 
> What's this? Was this meant to be removed before submitting?
> 

Yes. I asked for clarification on the spec: such a name is valid but 
useless. I replaced this commented-out code with a detailed comment.

> Jan
  

Patch

diff --git a/bfd/elf-attrs.h b/bfd/elf-attrs.h
index 5d41bb98538..a4af0da1f68 100644
--- a/bfd/elf-attrs.h
+++ b/bfd/elf-attrs.h
@@ -100,3 +100,21 @@  typedef struct obj_attr_subsection_list
   /* The size of the list.  */
   uint64_t size;
 } obj_attr_subsection_list;
+
+typedef struct  /* A tag known for an object attributes v2 subsection.  */
+{
+  const uint32_t tag;  /* Tag number, compared with the tag value read from the section.  */
+  const char * name;  /* Tag name without the "Tag_" prefix; readelf prints that prefix itself.  */
+  union obj_attr_value_v2 default_value;  /* NOTE(review): not read by the readelf code of this patch - confirm its consumer.  */
+} known_tag_v2;
+
+#define OBJ_ATTR_TAG_v2(s) "Tag_" # s  /* Expands to the string "Tag_" followed by the stringified S.  */
+typedef struct  /* A known subsection, with the table of the tags known for it.  */
+{
+  const char *subsec_name;  /* Name compared (strcmp) with the name of the subsection being displayed.  */
+  const known_tag_v2 *known_tags;  /* Table of the tags known for this subsection.  */
+  const bool optional;  /* NOTE(review): presumably the expected "optional" flag; not read by the readelf code of this patch.  */
+  const obj_attr_encoding_v2 encoding;  /* NOTE(review): presumably the expected value encoding; not read by the readelf code of this patch.  */
+  const size_t len;  /* Number of elements of KNOWN_TAGS (the tag lookup iterates while i < len), i.e. a count, not a size in bytes.  */
+} known_subsection_v2;
+
diff --git a/binutils/readelf.c b/binutils/readelf.c
index dd1871d8c75..b69180e4e33 100644
--- a/binutils/readelf.c
+++ b/binutils/readelf.c
@@ -58,6 +58,7 @@ 
 #define BFD64
 
 #include "bfd.h"
+#include "elf-attrs.h"
 #include "bucomm.h"
 #include "elfcomm.h"
 #include "demanguse.h"
@@ -19456,6 +19457,263 @@  free_data:
   return res;
 }
 
+/* Display one attribute (a tag followed by its value) of a v2 subsection.
+   CURSOR points at the ULEB128-encoded tag and END bounds the reads;
+   SUBSEC_NAME selects the table used to name the tag and VALUE_ENCODING
+   says how the value is encoded.  Returns what display_tag_value returns.  */
+static unsigned char *
+display_attr_v2 (unsigned char * cursor,
+		 const unsigned char * const end,
+		 const char *subsec_name,
+		 obj_attr_encoding_v2 value_encoding)
+{
+  static const known_tag_v2 known_tags_aeabi_feature_and_bits [] =
+  {
+    {Tag_Feature_BTI, "Feature_BTI", {0}},
+    {Tag_Feature_PAC, "Feature_PAC", {0}},
+    {Tag_Feature_GCS, "Feature_GCS", {0}},
+  };
+  static const known_tag_v2 known_tags_aeabi_pauthabi [] =
+  {
+    {Tag_PAuth_Platform, "PAuth_Platform", {0}},
+    {Tag_PAuth_Schema, "PAuth_Schema", {0}},
+  };
+  /* LEN is an element count (it bounds the tag lookup below), so it must be
+     ARRAY_SIZE, not sizeof, or the lookup reads past the end of the table.  */
+  static const known_subsection_v2 known_subsections[] =
+  {
+    {
+      .subsec_name = "aeabi_feature_and_bits",
+      .known_tags = known_tags_aeabi_feature_and_bits,
+      .optional = true,
+      .encoding = ULEB128,
+      .len = ARRAY_SIZE (known_tags_aeabi_feature_and_bits),
+    },
+    {
+      .subsec_name = "aeabi_pauthabi",
+      .known_tags = known_tags_aeabi_pauthabi,
+      .optional = false,
+      .encoding = ULEB128,
+      .len = ARRAY_SIZE (known_tags_aeabi_pauthabi),
+    },
+  };
+
+  uint32_t tag;
+  READ_ULEB (tag, cursor, end);
+
+  /* Find the subsection by name, then the tag within that subsection.  Plain
+     loops are used rather than nested functions, which are a GNU C
+     extension.  */
+  const known_subsection_v2 *subsec_info = NULL;
+  for (unsigned i = 0; i < ARRAY_SIZE (known_subsections); ++i)
+    if (strcmp (subsec_name, known_subsections[i].subsec_name) == 0)
+      {
+	subsec_info = &known_subsections[i];
+	break;
+      }
+
+  const known_tag_v2 *tag_info = NULL;
+  if (subsec_info != NULL)
+    for (size_t i = 0; i < subsec_info->len; ++i)
+      if (subsec_info->known_tags[i].tag == tag)
+	{
+	  tag_info = &subsec_info->known_tags[i];
+	  break;
+	}
+
+  if (tag_info != NULL)
+    printf ("    Tag_%s:	", tag_info->name);
+  else
+    printf ("    Tag_unknown_%u:	", tag);
+
+  /* NOTE(review): -1 / 0 presumably select string / ULEB128 display.  */
+  switch (value_encoding)
+    {
+    case NTBS:
+      cursor = display_tag_value (-1, cursor, end);
+      break;
+    case ULEB128:
+      cursor = display_tag_value (0, cursor, end);
+      break;
+    }
+
+  return cursor;
+}
+
+typedef struct {  /* Result of parsing one subsection out of the attributes buffer.  */
+  bool err;  /* True when parsing failed; the caller then stops processing the section.  */
+  uint64_t read;  /* Number of bytes consumed; the caller advances its cursor by this amount.  */
+} BufferReadOp_t ;
+
+/* Parse and display the object attributes v2 subsection starting at CURSOR,
+   of which at most MAX_READ bytes may be read.  A subsection whose name
+   starts with PUBLIC_NAME is reported as public, any other as private.
+   The result holds the number of bytes consumed in .read, and .err is set
+   when the subsection is malformed.  */
+static BufferReadOp_t
+elf_parse_attrs_subsection_v2 (unsigned char* cursor,
+			       const uint64_t max_read,
+			       const char* public_name)
+{
+  BufferReadOp_t op = { .err = false, .read = 0 };
+
+  const uint32_t F_SUBSECTION_LEN = sizeof(uint32_t);
+  /* The minimum subsection length is 5: 4 bytes for the length itself, and 1
+     byte for an empty NUL-terminated string, and no vendor-data.  */
+  const uint32_t F_MIN_SUBSECTION_DATA_LEN = F_SUBSECTION_LEN + 1;
+
+  if (max_read <= F_SUBSECTION_LEN)
+    {
+      error (_("Build attributes section ends prematurely\n"));
+      /* Nothing was consumed, so without the error flag a caller advancing
+	 by .read would call again with the same arguments forever.  */
+      op.err = true;
+      return op;
+    }
+  uint32_t subsection_len = byte_get (cursor, F_SUBSECTION_LEN);
+  cursor += F_SUBSECTION_LEN;
+  if (subsection_len > max_read)
+    {
+      /* Same kind of issue as PR 17531: the length claims more data than is
+	 available.  Report it, but still try to display the content until
+	 meeting a more serious error.  */
+      error (_("Bad subsection length (%u > max=%" PRIu64 ")\n"),
+	     subsection_len, max_read);
+      subsection_len = max_read;
+      op.err = true;
+    }
+  else if (subsection_len < F_MIN_SUBSECTION_DATA_LEN)
+    {
+      error (_("Subsection length of %u is too small\n"), subsection_len);
+      op.err = true;
+      return op;
+    }
+  /* From here on the whole subsection counts as consumed.  */
+  op.read = subsection_len;
+
+  /* The 4 length bytes are behind CURSOR already, so only the remainder of
+     the subsection may be scanned for the NUL ending the name.  */
+  const uint32_t payload_len = subsection_len - F_SUBSECTION_LEN;
+  size_t subsection_name_len = strnlen ((char *) cursor, payload_len) + 1;
+  if (subsection_name_len > payload_len)
+    {
+      error (_("Subsection name seems corrupted (missing '\\0')\n"));
+      op.err = true;
+      return op;
+    }
+  /* An empty name (a lone '\0') is valid, though useless, so it is accepted.  */
+
+  unsigned char * const end = cursor + payload_len;
+  const char* subsec_name = (const char*) cursor;
+  printf (_(" - Name:	%s\n"), subsec_name);
+  bool public_subsection =
+    strncmp (subsec_name, public_name, strlen (public_name)) == 0;
+  cursor += subsection_name_len;
+
+  printf ("   Scope:	%s\n", public_subsection ? "public" : "private");
+  printf ("   Length:	%u\n", subsection_len);
+
+  uint8_t optional;
+  READ_ULEB (optional, cursor, end);
+  if (optional > 1)
+    {
+      error (_("Optional value seems corrupted, got %u but only"
+	       " 0x0 (false) or 0x1 (true) are valid values.\n"), optional);
+      op.err = true;
+      return op;
+    }
+
+  printf ("   Optional:	%s\n", optional ? "True" : "False");
+
+  uint8_t value_encoding_raw;
+  READ_ULEB (value_encoding_raw, cursor, end);
+  if (value_encoding_raw > NTBS)
+    {
+      error (_("Attribute type seems corrupted, got %u but only 0x0 (ULEB128)"
+	       " or 0x1 (NTBS) are valid types.\n"), value_encoding_raw);
+      op.err = true;
+      return op;
+    }
+
+  enum obj_attr_encoding_v2 value_encoding = value_encoding_raw;
+  switch (value_encoding)
+    {
+    case ULEB128:
+      printf ("   Encoding:	ULEB128\n");
+      break;
+    case NTBS:
+      printf ("   Encoding:	asciz\n");
+      break;
+    }
+
+  /* Everything after the header is attributes; the loop below only ends at
+     END, which is why the header is parsed once, outside of any loop.  */
+  printf ("   Values:\n");
+  while (cursor < end)
+    cursor = display_attr_v2 (cursor, end, subsec_name, value_encoding);
+  putchar ('\n');
+
+  /* NOTE(review): assumes display_attr_v2 never moves past END - confirm.  */
+  if (cursor != end)
+    abort();
+
+  return op;
+}
+
+/* Display the object attributes v2 held in the section of type SECTION_TYPE
+   of FILEDATA; subsections whose name starts with PUBLIC_NAME are shown as
+   public.  Returns false when the data cannot be read or parsed.  */
+static bool
+process_attributes_v2 (Filedata *filedata,
+		       const char *public_name,
+		       uint32_t section_type)
+{
+  Elf_Internal_Shdr *sec_hdr = find_section_by_type (filedata, section_type);
+  if (sec_hdr == NULL)
+    /* No section, exit without error.  */
+    return true;
+
+  unsigned char * const data = (unsigned char*)
+    get_data (NULL, filedata, sec_hdr->sh_offset, 1, sec_hdr->sh_size,
+	      _("build attributes"));
+  if (data == NULL)
+    return false;
+
+  unsigned char *cursor = data;
+  bool res = true;
+
+  /* The first byte is the format version; only 'A' is recognised here.  */
+  if (*cursor != 'A')
+    {
+      error (_("Unknown attributes version '%c'(%d) - expecting 'A'\n"),
+	     *cursor, *cursor);
+      res = false;
+      goto free_data;
+    }
+
+  ++cursor;
+  printf (_("Subsections:\n"));
+  BufferReadOp_t op;
+  for (uint64_t remaining = sec_hdr->sh_size - 1; // already read 'A'
+       remaining > 1; remaining -= op.read, cursor += op.read)
+    {
+      op = elf_parse_attrs_subsection_v2 (cursor, remaining, public_name);
+      /* A read of zero bytes, or of more than what is left, would make this
+	 loop spin forever or run off the buffer: treat both as errors.  */
+      if (op.err || op.read == 0 || op.read > remaining)
+	{
+	  error (_("Cannot parse subsection at offset %" PRIx64 "\n"),
+		 (uint64_t) (sec_hdr->sh_size - remaining));
+	  res = false;
+	  goto free_data;
+	}
+    }
+
+free_data:
+  free ((void*) data);
+  return res;
+}
+
 /* DATA points to the contents of a MIPS GOT that starts at VMA PLTGOT.
    Print the Address, Access and Initial fields of an entry at VMA ADDR
    and return the VMA of the next entry, or -1 if there was a problem.
@@ -23444,6 +23702,9 @@  process_arch_specific (Filedata * filedata)
 				 display_arm_attribute,
 				 display_generic_attribute);
 
+    case EM_AARCH64:
+      return process_attributes_v2 (filedata, "aeabi", SHT_AARCH64_ATTRIBUTES);
+
     case EM_MIPS:
     case EM_MIPS_RS3_LE:
       return process_mips_specific (filedata);
diff --git a/include/elf/aarch64.h b/include/elf/aarch64.h
index e218e07fa73..c076174a7ac 100644
--- a/include/elf/aarch64.h
+++ b/include/elf/aarch64.h
@@ -57,6 +57,19 @@ 
 #define STO_AARCH64_VARIANT_PCS	0x80  /* Symbol may follow different call
 					 convention from the base PCS.  */
 
+/* Tags used in the aeabi_feature_and_bits subsection.  The values are the tag numbers that readelf compares with the tag read from the section.  */
+typedef enum Tag_Feature_XXX {
+  Tag_Feature_BTI = 0,
+  Tag_Feature_PAC = 1,
+  Tag_Feature_GCS = 2,
+} Tag_Feature_XXX;
+
+/* Tags used in the aeabi_pauthabi subsection.  The values are the tag numbers that readelf compares with the tag read from the section; numbering starts at 1, no tag 0 is defined here.  */
+typedef enum Tag_PAuth_XXX {
+  Tag_PAuth_Platform = 1,
+  Tag_PAuth_Schema = 2,
+} Tag_PAuth_XXX;
+
 /* Relocation types.  */
 
 START_RELOC_NUMBERS (elf_aarch64_reloc_type)