[8/8] gas: permit wider-than-byte operands for .cfi_escape

Message ID 1f48c7d9-24da-4312-bbde-475411b61eb0@suse.com
State New
Headers
Series gas: CFI directive and listing adjustments |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm fail Patch failed to apply

Commit Message

Jan Beulich Feb. 26, 2025, 3:18 p.m. UTC
  Some DW_CFA_* and DW_OP_* encodings take operands that are wider than a
byte, yet are not LEB128-encoded. Having to hand-encode such operands when
needing to resort to .cfi_escape isn't very helpful.
---
I was wondering whether there's a need to distinguish signed and
unsigned forms. .dc.<W> as well as .long et al. don't normally do so, with
x86's .slong being an exception (there it matters for picking the
correct relocation type). The support for that special directive is a
little bit of a hack, hence it doesn't easily lend itself to extending
to .cfi_escape. Perhaps something to be sorted later, when an actual
need arises.

I was further wondering whether endian-ness also needs to be encodable
in the directive operands; I sincerely hope native endian-ness will
suffice.

Should we also permit data1()?
  

Comments

Indu Bhagat March 6, 2025, 4:47 p.m. UTC | #1
On 2/26/25 7:18 AM, Jan Beulich wrote:
> Some DW_CFA_* and DW_OP_* take wider than byte, but non-LEB128 operands.
> Having to hand-encode such when needing to resort to .cfi_escape isn't
> very helpful.
> ---
> I was wondering whether there's a need to distinguish signed and
> unsigned forms. .dc.<W> as well as .long et al. don't normally do so, with
> x86's .slong being an exception (there it matters for picking the
> correct relocation type). The support for that special directive is a
> little bit of a hack, hence it doesn't easily lend itself to extending
> to .cfi_escape. Perhaps something to be sorted later, when an actual
> need arises.
> 
> I was further wondering whether endian-ness also needs to be encodable
> in the directive operands; I sincerely hope native endian-ness will
> suffice.
> 

FWIW, as far as I can tell, non-native endianness should not be
necessary. (I mean, why would one want to emit unwind information in an
endianness other than the target's native one?)
  

Patch

--- a/gas/doc/as.texi
+++ b/gas/doc/as.texi
@@ -5181,6 +5181,18 @@  also use extended kind-of-expression for
 
 @itemize @bullet
 
+@item @code{data2(@var{expression})}
+to emit a 2-byte item,
+
+@item @code{data4(@var{expression})}
+to emit a 4-byte item (provided address size is at least 32 bits),
+
+@item @code{data8(@var{expression})}
+to emit an 8-byte item (provided address size is at least 64 bits),
+
+@item @code{addr(@var{expression})}
+to emit an address-sized item,
+
 @item @code{sleb128(@var{expression})}
 to emit a SLEB128 item,
 
--- a/gas/dw2gencfi.c
+++ b/gas/dw2gencfi.c
@@ -373,7 +373,14 @@  struct cfi_escape_data
   struct cfi_escape_data *next;
   expressionS exp;
   enum {
-    ESC_byte,
+    /* "Plain" data is indicated just by their size, such that values can be
+       easily passed to other functions.  The ESC_data<N> enumerators exist
+       here only as placeholders.  */
+    ESC_byte = 1,
+    ESC_data2 = 2,
+    ESC_data4 = 4,
+    ESC_data8 = 8,
+    /* LEB128 data needs dedicated enumerators.  */
     ESC_sleb128,
     ESC_uleb128,
   } type;
@@ -950,6 +957,21 @@  dot_cfi (int arg)
   demand_empty_rest_of_line ();
 }
 
+#ifndef TC_ADDRESS_BYTES
+#define TC_ADDRESS_BYTES address_bytes
+
+static inline unsigned int
+address_bytes (void)
+{
+  /* Choose smallest of 1, 2, 4, 8 bytes that is large enough to
+     contain an address.  */
+  unsigned int n = (stdoutput->arch_info->bits_per_address - 1) / 8;
+  n |= n >> 1;
+  n |= n >> 2;
+  return n + 1;
+}
+#endif
+
 static void
 dot_cfi_escape (int ignored ATTRIBUTE_UNUSED)
 {
@@ -980,6 +1002,14 @@  dot_cfi_escape (int ignored ATTRIBUTE_UN
 	e->type = ESC_sleb128;
       else if (strcmp (id, "uleb128") == 0)
 	e->type = ESC_uleb128;
+      else if (strcmp (id, "data2") == 0)
+	e->type = 2;
+      else if (TC_ADDRESS_BYTES () >= 4 && strcmp (id, "data4") == 0)
+	e->type = 4;
+      else if (TC_ADDRESS_BYTES () >= 8 && strcmp (id, "data8") == 0)
+	e->type = 8;
+      else if (strcmp (id, "addr") == 0)
+	e->type = TC_ADDRESS_BYTES ();
       else
 	e->type = ESC_byte;
 
@@ -1003,7 +1033,11 @@  dot_cfi_escape (int ignored ATTRIBUTE_UN
 	  expression (&e->exp);
 	}
       else
-	e->reloc = do_parse_cons_expression (&e->exp, 1);
+	{
+	  /* We may still be at the opening parenthesis.  Leave it to expression()
+	     to parse it and find the matching closing one.  */
+	  e->reloc = do_parse_cons_expression (&e->exp, e->type);
+	}
 
       *tail = e;
       tail = &e->next;
@@ -1825,7 +1859,7 @@  output_cfi_insn (struct cfi_insn_data *i
 	    if (e->type == ESC_sleb128 || e->type == ESC_uleb128)
 	      emit_leb128_expr (&e->exp, e->type == ESC_sleb128);
 	    else
-	      emit_expr_with_reloc (&e->exp, 1, e->reloc);
+	      emit_expr_with_reloc (&e->exp, e->type, e->reloc);
 	  }
 	break;
       }
--- a/gas/testsuite/gas/cfi/listing.l
+++ b/gas/testsuite/gas/cfi/listing.l
@@ -9,8 +9,8 @@ 
 [ 	]*[0-9]*[ 	]+[0-9a-f]{4} 4.02 ?0002[ 	]+\.cfi_escape 0x02, 0x00, 0x02, 0x00
 [ 	]*[0-9]*[ 	]+00
 [ 	]*[0-9]*[ 	]+[0-9a-f]{4} .*[ 	]\.nop
-[ 	]*[0-9]*[ 	]+[0-9a-f]{4} 4.03 ?0000[ 	]+\.cfi_escape 0x03; .cfi_escape 0x00, 0x00
-[ 	]*[0-9]*[ 	]+[0-9a-f]{4} 0400 ?0000[ 	]+\.cfi_escape 0x04; .cfi_escape 0x00, 0x00, 0x00, 0x00
+[ 	]*[0-9]*[ 	]+[0-9a-f]{4} 4.03 ?0000[ 	]+\.cfi_escape 0x03; .cfi_escape data2\(0\)
+[ 	]*[0-9]*[ 	]+[0-9a-f]{4} 0400 ?0000[ 	]+\.cfi_escape 0x04; .cfi_escape data4\(0\)
 [ 	]*[0-9]*[ 	]+00
 [ 	]*[0-9]*[ 	]+[0-9a-f]{4} .*[ 	]\.nop
 [ 	]*[0-9]*[ 	]+[0-9a-f]{4} 4.0B[ 	]+\.cfi_escape 0x0b
--- a/gas/testsuite/gas/cfi/listing.s
+++ b/gas/testsuite/gas/cfi/listing.s
@@ -7,8 +7,8 @@  func:
 	.nop
 	.cfi_escape 0x02, 0x00, 0x02, 0x00
 	.nop
-	.cfi_escape 0x03; .cfi_escape 0x00, 0x00
-	.cfi_escape 0x04; .cfi_escape 0x00, 0x00, 0x00, 0x00
+	.cfi_escape 0x03; .cfi_escape data2(0)
+	.cfi_escape 0x04; .cfi_escape data4(0)
 	.nop
 	.cfi_escape 0x0b
 	.nop