[3/3] x86/disasm: rework comment handling

Message ID 916dde10-59ed-4180-86e6-a42677b90c20@suse.com
State New
Headers
Series x86/disasm: adjustments to insn annotations |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Jan Beulich May 15, 2026, 1:08 p.m. UTC
  Model this after operand handling, such that comments can be emitted in
the same order as operands. %rip-relative address comments remain
separate for now. While there correct style for the symbols associated
with immediates: These aren't "comment starts", but symbol names.
---
As long as we want to continue to use the ->print_address_func() hook,
properly unifying %rip-relative comments with others won't be possible.
That, however, is in line with direct addresses also getting printed
specially, similarly by using ->print_address_func().

Was it really intended for the original -Mannotate test to be run only for
Linux targets? It certainly isn't intended here, yet I'd like the new
tests to be in the same directory as the original one. x86-64.exp,
however, bails right away for non-Linux (and remote hosts). I guess at
least the *.d based testing loop should be moved up. Tests truly only
working for Linux can easily be constrained in the *.d files themselves.
  

Comments

Mark Wielaard May 23, 2026, 9:51 a.m. UTC | #1
Hi Jan,

On Fri, May 15, 2026 at 03:08:19PM +0200, Jan Beulich wrote:
> Model this after operand handling, such that comments can be emitted in
> the same order as operands. %rip-relative address comments remain
> separate for now. While there correct style for the symbols associated
> with immediates: These aren't "comment starts", but symbol names.

On some GCC versions (specifically power9 ubuntu 13.3.0, riscv ubuntu
14.2.0, armhf armbian 12.2.0) this fails with:

../../binutils-gdb/opcodes/i386-dis.c:10387:9: error: format not a string literal and no format arguments [-Werror=format-security]
10387 |         i386_dis_printf (info, dis_style_comment_start, sep);
      |         ^~~~~~~~~~~~~~~
cc1: all warnings being treated as errors

Because it cannot see sep as being a string literal. Could probably be
fixed (not tested) using:

 i386_dis_printf (info, dis_style_comment_start, "%s", sep);

Cheers,

Mark

[*] Failing buildbots:
https://builder.sourceware.org/buildbot/#/builders/167/builds/12109
https://builder.sourceware.org/buildbot/#/builders/294/builds/4615
https://builder.sourceware.org/buildbot/#/builders/80/builds/5007
  

Patch

--- /dev/null
+++ b/binutils/testsuite/binutils-all/x86-64/comments.d
@@ -0,0 +1,14 @@ 
+#name: disassembly comments (AT&T)
+#ld:
+#objdump: -dwMannotate,att
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[0-9a-f]+ <_start>:
+[ 	]*[0-9a-f]+:	48 c7 05 [0-9a-f ]+	movq   \$0x[0-9a-f]+,0x[0-9a-f]+\(%rip\) +# \[_start\], [0-9a-f]+ <fptr>
+[ 	]*[0-9a-f]+:	31 c0                	xor    %eax,%eax
+[ 	]*[0-9a-f]+:	c3                   	ret
+#pass
--- /dev/null
+++ b/binutils/testsuite/binutils-all/x86-64/comments.s
@@ -0,0 +1,9 @@ 
+	.text
+	.global _start
+_start:
+	movq	$_start, fptr(%rip)
+	xor	%eax, %eax
+	ret
+
+	.data
+fptr:	.quad	-1
--- /dev/null
+++ b/binutils/testsuite/binutils-all/x86-64/comments-intel.d
@@ -0,0 +1,14 @@ 
+#name: disassembly comments (Intel)
+#source: comments.s
+#ld:
+#objdump: -dwMannotate,intel
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+[0-9a-f]+ <_start>:
+[ 	]*[0-9a-f]+:	48 c7 05 [0-9a-f ]+	mov    QWORD PTR \[rip\+0x[0-9a-f]+\],0x[0-9a-f]+ +# [0-9a-f]+ <fptr>, \[_start\]
+[ 	]*[0-9a-f]+:	31 c0                	xor    eax,eax
+[ 	]*[0-9a-f]+:	c3                   	ret
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -179,8 +179,7 @@  struct instr_info
 
   char obuf[MAX_OPERAND_BUFFER_SIZE];
   char *obufp;
-  char cbuf[COMMENT_BUFFER_SIZE];
-  char * cbufp;
+  char *cbufp;
   char *mnemonicendp;
   const uint8_t *start_codep;
   uint8_t *codep;
@@ -258,6 +257,7 @@  struct instr_info
   signed char op_index[MAX_OPERANDS];
   bool op_riprel[MAX_OPERANDS];
   char *op_out[MAX_OPERANDS];
+  char *cm_out[MAX_OPERANDS];
   bfd_vma op_address[MAX_OPERANDS];
   bfd_vma start_pc;
 
@@ -9749,7 +9749,6 @@  print_insn (bfd_vma pc, disassemble_info
     .start_codep = priv.the_buffer,
     .codep = priv.the_buffer,
     .obufp = ins.obuf,
-    .cbufp = ins.cbuf,
     .last_lock_prefix = -1,
     .last_repz_prefix = -1,
     .last_repnz_prefix = -1,
@@ -9761,6 +9760,7 @@  print_insn (bfd_vma pc, disassemble_info
     .fwait_prefix = -1,
   };
   char op_out[MAX_OPERANDS][MAX_OPERAND_BUFFER_SIZE];
+  char cm_out[MAX_OPERANDS][COMMENT_BUFFER_SIZE];
 
   priv.orig_sizeflag = AFLAG | DFLAG;
   if ((info->mach & bfd_mach_i386_i386) != 0)
@@ -9874,6 +9874,8 @@  print_insn (bfd_vma pc, disassemble_info
     {
       op_out[i][0] = 0;
       ins.op_out[i] = op_out[i];
+      cm_out[i][0] = 0;
+      ins.cm_out[i] = cm_out[i];
     }
 
   sizeflag = priv.orig_sizeflag;
@@ -9992,6 +9994,7 @@  print_insn (bfd_vma pc, disassemble_info
 	  for (i = 0; i < MAX_OPERANDS; ++i)
 	    {
 	      ins.obufp = ins.op_out[i];
+	      ins.cbufp = ins.cm_out[i];
 	      ins.op_ad = MAX_OPERANDS - 1 - i;
 	      if (dp->op[i].rtn
 		  && !dp->op[i].rtn (&ins, dp->op[i].bytemode, sizeflag))
@@ -10317,6 +10320,10 @@  print_insn (bfd_vma pc, disassemble_info
 	  riprel = ins.op_riprel[i];
 	  ins.op_riprel[i] = ins.op_riprel[MAX_OPERANDS - 1 - i];
 	  ins.op_riprel[MAX_OPERANDS - 1 - i] = riprel;
+
+	  char *tmp = ins.cm_out[i];
+	  ins.cm_out[i] = ins.cm_out[MAX_OPERANDS - 1 - i];
+	  ins.cm_out[MAX_OPERANDS - 1 - i] = tmp;
 	}
     }
   else
@@ -10364,22 +10371,23 @@  print_insn (bfd_vma pc, disassemble_info
 	needcomma = 1;
       }
 
+  const char *sep = "        # ";
   for (i = 0; i < MAX_OPERANDS; i++)
     if (ins.op_index[i] != -1 && ins.op_riprel[i])
       {
-	i386_dis_printf (info, dis_style_comment_start, "        # ");
+	i386_dis_printf (info, dis_style_comment_start, sep);
+	sep = ", ";
 	(*info->print_address_func)
 	  ((bfd_vma)(ins.start_pc + (ins.codep - ins.start_codep)
 		     + ins.op_address[ins.op_index[i]]),
-	  info);
-	break;
+	   info);
+      }
+    else if (*ins.cm_out[i])
+      {
+	i386_dis_printf (info, dis_style_comment_start, sep);
+	sep = ", ";
+	i386_dis_printf (info, dis_style_symbol, "%s", ins.cm_out[i]);
       }
-  if (ins.cbufp != ins.cbuf)
-    {
-      if (i == MAX_OPERANDS)
-	i386_dis_printf (info, dis_style_comment_start, "        # ");
-      i386_dis_printf (info, dis_style_comment_start, "%s", ins.cbuf);
-    }
   
   ret = ins.codep - priv.the_buffer;
  out:
@@ -10760,6 +10768,7 @@  dofloat (instr_info *ins, int sizeflag)
 
       putop (ins, float_mem[fp_indx], sizeflag);
       ins->obufp = ins->op_out[0];
+      ins->cbufp = ins->cm_out[0];
       ins->op_ad = 2;
       return OP_E (ins, float_mem_mode[fp_indx], sizeflag);
     }
@@ -10781,12 +10790,14 @@  dofloat (instr_info *ins, int sizeflag)
       putop (ins, dp->name, sizeflag);
 
       ins->obufp = ins->op_out[0];
+      ins->cbufp = ins->cm_out[0];
       ins->op_ad = 2;
       if (dp->op[0].rtn
 	  && !dp->op[0].rtn (ins, dp->op[0].bytemode, sizeflag))
 	return false;
 
       ins->obufp = ins->op_out[1];
+      ins->cbufp = ins->cm_out[1];
       ins->op_ad = 1;
       if (dp->op[1].rtn
 	  && !dp->op[1].rtn (ins, dp->op[1].bytemode, sizeflag))
@@ -11588,16 +11599,19 @@  static void
 cappend_with_style (instr_info *ins, const char *s,
 		    enum disassembler_style style)
 {
-  if (ins->cbufp + strlen (s) + 4 >= ins->cbuf + COMMENT_BUFFER_SIZE)
+  size_t len = strlen (ins->cbufp);
+
+  if (len + !!len + strlen (s) + 4 >= COMMENT_BUFFER_SIZE)
     return;
 
   unsigned num = (unsigned) style;
 
+  if (len)
+    *ins->cbufp++ = ' ';
   *ins->cbufp++ = STYLE_MARKER_CHAR;
   *ins->cbufp++ = (num < 10 ? ('0' + num)
 		   : ((num < 16) ? ('a' + (num - 10)) : '0'));
   *ins->cbufp++ = STYLE_MARKER_CHAR;
-  *ins->cbufp = '\0';
 
   ins->cbufp = stpcpy (ins->cbufp, s);
 }
@@ -11697,7 +11711,7 @@  oappend_immediate (instr_info *ins, bfd_
 
   char * annotation = NULL;
 
-  if (asprintf (& annotation, " [%s]", sym->name) > 0)
+  if (asprintf (& annotation, "[%s]", sym->name) > 0)
     {
       /* Display the symbol associated with address 'imm'.  */
       cappend_with_style (ins, annotation, dis_style_symbol);
@@ -14255,6 +14269,10 @@  OP_VexW (instr_info *ins, int bytemode,
 
       ins->op_out[2] = ins->op_out[1];
       ins->op_out[1] = tmp;
+
+      tmp = ins->cm_out[2];
+      ins->cm_out[2] = ins->cm_out[1];
+      ins->cm_out[1] = tmp;
     }
   return true;
 }
@@ -14288,6 +14306,10 @@  OP_REG_VexI4 (instr_info *ins, int bytem
 
       ins->op_out[3] = ins->op_out[2];
       ins->op_out[2] = tmp;
+
+      tmp = ins->cm_out[2];
+      ins->cm_out[3] = ins->cm_out[2];
+      ins->cm_out[2] = tmp;
     }
   return true;
 }