Hello,
I've just modified the objdump to render the jump visualization using
UTF-8 characters. I attach the patch and a screenshot of the new output
with extended color enabled.
I send this message to ask whether this feature has an opportunity to be
added to the binutils. My patch is not finished – it is required to add
some other characters to the output to make the output cleaner, also
autodetection of UTF-8 environment (and enabling the UTF-8 on such
machines) should be added. Also, fine-tuning the output (such as
changing arrow shape) for user's needs would be a nice feature.
Thank you.
Jiří Wolker
Output of the objdump, in plain text:
00000000000001ed <print_msg>:
1ed: 50 push %rax
1ee: 52 push %rdx
1ef: 56 push %rsi
1f0: ╭────→ ac lods %ds:(%rsi),%al
1f1: │ 08 c0 or %al,%al
1f3: │ ╭─ 74 04 je 1f9 <print_msg+0xc>
1f5: │ │ e6 e9 out %al,$0xe9
1f7: ╰──│─ eb f7 jmp 1f0 <print_msg+0x3>
1f9: ╰─→ 8b 34 24 mov (%rsp),%esi
1fc: ╭───────→ 66 ba fd 03 mov $0x3fd,%dx
200: │ ec in (%dx),%al
201: │ 24 20 and $0x20,%al
203: ├─────── 74 f7 je 1fc <print_msg+0xf>
205: │ 66 ba f8 03 mov $0x3f8,%dx
209: │ ac lods %ds:(%rsi),%al
20a: │ 08 c0 or %al,%al
20c: │ ╭─ 74 07 je 215 <print_msg+0x28>
20e: │ │ 3c 0a cmp $0xa,%al
210: │ ╭──│─ 74 07 je 219 <print_msg+0x2c>
212: │ │ │ ee out %al,(%dx)
213: ├──│──│─ eb e7 jmp 1fc <print_msg+0xf>
215: │ │ ╰─→ 5e pop %rsi
216: │ │ 5a pop %rdx
217: │ │ 58 pop %rax
218: │ │ c3 ret
219: │ ╰────→ b0 0d mov $0xd,%al
21b: │ ee out %al,(%dx)
21c: │ ╭─→ 66 ba fd 03 mov $0x3fd,%dx
220: │ │ ec in (%dx),%al
221: │ │ 24 20 and $0x20,%al
223: │ ╰─ 74 f7 je 21c <print_msg+0x2f>
225: │ 66 ba f8 03 mov $0x3f8,%dx
229: │ b0 0a mov $0xa,%al
22b: │ ee out %al,(%dx)
22c: ╰─────── eb ce jmp 1fc <print_msg+0xf>
On 11.06.2024 20:14, Jiří Wolker wrote:
> I've just modified the objdump to render the jump visualization using
> UTF-8 characters. I attach the patch and a screenshot of the new output
> with extended color enabled.
>
> I send this message to ask whether this feature has an opportunity to be
> added to the binutils.
I see no reason why it shouldn't be acceptable once finished.
> My patch is not finished – it is required to add
> some other characters to the output to make the output cleaner, also
> autodetection of UTF-8 environment (and enabling the UTF-8 on such
> machines) should be added. Also, fine-tuning the output (such as
> changing arrow shape) for user's needs would be a nice feature.
Right, even the rendering in the browser on the mailing list archive
shows, like at least my email UI, that ...
> Output of the objdump, in plain text:
>
> 00000000000001ed <print_msg>:
> 1ed: 50 push %rax
> 1ee: 52 push %rdx
> 1ef: 56 push %rsi
> 1f0: ╭────→ ac lods %ds:(%rsi),%al
... arrows aren't quite smooth. Unlike in the picture you had attached.
As to selection of characters, I'd like to note that in the viewer I'm
using to look at your patch as well as in one of the the two consoles I
tried, I can't properly see two of the characters, apparently due to
limitations of the font used. While use of the feature is optional
anyway, it might be nice if characters chosen were covered by a wide
range of commonly used fonts (including ones specifically aiming at use
for console output, which I've often found to support more limited sets).
As to the new command line option: I think the way you have it would
end up somewhat ambiguous if both it and the original option were used.
Instead I think you want to make this a new sub-option of
--visualize-jumps, i.e. --visualize-jumps=utf8. Further, just to
mention it because you don't say so above, please don't forget to also
adjust doc/binutils.texi.
Jan
> 1f1: │ 08 c0 or %al,%al
> 1f3: │ ╭─ 74 04 je 1f9 <print_msg+0xc>
> 1f5: │ │ e6 e9 out %al,$0xe9
> 1f7: ╰──│─ eb f7 jmp 1f0 <print_msg+0x3>
> 1f9: ╰─→ 8b 34 24 mov (%rsp),%esi
> 1fc: ╭───────→ 66 ba fd 03 mov $0x3fd,%dx
> 200: │ ec in (%dx),%al
> 201: │ 24 20 and $0x20,%al
> 203: ├─────── 74 f7 je 1fc <print_msg+0xf>
> 205: │ 66 ba f8 03 mov $0x3f8,%dx
> 209: │ ac lods %ds:(%rsi),%al
> 20a: │ 08 c0 or %al,%al
> 20c: │ ╭─ 74 07 je 215 <print_msg+0x28>
> 20e: │ │ 3c 0a cmp $0xa,%al
> 210: │ ╭──│─ 74 07 je 219 <print_msg+0x2c>
> 212: │ │ │ ee out %al,(%dx)
> 213: ├──│──│─ eb e7 jmp 1fc <print_msg+0xf>
> 215: │ │ ╰─→ 5e pop %rsi
> 216: │ │ 5a pop %rdx
> 217: │ │ 58 pop %rax
> 218: │ │ c3 ret
> 219: │ ╰────→ b0 0d mov $0xd,%al
> 21b: │ ee out %al,(%dx)
> 21c: │ ╭─→ 66 ba fd 03 mov $0x3fd,%dx
> 220: │ │ ec in (%dx),%al
> 221: │ │ 24 20 and $0x20,%al
> 223: │ ╰─ 74 f7 je 21c <print_msg+0x2f>
> 225: │ 66 ba f8 03 mov $0x3f8,%dx
> 229: │ b0 0a mov $0xa,%al
> 22b: │ ee out %al,(%dx)
> 22c: ╰─────── eb ce jmp 1fc <print_msg+0xf>
>
From 9a61f321c098b3d7f9484b5673bc1afaf2fb6814 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ji=C5=99=C3=AD=20Wolker?= <projects@jwo.cz>
Date: Tue, 11 Jun 2024 19:56:43 +0200
Subject: [PATCH] objdump: UTF-8 jump visualization in disassembly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
A new option ‘--visualize-jumps-utf8’ was added. It uses the same
arguments like ‘--visualize-jumps’, but UTF-8 characters are used for
rendering the jump visualization instead of plain ASCII characters.
---
binutils/objdump.c | 127 +++++++++++++++++++++++++++++++++------------
1 file changed, 95 insertions(+), 32 deletions(-)
@@ -136,6 +136,7 @@ static const char * source_comment; /* --source_comment. */
static bool visualize_jumps = false; /* --visualize-jumps. */
static bool color_output = false; /* --visualize-jumps=color. */
static bool extended_color_output = false; /* --visualize-jumps=extended-color. */
+static bool utf8_jumps = false; /* --visualize-jumps-utf8. */
static int process_links = false; /* --process-links. */
static int show_all_symbols; /* --show-all-symbols. */
static bool decompressed_dumps = false; /* -Z, --decompress. */
@@ -236,6 +237,43 @@ static const struct objdump_private_desc * const objdump_private_vectors[] =
/* The list of detected jumps inside a function. */
static struct jump_info *detected_jumps = NULL;
+/* Symbols used in the jump visualization. */
+enum jump_line
+ {
+ jump_line_none,
+ jump_line_hor,
+ jump_line_arrow,
+ jump_line_biarrow,
+ jump_line_topleft,
+ jump_line_branch,
+ jump_line_bottomright,
+ jump_line_dest,
+ jump_line_ver,
+ };
+static const char jump_lines[] =
+ {
+ [jump_line_none] = ' ',
+ [jump_line_hor] = '-',
+ [jump_line_arrow] = '>',
+ [jump_line_biarrow] = 'X',
+ [jump_line_topleft] = ',',
+ [jump_line_branch] = '+',
+ [jump_line_bottomright] = '\\',
+ [jump_line_dest] = '>',
+ [jump_line_ver] = '|',
+ };
+static const char *const jump_lines_utf8[] =
+ {
+ [jump_line_none] = " ",
+ [jump_line_hor] = "─",
+ [jump_line_biarrow] = "X",
+ [jump_line_topleft] = "╭",
+ [jump_line_branch] = "├",
+ [jump_line_bottomright] = "╰",
+ [jump_line_dest] = "→",
+ [jump_line_ver] = "│",
+ };
+
typedef enum unicode_display_type
{
unicode_default = 0,
@@ -430,6 +468,8 @@ usage (FILE *stream, int status)
--visualize-jumps=extended-color\n\
Use extended 8-bit color codes\n"));
fprintf (stream, _("\
+ --visualize-jumps-utf8 Use box drawing characters for jumps visualization\n"));
+ fprintf (stream, _("\
--visualize-jumps=off Disable jump visualization\n"));
#if DEFAULT_FOR_COLORED_DISASSEMBLY
fprintf (stream, _("\
@@ -490,6 +530,7 @@ enum option_values
#endif
OPTION_SFRAME,
OPTION_VISUALIZE_JUMPS,
+ OPTION_VISUALIZE_JUMPS_UTF8,
OPTION_DISASSEMBLER_COLOR
};
@@ -559,6 +600,7 @@ static struct option long_options[]=
{"unicode", required_argument, NULL, 'U'},
{"version", no_argument, NULL, 'V'},
{"visualize-jumps", optional_argument, 0, OPTION_VISUALIZE_JUMPS},
+ {"visualize-jumps-utf8", optional_argument, 0, OPTION_VISUALIZE_JUMPS_UTF8},
{"wide", no_argument, NULL, 'w'},
{"disassembler-color", required_argument, NULL, OPTION_DISASSEMBLER_COLOR},
{NULL, no_argument, NULL, 0}
@@ -2888,14 +2930,14 @@ jump_info_sort (struct jump_info **base)
static void
jump_info_visualize_address (bfd_vma address,
int max_level,
- char *line_buffer,
+ uint8_t *line_buffer,
uint8_t *color_buffer)
{
struct jump_info *ji = detected_jumps;
size_t len = (max_level + 1) * 3;
/* Clear line buffer. */
- memset (line_buffer, ' ', len);
+ memset (line_buffer, jump_line_none, len);
memset (color_buffer, 0, len);
/* Iterate over jumps and add their ASCII art. */
@@ -2928,31 +2970,39 @@ jump_info_visualize_address (bfd_vma address,
size_t i = offset + 1;
for (; i < len - 1; ++i)
- if (line_buffer[i] == ' ')
+ if (line_buffer[i] == jump_line_none)
{
- line_buffer[i] = '-';
+ line_buffer[i] = jump_line_hor;
color_buffer[i] = color;
}
- if (line_buffer[i] == ' ')
+ if (line_buffer[i] == jump_line_none)
{
- line_buffer[i] = '-';
+ line_buffer[i] = jump_line_none;
color_buffer[i] = color;
}
- else if (line_buffer[i] == '>')
+ else if (line_buffer[i] == jump_line_dest)
{
- line_buffer[i] = 'X';
+ line_buffer[i] = jump_line_biarrow;
color_buffer[i] = color;
}
- if (line_buffer[offset] == ' ')
+ if (line_buffer[offset] == jump_line_none)
{
if (address <= ji->end)
- line_buffer[offset] =
- (jump_info_min_address (ji) == address) ? ',': '+';
+ {
+ if (jump_info_min_address (ji) == address)
+ line_buffer[offset] = jump_line_topleft;
+ else
+ line_buffer[offset] = jump_line_branch;
+ }
else
- line_buffer[offset] =
- (jump_info_max_address (ji) == address) ? '\'': '+';
+ {
+ if (jump_info_max_address (ji) == address)
+ line_buffer[offset] = jump_line_bottomright;
+ else
+ line_buffer[offset] = jump_line_branch;
+ }
color_buffer[offset] = color;
}
}
@@ -2962,37 +3012,38 @@ jump_info_visualize_address (bfd_vma address,
size_t i = offset + 1;
for (; i < len - 1; ++i)
- if (line_buffer[i] == ' ')
+ if (line_buffer[i] == jump_line_none)
{
- line_buffer[i] = '-';
+ line_buffer[i] = jump_line_hor;
color_buffer[i] = color;
}
- if (line_buffer[i] == ' ')
+ if (line_buffer[i] == jump_line_none)
{
- line_buffer[i] = '>';
+ line_buffer[i] = jump_line_dest;
color_buffer[i] = color;
}
- else if (line_buffer[i] == '-')
+ else if (line_buffer[i] == jump_line_hor)
{
- line_buffer[i] = 'X';
+ line_buffer[i] = jump_line_biarrow;
color_buffer[i] = color;
}
- if (line_buffer[offset] == ' ')
+ if (line_buffer[offset] == jump_line_none)
{
if (jump_info_min_address (ji) < address)
line_buffer[offset] =
- (jump_info_max_address (ji) > address) ? '>' : '\'';
+ (jump_info_max_address (ji) > address)
+ ? jump_line_dest : jump_line_bottomright;
else
- line_buffer[offset] = ',';
+ line_buffer[offset] = jump_line_topleft;
color_buffer[offset] = color;
}
}
/* Draw intermediate line segment. */
- else if (line_buffer[offset] == ' ')
+ else if (line_buffer[offset] == jump_line_none)
{
- line_buffer[offset] = '|';
+ line_buffer[offset] = jump_line_ver;
color_buffer[offset] = color;
}
}
@@ -3221,7 +3272,8 @@ null_styled_print (const void * stream ATTRIBUTE_UNUSED,
/* Print out jump visualization. */
static void
-print_jump_visualisation (bfd_vma addr, int max_level, char *line_buffer,
+print_jump_visualisation (bfd_vma addr, int max_level,
+ uint8_t *line_buffer, size_t line_buffer_size,
uint8_t *color_buffer)
{
if (!line_buffer)
@@ -3229,7 +3281,6 @@ print_jump_visualisation (bfd_vma addr, int max_level, char *line_buffer,
jump_info_visualize_address (addr, max_level, line_buffer, color_buffer);
- size_t line_buffer_size = strlen (line_buffer);
char last_color = 0;
size_t i;
@@ -3255,7 +3306,12 @@ print_jump_visualisation (bfd_vma addr, int max_level, char *line_buffer,
last_color = color;
}
}
- putchar ((i < line_buffer_size) ? line_buffer[i]: ' ');
+ if (i >= line_buffer_size)
+ putchar (' ');
+ else if (utf8_jumps)
+ fputs (jump_lines_utf8[line_buffer[i]], stdout);
+ else
+ putchar (jump_lines[line_buffer[i]]);
}
}
@@ -3323,7 +3379,8 @@ disassemble_bytes (struct disassemble_info *inf,
/* Determine maximum level. */
uint8_t *color_buffer = NULL;
- char *line_buffer = NULL;
+ uint8_t *line_buffer = NULL;
+ size_t line_buffer_size = -1;
int max_level = -1;
/* Some jumps were detected. */
@@ -3340,10 +3397,11 @@ disassemble_bytes (struct disassemble_info *inf,
/* Allocate buffers. */
size_t len = (max_level + 1) * 3 + 1;
+ line_buffer_size = len - 1;
line_buffer = xmalloc (len);
- line_buffer[len - 1] = 0;
+ line_buffer[line_buffer_size] = 0;
color_buffer = xmalloc (len);
- color_buffer[len - 1] = 0;
+ color_buffer[line_buffer_size] = 0;
}
addr_offset = start_offset;
@@ -3420,7 +3478,8 @@ disassemble_bytes (struct disassemble_info *inf,
}
print_jump_visualisation (section->vma + addr_offset,
- max_level, line_buffer,
+ max_level,
+ line_buffer, line_buffer_size,
color_buffer);
if (insns)
@@ -3629,7 +3688,8 @@ disassemble_bytes (struct disassemble_info *inf,
}
print_jump_visualisation (section->vma + j / opb,
- max_level, line_buffer,
+ max_level,
+ line_buffer, line_buffer_size,
color_buffer);
pb += octets_per_line;
@@ -6093,6 +6153,9 @@ main (int argc, char **argv)
case OPTION_INLINES:
unwind_inlines = true;
break;
+ case OPTION_VISUALIZE_JUMPS_UTF8:
+ utf8_jumps = true;
+ /* FALLTHROUGH */
case OPTION_VISUALIZE_JUMPS:
visualize_jumps = true;
color_output = false;
--
2.44.2