[3/5] x86: widen @got{,pcrel} support to PUSH and APX IMUL
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_binutils_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_binutils_check--master-arm |
success
|
Test passed
|
Commit Message
With us doing the transformation to an immediate operand for MOV and
various ALU insns, there's little reason to then not support the same
conversion for the other two insns which have respective immediate
operand forms. Unfortunately for IMUL (due to the 0F opcode prefix)
there's no suitable relocation, so the pre-APX forms cannot be marked
for relaxation in the assembler.
---
TBD: Is REX2 really permitted with PUSH <imm>?
Comments
On Mon, Feb 3, 2025 at 7:41 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> With us doing the transformation to an immediate operand for MOV and
> various ALU insns, there's little reason to then not support the same
> conversion for the other two insns which have respective immediate
> operand forms. Unfortunately for IMUL (due to the 0F opcode prefix)
> there's no suitable relocation, so the pre-APX forms cannot be marked
> for relaxation in the assembler.
> ---
> TBD: Is REX2 really permitted with PUSH <imm>?
>
> --- a/bfd/elf32-i386.c
> +++ b/bfd/elf32-i386.c
> @@ -1209,6 +1209,10 @@ elf_i386_tls_transition (struct bfd_link
> to
> test $foo, %reg1
> and convert
> + push foo@GOT[(%reg)]
> + to
> + push $foo
> + and convert
> binop foo@GOT[(%reg1)], %reg2
> to
> binop $foo, %reg2
> @@ -1233,7 +1237,7 @@ elf_i386_convert_load_reloc (bfd *abfd,
> unsigned int addend;
> unsigned int nop;
> bfd_vma nop_offset;
> - bool is_pic;
> + bool is_pic, is_branch = false;
> bool to_reloc_32;
> bool abs_symbol;
> unsigned int r_type;
> @@ -1301,6 +1305,23 @@ elf_i386_convert_load_reloc (bfd *abfd,
>
> opcode = bfd_get_8 (abfd, contents + roff - 2);
>
> + if (opcode == 0xff)
> + {
> + switch (modrm & 0x38)
> + {
> + case 0x10: /* CALL */
> + case 0x20: /* JMP */
> + is_branch = true;
> + break;
> +
> + case 0x30: /* PUSH */
> + break;
> +
> + default:
> + return true;
> + }
> + }
> +
> /* Convert to R_386_32 if PIC is false or there is no base
> register. */
> to_reloc_32 = !is_pic || baseless;
> @@ -1311,7 +1332,7 @@ elf_i386_convert_load_reloc (bfd *abfd,
> reloc. */
> if (h == NULL)
> {
> - if (opcode == 0x0ff)
> + if (is_branch)
> /* Convert "call/jmp *foo@GOT[(%reg)]". */
> goto convert_branch;
> else
> @@ -1327,7 +1348,7 @@ elf_i386_convert_load_reloc (bfd *abfd,
> && !eh->linker_def
> && local_ref)
> {
> - if (opcode == 0xff)
> + if (is_branch)
> {
> /* No direct branch to 0 for PIC. */
> if (is_pic)
> @@ -1343,7 +1364,7 @@ elf_i386_convert_load_reloc (bfd *abfd,
> }
> }
>
> - if (opcode == 0xff)
> + if (is_branch)
> {
> /* We have "call/jmp *foo@GOT[(%reg)]". */
> if ((h->root.type == bfd_link_hash_defined
> @@ -1399,7 +1420,8 @@ elf_i386_convert_load_reloc (bfd *abfd,
> else
> {
> /* We have "mov foo@GOT[(%re1g)], %reg2",
> - "test %reg1, foo@GOT(%reg2)" and
> + "test %reg1, foo@GOT(%reg2)",
> + "push foo@GOT[(%reg)]", or
> "binop foo@GOT[(%reg1)], %reg2".
>
> Avoid optimizing _DYNAMIC since ld.so may use its
> @@ -1460,6 +1482,13 @@ elf_i386_convert_load_reloc (bfd *abfd,
> modrm = 0xc0 | ((modrm & 0x38) >> 3) | (opcode & 0x38);
> opcode = 0x81;
> }
> + else if (opcode == 0xff)
> + {
> + /* Convert "push foo@GOT(%reg)" to
> + "push $foo". */
> + modrm = 0x68; /* Really the opcode. */
> + opcode = 0x26; /* Really a meaningless %es: prefix. */
> + }
> else
> return true;
>
> --- a/bfd/elf64-x86-64.c
> +++ b/bfd/elf64-x86-64.c
> @@ -1739,13 +1739,16 @@ elf_x86_64_need_pic (struct bfd_link_inf
> }
>
> /* Move the R bits to the B bits in EVEX payload byte 1. */
> -static unsigned int evex_move_r_to_b (unsigned int byte1)
> +static unsigned int evex_move_r_to_b (unsigned int byte1, bool copy)
> {
> byte1 = (byte1 & ~(1 << 5)) | ((byte1 & (1 << 7)) >> 2); /* R3 -> B3 */
> byte1 = (byte1 & ~(1 << 3)) | ((~byte1 & (1 << 4)) >> 1); /* R4 -> B4 */
>
> /* Set both R bits, as they're inverted. */
> - return byte1 | (1 << 4) | (1 << 7);
> + if (!copy)
> + byte1 |= (1 << 4) | (1 << 7);
> +
> + return byte1;
> }
>
> /* With the local symbol, foo, we convert
> @@ -1762,10 +1765,14 @@ static unsigned int evex_move_r_to_b (un
> to
> test $foo, %reg
> and convert
> + push foo@GOTPCREL(%rip)
> + to
> + push $foo
> + and convert
> binop foo@GOTPCREL(%rip), %reg
> to
> binop $foo, %reg
> - where binop is one of adc, add, and, cmp, or, sbb, sub, xor
> + where binop is one of adc, add, and, cmp, imul, or, sbb, sub, xor
> instructions. */
>
> static bool
> @@ -1781,6 +1788,7 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> bool is_pic;
> bool no_overflow;
> bool relocx;
> + bool is_branch = false;
> bool to_reloc_pc32;
> bool abs_symbol;
> bool local_ref;
> @@ -1873,6 +1881,23 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> r_symndx = htab->r_sym (irel->r_info);
>
> opcode = bfd_get_8 (abfd, contents + roff - 2);
> + modrm = bfd_get_8 (abfd, contents + roff - 1);
> + if (opcode == 0xff)
> + {
> + switch (modrm & 0x38)
> + {
> + case 0x10: /* CALL */
> + case 0x20: /* JMP */
> + is_branch = true;
> + break;
> +
> + case 0x30: /* PUSH */
> + break;
> +
> + default:
> + return true;
> + }
> + }
>
> /* Convert mov to lea since it has been done for a while. */
> if (opcode != 0x8b)
> @@ -1890,7 +1915,7 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> 3. no_overflow is true.
> 4. PIC.
> */
> - to_reloc_pc32 = (opcode == 0xff
> + to_reloc_pc32 = (is_branch
> || !relocx
> || no_overflow
> || is_pic);
> @@ -1942,7 +1967,7 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> && !eh->linker_def
> && local_ref))
> {
> - if (opcode == 0xff)
> + if (is_branch)
> {
> /* Skip for branch instructions since R_X86_64_PC32
> may overflow. */
> @@ -2009,7 +2034,7 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> return true;
>
> convert:
> - if (opcode == 0xff)
> + if (is_branch)
> {
> /* We have "call/jmp *foo@GOTPCREL(%rip)". */
> unsigned int nop;
> @@ -2018,7 +2043,6 @@ elf_x86_64_convert_load_reloc (bfd *abfd
>
> /* Convert R_X86_64_GOTPCRELX and R_X86_64_REX_GOTPCRELX to
> R_X86_64_PC32. */
> - modrm = bfd_get_8 (abfd, contents + roff - 1);
> if (modrm == 0x25)
> {
> /* Convert to "jmp foo nop". */
> @@ -2064,11 +2088,12 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> }
> else if (r_type == R_X86_64_CODE_6_GOTPCRELX && opcode != 0x8b)
> {
> + bool move_v_r = false;
> +
> /* R_X86_64_PC32 isn't supported. */
> if (to_reloc_pc32)
> return true;
>
> - modrm = bfd_get_8 (abfd, contents + roff - 1);
> if (opcode == 0x85)
> {
> /* Convert "ctest<cc> %reg, foo@GOTPCREL(%rip)" to
> @@ -2094,6 +2119,23 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> modrm = 0xc0 | ((modrm & 0x38) >> 3) | (opcode & 0x38);
> opcode = 0x81;
> }
> + else if (opcode == 0xaf)
> + {
> + if (!(evex[2] & 0x10))
> + {
> + /* Convert "imul foo@GOTPCREL(%rip), %reg" to
> + "imul $foo, %reg, %reg". */
> + modrm = 0xc0 | ((modrm & 0x38) >> 3) | (modrm & 0x38);
> + }
> + else
> + {
> + /* Convert "imul foo@GOTPCREL(%rip), %reg1, %reg2" to
> + "imul $foo, %reg1, %reg2". */
> + modrm = 0xc0 | ((modrm & 0x38) >> 3) | (~evex[1] & 0x38);
> + move_v_r = true;
> + }
> + opcode = 0x69;
> + }
> else
> return true;
>
> @@ -2119,7 +2161,23 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> bfd_put_8 (abfd, opcode, contents + roff - 2);
> bfd_put_8 (abfd, modrm, contents + roff - 1);
>
> - evex[0] = evex_move_r_to_b (evex[0]);
> + evex[0] = evex_move_r_to_b (evex[0], opcode == 0x69 && !move_v_r);
> + if (move_v_r)
> + {
> + /* Move the top two V bits to the R bits in EVEX payload byte 1.
> + Note that evex_move_r_to_b() set both R bits. */
> + if (!(evex[1] & (1 << 6)))
> + evex[0] &= ~(1 << 7); /* V3 -> R3 */
> + if (!(evex[2] & (1 << 3)))
> + evex[0] &= ~(1 << 4); /* V4 -> R4 */
> + /* Set all V bits, as they're inverted. */
> + evex[1] |= 0xf << 3;
> + evex[2] |= 1 << 3;
> + /* Clear the ND (ZU) bit (it ought to be ignored anyway). */
> + evex[2] &= ~(1 << 4);
> + bfd_put_8 (abfd, evex[2], contents + roff - 3);
> + bfd_put_8 (abfd, evex[1], contents + roff - 4);
> + }
> bfd_put_8 (abfd, evex[0], contents + roff - 5);
>
> /* No addend for R_X86_64_32/R_X86_64_32S relocations. */
> @@ -2162,7 +2220,10 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> {
> if (bfd_get_8 (abfd, contents + roff - 4) == 0xd5)
> {
> - rex2 = bfd_get_8 (abfd, contents + roff - 3);
> + /* Make sure even an all-zero payload leaves a non-zero value
> + in the variable. */
> + rex2 = bfd_get_8 (abfd, contents + roff - 3) | 0x100;
> + rex2_mask |= 0x100;
> rex_w = (rex2 & REX_W) != 0;
> }
> else if (bfd_get_8 (abfd, contents + roff - 4) == 0x0f)
> @@ -2195,7 +2256,6 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> /* Convert "mov foo@GOTPCREL(%rip), %reg" to
> "mov $foo, %reg". */
> opcode = 0xc7;
> - modrm = bfd_get_8 (abfd, contents + roff - 1);
> modrm = 0xc0 | (modrm & 0x38) >> 3;
> if (rex_w && ABI_64_P (link_info->output_bfd))
> {
> @@ -2222,7 +2282,6 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> if (to_reloc_pc32)
> return true;
>
> - modrm = bfd_get_8 (abfd, contents + roff - 1);
> if (opcode == 0x85)
> {
> /* Convert "test %reg, foo@GOTPCREL(%rip)" to
> @@ -2237,6 +2296,39 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> modrm = 0xc0 | ((modrm & 0x38) >> 3) | (opcode & 0x38);
> opcode = 0x81;
> }
> + else if (opcode == 0xaf && (rex2 & (REX2_M << 4)))
> + {
> + /* Convert "imul foo@GOTPCREL(%rip), %reg" to
> + "imul $foo, %reg, %reg". */
> + modrm = 0xc0 | ((modrm & 0x38) >> 3) | (modrm & 0x38);
> + rex_mask = 0;
> + rex2_mask = REX2_M << 4;
> + opcode = 0x69;
> + }
> + else if (opcode == 0xff && !(rex2 & (REX2_M << 4)))
> + {
> + /* Convert "push foo@GOTPCREL(%rip)" to
> + "push $foo". */
> + bfd_put_8 (abfd, 0x68, contents + roff - 1);
> + if (rex)
> + {
> + bfd_put_8 (abfd, 0x26, contents + roff - 3);
> + bfd_put_8 (abfd, rex, contents + roff - 2);
> + }
> + else if (rex2)
> + {
> + bfd_put_8 (abfd, 0x26, contents + roff - 4);
> + bfd_put_8 (abfd, 0xd5, contents + roff - 3);
> + bfd_put_8 (abfd, rex2, contents + roff - 2);
> + }
> + else
> + bfd_put_8 (abfd, 0x26, contents + roff - 2);
> +
> + r_type = R_X86_64_32S;
> + /* No addend for R_X86_64_32S relocations. */
> + irel->r_addend = 0;
> + goto finish;
> + }
> else
> return true;
>
> @@ -2297,6 +2389,7 @@ elf_x86_64_convert_load_reloc (bfd *abfd
> }
> }
>
> + finish:
> *r_type_p = r_type;
> irel->r_info = htab->r_info (r_symndx,
> r_type | R_X86_64_converted_reloc_bit);
> @@ -4378,7 +4471,7 @@ elf_x86_64_relocate_section (bfd *output
> continue;
> }
>
> - byte1 = evex_move_r_to_b (byte1);
> + byte1 = evex_move_r_to_b (byte1, false);
> bfd_put_8 (output_bfd, byte1, contents + roff - 5);
> bfd_put_8 (output_bfd, 0x81, contents + roff - 2);
> bfd_put_8 (output_bfd, 0xc0 | reg, contents + roff - 1);
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -12928,9 +12928,9 @@ output_disp (fragS *insn_start_frag, off
> else if (object_64bit)
> continue;
>
> - /* Check for "call/jmp *mem", "mov mem, %reg", "movrs mem, %reg",
> - "test %reg, mem" and "binop mem, %reg" where binop
> - is one of adc, add, and, cmp, or, sbb, sub, xor
> + /* Check for "call/jmp *mem", "push mem", "mov mem, %reg",
> + "movrs mem, %reg", "test %reg, mem" and "binop mem, %reg" where
> + binop is one of adc, add, and, cmp, or, sbb, sub, xor, or imul
> instructions without data prefix. Always generate
> R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
> unsigned int space = dot_insn () ? i.insn_opcode_space
> @@ -12940,7 +12940,7 @@ output_disp (fragS *insn_start_frag, off
> || (i.rm.mode == 0 && i.rm.regmem == 5))
> && ((space == SPACE_BASE
> && i.tm.base_opcode == 0xff
> - && (i.rm.reg == 2 || i.rm.reg == 4))
> + && (i.rm.reg == 2 || i.rm.reg == 4 || i.rm.reg == 6))
> || ((space == SPACE_BASE
> || space == SPACE_0F38
> || space == SPACE_MAP4)
> @@ -12949,7 +12949,13 @@ output_disp (fragS *insn_start_frag, off
> || space == SPACE_MAP4)
> && (i.tm.base_opcode == 0x85
> || (i.tm.base_opcode
> - | (i.operands > 2 ? 0x3a : 0x38)) == 0x3b))))
> + | (i.operands > 2 ? 0x3a : 0x38)) == 0x3b))
> + || (((space == SPACE_0F
> + /* Because of the 0F prefix, no suitable relocation
> + exists for this unless it's REX2-encoded. */
> + && is_apx_rex2_encoding ())
> + || space == SPACE_MAP4)
> + && i.tm.base_opcode == 0xaf)))
> {
> if (object_64bit)
> {
> --- a/ld/testsuite/ld-i386/i386.exp
> +++ b/ld/testsuite/ld-i386/i386.exp
> @@ -373,6 +373,8 @@ run_dump_test "load5a"
> run_dump_test "load5b"
> run_dump_test "load6"
> run_dump_test "load7"
> +run_dump_test "load8a"
> +run_dump_test "load8b"
> run_dump_test "pr19175"
> run_dump_test "pr19615"
> run_dump_test "pr19636-1a"
> --- /dev/null
> +++ b/ld/testsuite/ld-i386/load8.s
> @@ -0,0 +1,20 @@
> + .data
> + .type bar, @object
> +bar:
> + .byte 1
> + .size bar, .-bar
> + .globl foo
> + .type foo, @object
> +foo:
> + .byte 1
> + .size foo, .-foo
> + .text
> + .globl _start
> + .type _start, @function
> +_start:
> + push bar@GOT(%ecx)
> + push foo@GOT(%edx)
> + .ifndef PIC
> + push foo@GOT
> + .endif
> + .size _start, .-_start
> --- /dev/null
> +++ b/ld/testsuite/ld-i386/load8a.d
> @@ -0,0 +1,14 @@
> +#source: load8.s
> +#as: --32 -mrelax-relocations=yes
> +#ld: -melf_i386 -z noseparate-code
> +#objdump: -dw
> +
> +.*: +file format .*
> +
> +Disassembly of section .text:
> +
> +0+8048074 <_start>:
> +[ ]*[a-f0-9]+: 26 68 86 90 04 08 es push \$0x8049086
> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
Please avoid adding the es prefix. It may not be nop in the future.
> +#pass
> --- /dev/null
> +++ b/ld/testsuite/ld-i386/load8b.d
> @@ -0,0 +1,13 @@
> +#source: load8.s
> +#as: --32 -mshared -mrelax-relocations=yes --defsym PIC=1
> +#ld: -melf_i386 -shared -z noseparate-code
> +#objdump: -dw
> +
> +.*: +file format .*
> +
> +Disassembly of section .text:
> +
> +0+[0-9a-f]+ <_start>:
> +[ ]*[0-9a-f]+: ff b1 f8 ff ff ff push -0x8\(%ecx\)
> +[ ]*[0-9a-f]+: ff b2 fc ff ff ff push -0x4\(%edx\)
> +#pass
> --- a/ld/testsuite/ld-x86-64/apx-load1.s
> +++ b/ld/testsuite/ld-x86-64/apx-load1.s
> @@ -118,5 +118,11 @@ _start:
> sub %rbp, bar@GOTPCREL(%rip), %r21
> xor %rsi, bar@GOTPCREL(%rip), %r22
>
> + imul bar@GOTPCREL(%rip), %r17
> + {nf} imul bar@GOTPCREL(%rip), %r17
> + imul bar@GOTPCREL(%rip), %r17, %rdx
> + imul bar@GOTPCREL(%rip), %rcx, %r18
> + {rex2} pushq bar@GOTPCREL(%rip)
> +
> .size _start, .-_start
> .p2align 12, 0x90
> --- a/ld/testsuite/ld-x86-64/apx-load1a.d
> +++ b/ld/testsuite/ld-x86-64/apx-load1a.d
> @@ -115,4 +115,9 @@ Disassembly of section .text:
> +[a-f0-9]+: 62 f4 dc 10 19 25 74 0c 20 00 sbb %rsp,0x200c74\(%rip\),%r20 # 602000 <.*>
> +[a-f0-9]+: 62 f4 d4 10 29 2d 6a 0c 20 00 sub %rbp,0x200c6a\(%rip\),%r21 # 602000 <.*>
> +[a-f0-9]+: 62 f4 cc 10 81 f6 20 20 60 00 xor \$0x602020,%rsi,%r22
> + +[a-f0-9]+: d5 58 69 c9 20 20 60 00 imul \$0x602020,%r17,%r17
> + +[a-f0-9]+: 62 ec fc 0c 69 c9 20 20 60 00 \{nf\} imul \$0x602020,%r17,%r17
> + +[a-f0-9]+: 62 fc fc 08 69 d1 20 20 60 00 imul \$0x602020,%r17,%rdx
> + +[a-f0-9]+: 62 e4 fc 08 69 d1 20 20 60 00 imul \$0x602020,%rcx,%r18
> + +[a-f0-9]+: 26 d5 00 68 20 20 60 00 es \{rex2 0x0\} push \$0x602020
> #pass
> --- a/ld/testsuite/ld-x86-64/apx-load1c.d
> +++ b/ld/testsuite/ld-x86-64/apx-load1c.d
> @@ -108,4 +108,9 @@ Disassembly of section .text:
> +[a-f0-9]+: 62 f4 dc 10 19 25 54 0d 20 00 sbb %rsp,0x200d54\(%rip\),%r20 # 2020e0 <.*>
> +[a-f0-9]+: 62 f4 d4 10 29 2d 4a 0d 20 00 sub %rbp,0x200d4a\(%rip\),%r21 # 2020e0 <.*>
> +[a-f0-9]+: 62 f4 cc 10 31 35 40 0d 20 00 xor %rsi,0x200d40\(%rip\),%r22 # 2020e0 <.*>
> + +[a-f0-9]+: d5 c8 af 0d 38 0d 20 00 imul 0x200d38\(%rip\),%r17 # 2020e0 <.*>
> + +[a-f0-9]+: 62 e4 fc 0c af 0d 2e 0d 20 00 \{nf\} imul 0x200d2e\(%rip\),%r17 # 2020e0 <.*>
> + +[a-f0-9]+: 62 e4 ec 18 af 0d 24 0d 20 00 imul 0x200d24\(%rip\),%r17,%rdx # 2020e0 <.*>
> + +[a-f0-9]+: 62 f4 ec 10 af 0d 1a 0d 20 00 imul 0x200d1a\(%rip\),%rcx,%r18 # 2020e0 <.*>
> + +[a-f0-9]+: d5 00 ff 35 12 0d 20 00 \{rex2 0x0\} push 0x200d12\(%rip\) # 2020e0 <.*>
> #pass
> --- a/ld/testsuite/ld-x86-64/apx-load1d.d
> +++ b/ld/testsuite/ld-x86-64/apx-load1d.d
> @@ -108,4 +108,9 @@ Disassembly of section .text:
> +[a-f0-9]+: 62 f4 dc 10 19 25 e4 0c 20 00 sbb %rsp,0x200ce4\(%rip\),%r20 # 202070 <.*>
> +[a-f0-9]+: 62 f4 d4 10 29 2d da 0c 20 00 sub %rbp,0x200cda\(%rip\),%r21 # 202070 <.*>
> +[a-f0-9]+: 62 f4 cc 10 31 35 d0 0c 20 00 xor %rsi,0x200cd0\(%rip\),%r22 # 202070 <.*>
> + +[a-f0-9]+: d5 c8 af 0d c8 0c 20 00 imul 0x200cc8\(%rip\),%r17 # 202070 <.*>
> + +[a-f0-9]+: 62 e4 fc 0c af 0d be 0c 20 00 \{nf\} imul 0x200cbe\(%rip\),%r17 # 202070 <.*>
> + +[a-f0-9]+: 62 e4 ec 18 af 0d b4 0c 20 00 imul 0x200cb4\(%rip\),%r17,%rdx # 202070 <.*>
> + +[a-f0-9]+: 62 f4 ec 10 af 0d aa 0c 20 00 imul 0x200caa\(%rip\),%rcx,%r18 # 202070 <.*>
> + +[a-f0-9]+: d5 00 ff 35 a2 0c 20 00 \{rex2 0x0\} push 0x200ca2\(%rip\) # 202070 <.*>
> #pass
> --- /dev/null
> +++ b/ld/testsuite/ld-x86-64/load5.s
> @@ -0,0 +1,17 @@
> + .data
> + .type bar, @object
> +bar:
> + .byte 1
> + .size bar, .-bar
> + .globl foo
> + .type foo, @object
> +foo:
> + .byte 1
> + .size foo, .-foo
> + .text
> + .globl _start
> + .type _start, @function
> +_start:
> + pushq bar@GOTPCREL(%rip)
> + {rex} pushq foo@GOTPCREL(%rip)
> + .size _start, .-_start
> --- /dev/null
> +++ b/ld/testsuite/ld-x86-64/load5a.d
> @@ -0,0 +1,15 @@
> +#source: load5.s
> +#as: --64 -mrelax-relocations=yes
> +#ld: -melf_x86_64
> +#objdump: -dw
> +
> +.*: +file format .*
> +
> +
> +Disassembly of section .text:
> +
> +#...
> +[a-f0-9]+ <_start>:
> +[ ]*[a-f0-9]+: 26 68 ([0-9a-f]{2} ){4} * es push \$0x[a-f0-9]+
> +[ ]*[a-f0-9]+: 26 40 68 ([0-9a-f]{2} ){4} * es rex push \$0x[a-f0-9]+
> +#pass
> --- /dev/null
> +++ b/ld/testsuite/ld-x86-64/load5b.d
> @@ -0,0 +1,15 @@
> +#source: load5.s
> +#as: --64 -mrelax-relocations=yes
> +#ld: -shared -melf_x86_64
> +#objdump: -dw
> +
> +.*: +file format .*
> +
> +
> +Disassembly of section .text:
> +
> +#...
> +[a-f0-9]+ <_start>:
> +[ ]*[a-f0-9]+: ff 35 ([0-9a-f]{2} ){4} * push +0x[a-f0-9]+\(%rip\) # [a-f0-9]+ <.*>
> +[ ]*[a-f0-9]+: 40 ff 35 ([0-9a-f]{2} ){4} * rex push 0x[a-f0-9]+\(%rip\) # [a-f0-9]+ <.*>
> +#pass
> --- a/ld/testsuite/ld-x86-64/x86-64.exp
> +++ b/ld/testsuite/ld-x86-64/x86-64.exp
> @@ -654,6 +654,8 @@ run_dump_test "load2"
> run_dump_test "load3a"
> run_dump_test "load3b"
> run_dump_test "load4"
> +run_dump_test "load5a"
> +run_dump_test "load5b"
> run_dump_test "call1a"
> run_dump_test "call1b"
> run_dump_test "call1c"
>
On 03.02.2025 23:40, H.J. Lu wrote:
> On Mon, Feb 3, 2025 at 7:41 PM Jan Beulich <jbeulich@suse.com> wrote:
>> --- /dev/null
>> +++ b/ld/testsuite/ld-i386/load8a.d
>> @@ -0,0 +1,14 @@
>> +#source: load8.s
>> +#as: --32 -mrelax-relocations=yes
>> +#ld: -melf_i386 -z noseparate-code
>> +#objdump: -dw
>> +
>> +.*: +file format .*
>> +
>> +Disassembly of section .text:
>> +
>> +0+8048074 <_start>:
>> +[ ]*[a-f0-9]+: 26 68 86 90 04 08 es push \$0x8049086
>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
>
> Please avoid adding the es prefix. It may not be nop in the future.
Constructive comments please. What other prefix do you suggest we use?
Is another of the segment overrides okay? If not, all that's left is an
address size override, if I'm not mistaken. Which overall seems less
desirable to use.
Plus - is your concern only about 32-bit code, or also about 64-bit? For
32-bit code in particular I'm having difficulty seeing why an ES
prefix might gain new meaning going forward, when an increasing number
of ISA extensions are for 64-bit mode only anyway. If the concern
extends to 64-bit code (it shouldn't, as the pre-386 segment overrides
are documented as nop prefixes, if I'm not mistaken), earlier changes
would need adjusting then, too, I think.
Jan
On Tue, Feb 4, 2025 at 6:14 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 03.02.2025 23:40, H.J. Lu wrote:
> > On Mon, Feb 3, 2025 at 7:41 PM Jan Beulich <jbeulich@suse.com> wrote:
> >> --- /dev/null
> >> +++ b/ld/testsuite/ld-i386/load8a.d
> >> @@ -0,0 +1,14 @@
> >> +#source: load8.s
> >> +#as: --32 -mrelax-relocations=yes
> >> +#ld: -melf_i386 -z noseparate-code
> >> +#objdump: -dw
> >> +
> >> +.*: +file format .*
> >> +
> >> +Disassembly of section .text:
> >> +
> >> +0+8048074 <_start>:
> >> +[ ]*[a-f0-9]+: 26 68 86 90 04 08 es push \$0x8049086
> >> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
> >> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
> >
> > Please avoid adding the es prefix. It may not be nop in the future.
>
> Constructive comments please. What other prefix do you suggest we use?
> Is another of the segment overrides okay? If not, all that's left is an
> address size override, if I'm not mistaken. Which overall seems less
> desirable to use.
You can use 1-byte NOP.
> Plus - is your concern only about 32-bit code, or also about 64-bit? For
> 32-bit code in particular I'm having difficulty seeing why an ES
> prefix might gain new meaning going forward, when an increasing number
> of ISA extensions are for 64-bit mode only anyway. If the concern
> extends to 64-bit code (it shouldn't, as the pre-386 segment overrides
> are documented as nop prefixes, if I'm not mistaken), earlier changes
> would need adjusting then, too, I think.
I don't think adding instructions like PUSH is very useful.
On 04.02.2025 11:17, H.J. Lu wrote:
> On Tue, Feb 4, 2025 at 6:14 PM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 03.02.2025 23:40, H.J. Lu wrote:
>>> On Mon, Feb 3, 2025 at 7:41 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>> --- /dev/null
>>>> +++ b/ld/testsuite/ld-i386/load8a.d
>>>> @@ -0,0 +1,14 @@
>>>> +#source: load8.s
>>>> +#as: --32 -mrelax-relocations=yes
>>>> +#ld: -melf_i386 -z noseparate-code
>>>> +#objdump: -dw
>>>> +
>>>> +.*: +file format .*
>>>> +
>>>> +Disassembly of section .text:
>>>> +
>>>> +0+8048074 <_start>:
>>>> +[ ]*[a-f0-9]+: 26 68 86 90 04 08 es push \$0x8049086
>>>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
>>>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
>>>
>>> Please avoid adding the es prefix. It may not be nop in the future.
>>
>> Constructive comments please. What other prefix do you suggest we use?
>> Is another of the segment overrides okay? If not, all that's left is an
>> address size override, if I'm not mistaken. Which overall seems less
>> desirable to use.
>
> You can use 1-byte NOP.
That'll have an undue effect on debugging, by splitting a single insn
into two.
>> Plus - is your concern only about 32-bit code, or also about 64-bit? For
>> 32-bit code in particular I'm having difficulty seeing why an ES
>> prefix might gain new meaning going forward, when an increasing number
>> of ISA extensions are for 64-bit mode only anyway. If the concern
>> extends to 64-bit code (it shouldn't, as the pre-386 segment overrides
>> are documented as nop prefixes, if I'm not mistaken), earlier changes
>> would need adjusting then, too, I think.
>
> I don't think adding instructions like PUSH is very useful.
I was actively waiting for this kind of comment. Why was adding support
for e.g. ADC and SBB useful then? Imo it can only be one of two ways:
Either we support everything that can be supported, or we limit things
strictly to cases that are actively deemed useful.
Jan
On Tue, Feb 4, 2025 at 6:41 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 04.02.2025 11:17, H.J. Lu wrote:
> > On Tue, Feb 4, 2025 at 6:14 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 03.02.2025 23:40, H.J. Lu wrote:
> >>> On Mon, Feb 3, 2025 at 7:41 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>> --- /dev/null
> >>>> +++ b/ld/testsuite/ld-i386/load8a.d
> >>>> @@ -0,0 +1,14 @@
> >>>> +#source: load8.s
> >>>> +#as: --32 -mrelax-relocations=yes
> >>>> +#ld: -melf_i386 -z noseparate-code
> >>>> +#objdump: -dw
> >>>> +
> >>>> +.*: +file format .*
> >>>> +
> >>>> +Disassembly of section .text:
> >>>> +
> >>>> +0+8048074 <_start>:
> >>>> +[ ]*[a-f0-9]+: 26 68 86 90 04 08 es push \$0x8049086
> >>>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
> >>>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
> >>>
> >>> Please avoid adding the es prefix. It may not be nop in the future.
> >>
> >> Constructive comments please. What other prefix do you suggest we use?
> >> Is another of the segment overrides okay? If not, all that's left is an
> >> address size override, if I'm not mistaken. Which overall seems less
> >> desirable to use.
> >
> > You can use 1-byte NOP.
>
> That'll have an undue effect on debugging, by splitting a single insn
> into two.
>
> >> Plus - is your concern only about 32-bit code, or also about 64-bit? For
> >> 32-bit code in particular I'm having difficulty seeing why an ES
> >> prefix might gain new meaning going forward, when an increasing number
> >> of ISA extensions are for 64-bit mode only anyway. If the concern
> >> extends to 64-bit code (it shouldn't, as the pre-386 segment overrides
> >> are documented as nop prefixes, if I'm not mistaken), earlier changes
> >> would need adjusting then, too, I think.
> >
> > I don't think adding instructions like PUSH is very useful.
>
> I was actively waiting for this kind of comment. Why was adding support
> for e.g. ADC and SBB useful then? Imo it can only be one of two ways:
Good question. They were included since they have the same operand
encoding as ADD and they can be transformed together with ADD.
> Either we support everything that can be supported, or we limit things
> strictly to cases that are actively deemed useful.
>
> Jan
On 04.02.2025 11:41, Jan Beulich wrote:
> On 04.02.2025 11:17, H.J. Lu wrote:
>> On Tue, Feb 4, 2025 at 6:14 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>
>>> On 03.02.2025 23:40, H.J. Lu wrote:
>>>> On Mon, Feb 3, 2025 at 7:41 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>>> --- /dev/null
>>>>> +++ b/ld/testsuite/ld-i386/load8a.d
>>>>> @@ -0,0 +1,14 @@
>>>>> +#source: load8.s
>>>>> +#as: --32 -mrelax-relocations=yes
>>>>> +#ld: -melf_i386 -z noseparate-code
>>>>> +#objdump: -dw
>>>>> +
>>>>> +.*: +file format .*
>>>>> +
>>>>> +Disassembly of section .text:
>>>>> +
>>>>> +0+8048074 <_start>:
>>>>> +[ ]*[a-f0-9]+: 26 68 86 90 04 08 es push \$0x8049086
>>>>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
>>>>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
>>>>
>>>> Please avoid adding the es prefix. It may not be nop in the future.
>>>
>>> Constructive comments please. What other prefix do you suggest we use?
>>> Is another of the segment overrides okay? If not, all that's left is an
>>> address size override, if I'm not mistaken. Which overall seems less
>>> desirable to use.
>>
>> You can use 1-byte NOP.
>
> That'll have an undue effect on debugging, by splitting a single insn
> into two.
>
>>> Plus - is your concern only about 32-bit code, or also about 64-bit? For
>>> 32-bit code in particular I'm having difficulty seeing why an ES
>>> prefix might gain new meaning going forward, when an increasing number
>>> of ISA extensions are for 64-bit mode only anyway. If the concern
>>> extends to 64-bit code (it shouldn't, as the pre-386 segment overrides
>>> are documented as nop prefixes, if I'm not mistaken), earlier changes
>>> would need adjusting then, too, I think.
>>
>> I don't think adding instructions like PUSH is very useful.
Further to my earlier reply: You didn't answer my question. Which is
necessary to determine whether earlier changes need adjustment.
> I was actively waiting for this kind of comment. Why was adding support
> for e.g. ADC and SBB useful then? Imo it can only be one of two ways:
> Either we support everything that can be supported, or we limit things
> strictly to cases that are actively deemed useful.
Thinking of it: With TEST being special-cased in the logic involved, I'm
also curious to learn of a code sequence where TEST would sensibly be
used (and where CMP can't be used instead).
Jan
On Tue, Feb 4, 2025 at 7:02 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 04.02.2025 11:41, Jan Beulich wrote:
> > On 04.02.2025 11:17, H.J. Lu wrote:
> >> On Tue, Feb 4, 2025 at 6:14 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>
> >>> On 03.02.2025 23:40, H.J. Lu wrote:
> >>>> On Mon, Feb 3, 2025 at 7:41 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>> --- /dev/null
> >>>>> +++ b/ld/testsuite/ld-i386/load8a.d
> >>>>> @@ -0,0 +1,14 @@
> >>>>> +#source: load8.s
> >>>>> +#as: --32 -mrelax-relocations=yes
> >>>>> +#ld: -melf_i386 -z noseparate-code
> >>>>> +#objdump: -dw
> >>>>> +
> >>>>> +.*: +file format .*
> >>>>> +
> >>>>> +Disassembly of section .text:
> >>>>> +
> >>>>> +0+8048074 <_start>:
> >>>>> +[ ]*[a-f0-9]+: 26 68 86 90 04 08 es push \$0x8049086
> >>>>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
> >>>>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
> >>>>
> >>>> Please avoid adding the es prefix. It may not be nop in the future.
> >>>
> >>> Constructive comments please. What other prefix do you suggest we use?
> >>> Is another of the segment overrides okay? If not, all that's left is an
> >>> address size override, if I'm not mistaken. Which overall seems less
> >>> desirable to use.
> >>
> >> You can use 1-byte NOP.
> >
> > That'll have an undue effect on debugging, by splitting a single insn
> > into two.
> >
> >>> Plus - is your concern only about 32-bit code, or also about 64-bit? For
> >>> 32-bit code in particular I'm having difficulty seeing why an ES
> >>> prefix might gain new meaning going forward, when an increasing number
> >>> of ISA extensions are for 64-bit mode only anyway. If the concern
> >>> extends to 64-bit code (it shouldn't, as the pre-386 segment overrides
> >>> are documented as nop prefixes, if I'm not mistaken), earlier changes
> >>> would need adjusting then, too, I think.
> >>
> >> I don't think adding instructions like PUSH is very useful.
>
> Further to my earlier reply: You didn't answer my question. Which is
Which question?
> necessary to determine whether earlier changes need adjustment.
>
> > I was actively waiting for this kind of comment. Why was adding support
> > for e.g. ADC and SBB useful then? Imo it can only be one of two ways:
> > Either we support everything that can be supported, or we limit things
> > strictly to cases that are actively deemed useful.
>
> Thinking of it: With TEST being special-cased in the logic involved, I'm
> also curious to learn of a code sequence where TEST would sensibly be
> used (and where CMP can't be used instead).
>
Compiler may generate TEST with GOT.
On 04.02.2025 12:12, H.J. Lu wrote:
> On Tue, Feb 4, 2025 at 7:02 PM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 04.02.2025 11:41, Jan Beulich wrote:
>>> On 04.02.2025 11:17, H.J. Lu wrote:
>>>> On Tue, Feb 4, 2025 at 6:14 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>
>>>>> On 03.02.2025 23:40, H.J. Lu wrote:
>>>>>> On Mon, Feb 3, 2025 at 7:41 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>> --- /dev/null
>>>>>>> +++ b/ld/testsuite/ld-i386/load8a.d
>>>>>>> @@ -0,0 +1,14 @@
>>>>>>> +#source: load8.s
>>>>>>> +#as: --32 -mrelax-relocations=yes
>>>>>>> +#ld: -melf_i386 -z noseparate-code
>>>>>>> +#objdump: -dw
>>>>>>> +
>>>>>>> +.*: +file format .*
>>>>>>> +
>>>>>>> +Disassembly of section .text:
>>>>>>> +
>>>>>>> +0+8048074 <_start>:
>>>>>>> +[ ]*[a-f0-9]+: 26 68 86 90 04 08 es push \$0x8049086
>>>>>>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
>>>>>>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
>>>>>>
>>>>>> Please avoid adding the es prefix. It may not be nop in the future.
>>>>>
>>>>> Constructive comments please. What other prefix do you suggest we use?
>>>>> Is another of the segment overrides okay? If not, all that's left is an
>>>>> address size override, if I'm not mistaken. Which overall seems less
>>>>> desirable to use.
>>>>
>>>> You can use 1-byte NOP.
>>>
>>> That'll have an undue effect on debugging, by splitting a single insn
>>> into two.
>>>
>>>>> Plus - is your concern only about 32-bit code, or also about 64-bit? For
>>>>> 32-bit code in particular I'm having difficulty seeing why an ES
>>>>> prefix might gain new meaning going forward, when an increasing number
>>>>> of ISA extensions are for 64-bit mode only anyway. If the concern
>>>>> extends to 64-bit code (it shouldn't, as the pre-386 segment overrides
>>>>> are documented as nop prefixes, if I'm not mistaken), earlier changes
>>>>> would need adjusting then, too, I think.
>>>>
>>>> I don't think adding instructions like PUSH is very useful.
>>
>> Further to my earlier reply: You didn't answer my question. Which is
>
> Which question?
"Is your concern only about 32-bit code, or also about 64-bit?"
>> necessary to determine whether earlier changes need adjustment.
>>
>>> I was actively waiting for this kind of comment. Why was adding support
>>> for e.g. ADC and SBB useful then? Imo it can only be one of two ways:
>>> Either we support everything that can be supported, or we limit things
>>> strictly to cases that are actively deemed useful.
>>
>> Thinking of it: With TEST being special-cased in the logic involved, I'm
>> also curious to learn of a code sequence where TEST would sensibly be
>> used (and where CMP can't be used instead).
>
> Compiler may generate TEST with GOT.
Can it? Why would it? (IOW: Again you didn't really address my request.)
Jan
On Tue, Feb 4, 2025 at 7:16 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 04.02.2025 12:12, H.J. Lu wrote:
> > On Tue, Feb 4, 2025 at 7:02 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 04.02.2025 11:41, Jan Beulich wrote:
> >>> On 04.02.2025 11:17, H.J. Lu wrote:
> >>>> On Tue, Feb 4, 2025 at 6:14 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>
> >>>>> On 03.02.2025 23:40, H.J. Lu wrote:
> >>>>>> On Mon, Feb 3, 2025 at 7:41 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>>> --- /dev/null
> >>>>>>> +++ b/ld/testsuite/ld-i386/load8a.d
> >>>>>>> @@ -0,0 +1,14 @@
> >>>>>>> +#source: load8.s
> >>>>>>> +#as: --32 -mrelax-relocations=yes
> >>>>>>> +#ld: -melf_i386 -z noseparate-code
> >>>>>>> +#objdump: -dw
> >>>>>>> +
> >>>>>>> +.*: +file format .*
> >>>>>>> +
> >>>>>>> +Disassembly of section .text:
> >>>>>>> +
> >>>>>>> +0+8048074 <_start>:
> >>>>>>> +[ ]*[a-f0-9]+: 26 68 86 90 04 08 es push \$0x8049086
> >>>>>>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
> >>>>>>> +[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
> >>>>>>
> >>>>>> Please avoid adding the es prefix. It may not be nop in the future.
> >>>>>
> >>>>> Constructive comments please. What other prefix do you suggest we use?
> >>>>> Is another of the segment overrides okay? If not, all that's left is an
> >>>>> address size override, if I'm not mistaken. Which overall seems less
> >>>>> desirable to use.
> >>>>
> >>>> You can use 1-byte NOP.
> >>>
> >>> That'll have an undue effect on debugging, by splitting a single insn
> >>> into two.
> >>>
> >>>>> Plus - is your concern only about 32-bit code, or also about 64-bit? For
> >>>>> 32-bit code in particular I'm having difficulty seeing why an ES
> >>>>> prefix might gain new meaning going forward, when an increasing number
> >>>>> of ISA extensions are for 64-bit mode only anyway. If the concern
> >>>>> extends to 64-bit code (it shouldn't, as the pre-386 segment overrides
> >>>>> are documented as nop prefixes, if I'm not mistaken), earlier changes
> >>>>> would need adjusting then, too, I think.
> >>>>
> >>>> I don't think adding instructions like PUSH is very useful.
> >>
> >> Further to my earlier reply: You didn't answer my question. Which is
> >
> > Which question?
>
> "Is your concern only about 32-bit code, or also about 64-bit?"
Both.
> >> necessary to determine whether earlier changes need adjustment.
> >>
> >>> I was actively waiting for this kind of comment. Why was adding support
> >>> for e.g. ADC and SBB useful then? Imo it can only be one of two ways:
> >>> Either we support everything that can be supported, or we limit things
> >>> strictly to cases that are actively deemed useful.
> >>
> >> Thinking of it: With TEST being special-cased in the logic involved, I'm
> >> also curious to learn of a code sequence where TEST would sensibly be
> >> used (and where CMP can't be used instead).
> >
> > Compiler may generate TEST with GOT.
>
> Can it? Why would it? (IOW: Again you didn't really address my request.)
>
[hjl@gnu-tgl-3 tmp]$ cat x.c
extern int foo __attribute__ ((weak));
extern void bar (void);
__attribute__ ((regparm(3)))
void
_start (long int p)
{
if (((unsigned long) &foo) & p)
bar ();
}
[hjl@gnu-tgl-3 tmp]$ gcc -c -O2 -m32 x.c -mno-direct-extern-access
[hjl@gnu-tgl-3 tmp]$ objdump -dwr x.o
x.o: file format elf32-i386
Disassembly of section .text:
00000000 <_start>:
0: 85 05 00 00 00 00 test %eax,0x0 2: R_386_GOT32X foo
6: 75 08 jne 10 <_start+0x10>
8: c3 ret
9: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi
10: e9 fc ff ff ff jmp 11 <_start+0x11> 11: R_386_PC32 bar
[hjl@gnu-tgl-3 tmp]$
Which request?
On 04.02.2025 13:14, H.J. Lu wrote:
> On Tue, Feb 4, 2025 at 7:16 PM Jan Beulich <jbeulich@suse.com> wrote:
>> On 04.02.2025 12:12, H.J. Lu wrote:
>>> On Tue, Feb 4, 2025 at 7:02 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>> Thinking of it: With TEST being special-cased in the logic involved, I'm
>>>> also curious to learn of a code sequence where TEST would sensibly be
>>>> used (and where CMP can't be used instead).
>>>
>>> Compiler may generate TEST with GOT.
>>
>> Can it? Why would it? (IOW: Again you didn't really address my request.)
>>
>
> [hjl@gnu-tgl-3 tmp]$ cat x.c
> extern int foo __attribute__ ((weak));
>
> extern void bar (void);
>
> __attribute__ ((regparm(3)))
> void
> _start (long int p)
> {
> if (((unsigned long) &foo) & p)
> bar ();
> }
> [hjl@gnu-tgl-3 tmp]$ gcc -c -O2 -m32 x.c -mno-direct-extern-access
>
> [hjl@gnu-tgl-3 tmp]$ objdump -dwr x.o
>
> x.o: file format elf32-i386
>
>
> Disassembly of section .text:
>
> 00000000 <_start>:
> 0: 85 05 00 00 00 00 test %eax,0x0 2: R_386_GOT32X foo
> 6: 75 08 jne 10 <_start+0x10>
> 8: c3 ret
> 9: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi
> 10: e9 fc ff ff ff jmp 11 <_start+0x11> 11: R_386_PC32 bar
> [hjl@gnu-tgl-3 tmp]$
>
> Which request?
"... learn of a code sequence where TEST would sensibly be used" in my
earlier reply. So yes, you provided a contrived example now. I'm afraid
I can't assign meaning to "if (((unsigned long) &foo) & p)", though.
Instead, slightly adjusting your example, I can see PUSH being used by
the compiler in exactly the way we can (but so far didn't) optimize.
That's ordinary argument passing, which I'm inclined to call "sensible".
IOW the question remains as to why optimization of TEST was added,
beyond the "because we can" reasoning. As before, ADD, SUB, and perhaps
CMP make sense when operating on pointers. For AND, OR, XOR, ADC, SBB,
and TEST I'm having a hard time seeing good uses. Since we deal with
them, we also ought to deal with IMUL in the same manner (where
possible). Since beyond these arithmetic ops we also deal with MOV, we
similarly should handle PUSH.
Jan
On Wed, Feb 5, 2025 at 7:37 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 04.02.2025 13:14, H.J. Lu wrote:
> > On Tue, Feb 4, 2025 at 7:16 PM Jan Beulich <jbeulich@suse.com> wrote:
> >> On 04.02.2025 12:12, H.J. Lu wrote:
> >>> On Tue, Feb 4, 2025 at 7:02 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>> Thinking of it: With TEST being special-cased in the logic involved, I'm
> >>>> also curious to learn of a code sequence where TEST would sensibly be
> >>>> used (and where CMP can't be used instead).
> >>>
> >>> Compiler may generate TEST with GOT.
> >>
> >> Can it? Why would it? (IOW: Again you didn't really address my request.)
> >>
> >
> > [hjl@gnu-tgl-3 tmp]$ cat x.c
> > extern int foo __attribute__ ((weak));
> >
> > extern void bar (void);
> >
> > __attribute__ ((regparm(3)))
> > void
> > _start (long int p)
> > {
> > if (((unsigned long) &foo) & p)
> > bar ();
> > }
> > [hjl@gnu-tgl-3 tmp]$ gcc -c -O2 -m32 x.c -mno-direct-extern-access
> >
> > [hjl@gnu-tgl-3 tmp]$ objdump -dwr x.o
> >
> > x.o: file format elf32-i386
> >
> >
> > Disassembly of section .text:
> >
> > 00000000 <_start>:
> > 0: 85 05 00 00 00 00 test %eax,0x0 2: R_386_GOT32X foo
> > 6: 75 08 jne 10 <_start+0x10>
> > 8: c3 ret
> > 9: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi
> > 10: e9 fc ff ff ff jmp 11 <_start+0x11> 11: R_386_PC32 bar
> > [hjl@gnu-tgl-3 tmp]$
> >
> > Which request?
>
> "... learn of a code sequence where TEST would sensibly be used" in my
> earlier reply. So yes, you provided a contrived example now. I'm afraid
> I can't assign meaning to "if (((unsigned long) &foo) & p)", though.
It can be used to check pointer alignment.
> Instead, slightly adjusting your example, I can see PUSH being used by
> the compiler in exactly the way we can (but so far didn't) optimize.
> That's ordinary argument passing, which I'm inclined to call "sensible".
Please don't add a segment register.
> IOW the question remains as to why optimization of TEST was added,
> beyond the "because we can" reasoning. As before, ADD, SUB, and perhaps
> CMP make sense when operating on pointers. For AND, OR, XOR, ADC, SBB,
> and TEST I'm having a hard time seeing good uses. Since we deal with
> them, we also ought to deal with IMUL in the same manner (where
> possible). Since beyond these arithmetic ops we also deal with MOV, we
> similarly should handle PUSH.
>
> Jan
On 06.02.2025 03:28, H.J. Lu wrote:
> On Wed, Feb 5, 2025 at 7:37 PM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 04.02.2025 13:14, H.J. Lu wrote:
>>> On Tue, Feb 4, 2025 at 7:16 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>> On 04.02.2025 12:12, H.J. Lu wrote:
>>>>> On Tue, Feb 4, 2025 at 7:02 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>> Thinking of it: With TEST being special-cased in the logic involved, I'm
>>>>>> also curious to learn of a code sequence where TEST would sensibly be
>>>>>> used (and where CMP can't be used instead).
>>>>>
>>>>> Compiler may generate TEST with GOT.
>>>>
>>>> Can it? Why would it? (IOW: Again you didn't really address my request.)
>>>>
>>>
>>> [hjl@gnu-tgl-3 tmp]$ cat x.c
>>> extern int foo __attribute__ ((weak));
>>>
>>> extern void bar (void);
>>>
>>> __attribute__ ((regparm(3)))
>>> void
>>> _start (long int p)
>>> {
>>> if (((unsigned long) &foo) & p)
>>> bar ();
>>> }
>>> [hjl@gnu-tgl-3 tmp]$ gcc -c -O2 -m32 x.c -mno-direct-extern-access
>>>
>>> [hjl@gnu-tgl-3 tmp]$ objdump -dwr x.o
>>>
>>> x.o: file format elf32-i386
>>>
>>>
>>> Disassembly of section .text:
>>>
>>> 00000000 <_start>:
>>> 0: 85 05 00 00 00 00 test %eax,0x0 2: R_386_GOT32X foo
>>> 6: 75 08 jne 10 <_start+0x10>
>>> 8: c3 ret
>>> 9: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi
>>> 10: e9 fc ff ff ff jmp 11 <_start+0x11> 11: R_386_PC32 bar
>>> [hjl@gnu-tgl-3 tmp]$
>>>
>>> Which request?
>>
>> "... learn of a code sequence where TEST would sensibly be used" in my
>> earlier reply. So yes, you provided a contrived example now. I'm afraid
>> I can't assign meaning to "if (((unsigned long) &foo) & p)", though.
>
> It can be used to check pointer alignment.
Hmm, yes, I didn't think of that. Yet then TESTB (and perhaps even TESTW)
would also want to be supported.
>> Instead, slightly adjusting your example, I can see PUSH being used by
>> the compiler in exactly the way we can (but so far didn't) optimize.
>> That's ordinary argument passing, which I'm inclined to call "sensible".
>
> Please don't add a segment register.
Actually I have to re-raise the why question: We use CS: in some of the
multi-byte NOPs we emit, both for 32- and for 64-bit.
Plus, as said before, I view it as pretty undesirable to split a single
insn into two (NOP of appropriate size followed by shrunk insn).
Alternatively for 32-bit's PUSH we could use an address size override,
albeit this doesn't look very desirable either. For 64-bit we could use
dummy REX prefixes, unless we're dealing with a REX2 encoding. For REX2
forms of PUSH we could either choose to not use REX2, or again use an
address override. An address size override could also be used for
immediate-to-register MOV.
Also: If anything, these prefixes could only gain hint-like meaning
anyway (like two of them once did for conditional jumps), so there would
not be any functional issue. As for their use in multi-byte NOPs, we can
deal with eventual performance issues once we know how bad things are.
Jan
On Thu, Feb 6, 2025 at 8:08 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 06.02.2025 03:28, H.J. Lu wrote:
> > On Wed, Feb 5, 2025 at 7:37 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 04.02.2025 13:14, H.J. Lu wrote:
> >>> On Tue, Feb 4, 2025 at 7:16 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>> On 04.02.2025 12:12, H.J. Lu wrote:
> >>>>> On Tue, Feb 4, 2025 at 7:02 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>> Thinking of it: With TEST being special-cased in the logic involved, I'm
> >>>>>> also curious to learn of a code sequence where TEST would sensibly be
> >>>>>> used (and where CMP can't be used instead).
> >>>>>
> >>>>> Compiler may generate TEST with GOT.
> >>>>
> >>>> Can it? Why would it? (IOW: Again you didn't really address my request.)
> >>>>
> >>>
> >>> [hjl@gnu-tgl-3 tmp]$ cat x.c
> >>> extern int foo __attribute__ ((weak));
> >>>
> >>> extern void bar (void);
> >>>
> >>> __attribute__ ((regparm(3)))
> >>> void
> >>> _start (long int p)
> >>> {
> >>> if (((unsigned long) &foo) & p)
> >>> bar ();
> >>> }
> >>> [hjl@gnu-tgl-3 tmp]$ gcc -c -O2 -m32 x.c -mno-direct-extern-access
> >>>
> >>> [hjl@gnu-tgl-3 tmp]$ objdump -dwr x.o
> >>>
> >>> x.o: file format elf32-i386
> >>>
> >>>
> >>> Disassembly of section .text:
> >>>
> >>> 00000000 <_start>:
> >>> 0: 85 05 00 00 00 00 test %eax,0x0 2: R_386_GOT32X foo
> >>> 6: 75 08 jne 10 <_start+0x10>
> >>> 8: c3 ret
> >>> 9: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi
> >>> 10: e9 fc ff ff ff jmp 11 <_start+0x11> 11: R_386_PC32 bar
> >>> [hjl@gnu-tgl-3 tmp]$
> >>>
> >>> Which request?
> >>
> >> "... learn of a code sequence where TEST would sensibly be used" in my
> >> earlier reply. So yes, you provided a contrived example now. I'm afraid
> >> I can't assign meaning to "if (((unsigned long) &foo) & p)", though.
> >
> > It can be used to check pointer alignment.
>
> Hmm, yes, I didn't think of that. Yet then TESTB (and perhaps even TESTW)
> would also want to be supported.
>
> >> Instead, slightly adjusting your example, I can see PUSH being used by
> >> the compiler in exactly the way we can (but so far didn't) optimize.
> >> That's ordinary argument passing, which I'm inclined to call "sensible".
> >
> > Please don't add a segment register.
>
> Actually I have to re-raise the why question: We use CS: in some of the
> multi-byte NOPs we emit, both for 32- and for 64-bit.
CS is better than ES.
> Plus, as said before, I view it as pretty undesirable to split a single
> insn into two (NOP of appropriate size followed by shrunk insn).
>
> Alternatively for 32-bit's PUSH we could use an address size override,
> albeit this doesn't look very desirable either. For 64-bit we could use
> dummy REX prefixes, unless we're dealing with a REX2 encoding. For REX2
> forms of PUSH we could either choose to not use REX2, or again use an
> address override. An address size override could also be used for
> immediate-to-register MOV.
>
> Also: If anything, these prefixes could only gain hint-like meaning
> anyway (like two of them once did for conditional jumps), so there would
> not be any functional issue. As for their use in multi-byte NOPs, we can
> deal with eventual performance issues once we know how bad things are.
>
> Jan
@@ -1209,6 +1209,10 @@ elf_i386_tls_transition (struct bfd_link
to
test $foo, %reg1
and convert
+ push foo@GOT[(%reg)]
+ to
+ push $foo
+ and convert
binop foo@GOT[(%reg1)], %reg2
to
binop $foo, %reg2
@@ -1233,7 +1237,7 @@ elf_i386_convert_load_reloc (bfd *abfd,
unsigned int addend;
unsigned int nop;
bfd_vma nop_offset;
- bool is_pic;
+ bool is_pic, is_branch = false;
bool to_reloc_32;
bool abs_symbol;
unsigned int r_type;
@@ -1301,6 +1305,23 @@ elf_i386_convert_load_reloc (bfd *abfd,
opcode = bfd_get_8 (abfd, contents + roff - 2);
+ if (opcode == 0xff)
+ {
+ switch (modrm & 0x38)
+ {
+ case 0x10: /* CALL */
+ case 0x20: /* JMP */
+ is_branch = true;
+ break;
+
+ case 0x30: /* PUSH */
+ break;
+
+ default:
+ return true;
+ }
+ }
+
/* Convert to R_386_32 if PIC is false or there is no base
register. */
to_reloc_32 = !is_pic || baseless;
@@ -1311,7 +1332,7 @@ elf_i386_convert_load_reloc (bfd *abfd,
reloc. */
if (h == NULL)
{
- if (opcode == 0x0ff)
+ if (is_branch)
/* Convert "call/jmp *foo@GOT[(%reg)]". */
goto convert_branch;
else
@@ -1327,7 +1348,7 @@ elf_i386_convert_load_reloc (bfd *abfd,
&& !eh->linker_def
&& local_ref)
{
- if (opcode == 0xff)
+ if (is_branch)
{
/* No direct branch to 0 for PIC. */
if (is_pic)
@@ -1343,7 +1364,7 @@ elf_i386_convert_load_reloc (bfd *abfd,
}
}
- if (opcode == 0xff)
+ if (is_branch)
{
/* We have "call/jmp *foo@GOT[(%reg)]". */
if ((h->root.type == bfd_link_hash_defined
@@ -1399,7 +1420,8 @@ elf_i386_convert_load_reloc (bfd *abfd,
else
{
/* We have "mov foo@GOT[(%re1g)], %reg2",
- "test %reg1, foo@GOT(%reg2)" and
+ "test %reg1, foo@GOT(%reg2)",
+ "push foo@GOT[(%reg)]", or
"binop foo@GOT[(%reg1)], %reg2".
Avoid optimizing _DYNAMIC since ld.so may use its
@@ -1460,6 +1482,13 @@ elf_i386_convert_load_reloc (bfd *abfd,
modrm = 0xc0 | ((modrm & 0x38) >> 3) | (opcode & 0x38);
opcode = 0x81;
}
+ else if (opcode == 0xff)
+ {
+ /* Convert "push foo@GOT(%reg)" to
+ "push $foo". */
+ modrm = 0x68; /* Really the opcode. */
+ opcode = 0x26; /* Really a meaningless %es: prefix. */
+ }
else
return true;
@@ -1739,13 +1739,16 @@ elf_x86_64_need_pic (struct bfd_link_inf
}
/* Move the R bits to the B bits in EVEX payload byte 1. */
-static unsigned int evex_move_r_to_b (unsigned int byte1)
+static unsigned int evex_move_r_to_b (unsigned int byte1, bool copy)
{
byte1 = (byte1 & ~(1 << 5)) | ((byte1 & (1 << 7)) >> 2); /* R3 -> B3 */
byte1 = (byte1 & ~(1 << 3)) | ((~byte1 & (1 << 4)) >> 1); /* R4 -> B4 */
/* Set both R bits, as they're inverted. */
- return byte1 | (1 << 4) | (1 << 7);
+ if (!copy)
+ byte1 |= (1 << 4) | (1 << 7);
+
+ return byte1;
}
/* With the local symbol, foo, we convert
@@ -1762,10 +1765,14 @@ static unsigned int evex_move_r_to_b (un
to
test $foo, %reg
and convert
+ push foo@GOTPCREL(%rip)
+ to
+ push $foo
+ and convert
binop foo@GOTPCREL(%rip), %reg
to
binop $foo, %reg
- where binop is one of adc, add, and, cmp, or, sbb, sub, xor
+ where binop is one of adc, add, and, cmp, imul, or, sbb, sub, xor
instructions. */
static bool
@@ -1781,6 +1788,7 @@ elf_x86_64_convert_load_reloc (bfd *abfd
bool is_pic;
bool no_overflow;
bool relocx;
+ bool is_branch = false;
bool to_reloc_pc32;
bool abs_symbol;
bool local_ref;
@@ -1873,6 +1881,23 @@ elf_x86_64_convert_load_reloc (bfd *abfd
r_symndx = htab->r_sym (irel->r_info);
opcode = bfd_get_8 (abfd, contents + roff - 2);
+ modrm = bfd_get_8 (abfd, contents + roff - 1);
+ if (opcode == 0xff)
+ {
+ switch (modrm & 0x38)
+ {
+ case 0x10: /* CALL */
+ case 0x20: /* JMP */
+ is_branch = true;
+ break;
+
+ case 0x30: /* PUSH */
+ break;
+
+ default:
+ return true;
+ }
+ }
/* Convert mov to lea since it has been done for a while. */
if (opcode != 0x8b)
@@ -1890,7 +1915,7 @@ elf_x86_64_convert_load_reloc (bfd *abfd
3. no_overflow is true.
4. PIC.
*/
- to_reloc_pc32 = (opcode == 0xff
+ to_reloc_pc32 = (is_branch
|| !relocx
|| no_overflow
|| is_pic);
@@ -1942,7 +1967,7 @@ elf_x86_64_convert_load_reloc (bfd *abfd
&& !eh->linker_def
&& local_ref))
{
- if (opcode == 0xff)
+ if (is_branch)
{
/* Skip for branch instructions since R_X86_64_PC32
may overflow. */
@@ -2009,7 +2034,7 @@ elf_x86_64_convert_load_reloc (bfd *abfd
return true;
convert:
- if (opcode == 0xff)
+ if (is_branch)
{
/* We have "call/jmp *foo@GOTPCREL(%rip)". */
unsigned int nop;
@@ -2018,7 +2043,6 @@ elf_x86_64_convert_load_reloc (bfd *abfd
/* Convert R_X86_64_GOTPCRELX and R_X86_64_REX_GOTPCRELX to
R_X86_64_PC32. */
- modrm = bfd_get_8 (abfd, contents + roff - 1);
if (modrm == 0x25)
{
/* Convert to "jmp foo nop". */
@@ -2064,11 +2088,12 @@ elf_x86_64_convert_load_reloc (bfd *abfd
}
else if (r_type == R_X86_64_CODE_6_GOTPCRELX && opcode != 0x8b)
{
+ bool move_v_r = false;
+
/* R_X86_64_PC32 isn't supported. */
if (to_reloc_pc32)
return true;
- modrm = bfd_get_8 (abfd, contents + roff - 1);
if (opcode == 0x85)
{
/* Convert "ctest<cc> %reg, foo@GOTPCREL(%rip)" to
@@ -2094,6 +2119,23 @@ elf_x86_64_convert_load_reloc (bfd *abfd
modrm = 0xc0 | ((modrm & 0x38) >> 3) | (opcode & 0x38);
opcode = 0x81;
}
+ else if (opcode == 0xaf)
+ {
+ if (!(evex[2] & 0x10))
+ {
+ /* Convert "imul foo@GOTPCREL(%rip), %reg" to
+ "imul $foo, %reg, %reg". */
+ modrm = 0xc0 | ((modrm & 0x38) >> 3) | (modrm & 0x38);
+ }
+ else
+ {
+ /* Convert "imul foo@GOTPCREL(%rip), %reg1, %reg2" to
+ "imul $foo, %reg1, %reg2". */
+ modrm = 0xc0 | ((modrm & 0x38) >> 3) | (~evex[1] & 0x38);
+ move_v_r = true;
+ }
+ opcode = 0x69;
+ }
else
return true;
@@ -2119,7 +2161,23 @@ elf_x86_64_convert_load_reloc (bfd *abfd
bfd_put_8 (abfd, opcode, contents + roff - 2);
bfd_put_8 (abfd, modrm, contents + roff - 1);
- evex[0] = evex_move_r_to_b (evex[0]);
+ evex[0] = evex_move_r_to_b (evex[0], opcode == 0x69 && !move_v_r);
+ if (move_v_r)
+ {
+ /* Move the top two V bits to the R bits in EVEX payload byte 1.
+ Note that evex_move_r_to_b() set both R bits. */
+ if (!(evex[1] & (1 << 6)))
+ evex[0] &= ~(1 << 7); /* V3 -> R3 */
+ if (!(evex[2] & (1 << 3)))
+ evex[0] &= ~(1 << 4); /* V4 -> R4 */
+ /* Set all V bits, as they're inverted. */
+ evex[1] |= 0xf << 3;
+ evex[2] |= 1 << 3;
+ /* Clear the ND (ZU) bit (it ought to be ignored anyway). */
+ evex[2] &= ~(1 << 4);
+ bfd_put_8 (abfd, evex[2], contents + roff - 3);
+ bfd_put_8 (abfd, evex[1], contents + roff - 4);
+ }
bfd_put_8 (abfd, evex[0], contents + roff - 5);
/* No addend for R_X86_64_32/R_X86_64_32S relocations. */
@@ -2162,7 +2220,10 @@ elf_x86_64_convert_load_reloc (bfd *abfd
{
if (bfd_get_8 (abfd, contents + roff - 4) == 0xd5)
{
- rex2 = bfd_get_8 (abfd, contents + roff - 3);
+ /* Make sure even an all-zero payload leaves a non-zero value
+ in the variable. */
+ rex2 = bfd_get_8 (abfd, contents + roff - 3) | 0x100;
+ rex2_mask |= 0x100;
rex_w = (rex2 & REX_W) != 0;
}
else if (bfd_get_8 (abfd, contents + roff - 4) == 0x0f)
@@ -2195,7 +2256,6 @@ elf_x86_64_convert_load_reloc (bfd *abfd
/* Convert "mov foo@GOTPCREL(%rip), %reg" to
"mov $foo, %reg". */
opcode = 0xc7;
- modrm = bfd_get_8 (abfd, contents + roff - 1);
modrm = 0xc0 | (modrm & 0x38) >> 3;
if (rex_w && ABI_64_P (link_info->output_bfd))
{
@@ -2222,7 +2282,6 @@ elf_x86_64_convert_load_reloc (bfd *abfd
if (to_reloc_pc32)
return true;
- modrm = bfd_get_8 (abfd, contents + roff - 1);
if (opcode == 0x85)
{
/* Convert "test %reg, foo@GOTPCREL(%rip)" to
@@ -2237,6 +2296,39 @@ elf_x86_64_convert_load_reloc (bfd *abfd
modrm = 0xc0 | ((modrm & 0x38) >> 3) | (opcode & 0x38);
opcode = 0x81;
}
+ else if (opcode == 0xaf && (rex2 & (REX2_M << 4)))
+ {
+ /* Convert "imul foo@GOTPCREL(%rip), %reg" to
+ "imul $foo, %reg, %reg". */
+ modrm = 0xc0 | ((modrm & 0x38) >> 3) | (modrm & 0x38);
+ rex_mask = 0;
+ rex2_mask = REX2_M << 4;
+ opcode = 0x69;
+ }
+ else if (opcode == 0xff && !(rex2 & (REX2_M << 4)))
+ {
+ /* Convert "push foo@GOTPCREL(%rip)" to
+ "push $foo". */
+ bfd_put_8 (abfd, 0x68, contents + roff - 1);
+ if (rex)
+ {
+ bfd_put_8 (abfd, 0x26, contents + roff - 3);
+ bfd_put_8 (abfd, rex, contents + roff - 2);
+ }
+ else if (rex2)
+ {
+ bfd_put_8 (abfd, 0x26, contents + roff - 4);
+ bfd_put_8 (abfd, 0xd5, contents + roff - 3);
+ bfd_put_8 (abfd, rex2, contents + roff - 2);
+ }
+ else
+ bfd_put_8 (abfd, 0x26, contents + roff - 2);
+
+ r_type = R_X86_64_32S;
+ /* No addend for R_X86_64_32S relocations. */
+ irel->r_addend = 0;
+ goto finish;
+ }
else
return true;
@@ -2297,6 +2389,7 @@ elf_x86_64_convert_load_reloc (bfd *abfd
}
}
+ finish:
*r_type_p = r_type;
irel->r_info = htab->r_info (r_symndx,
r_type | R_X86_64_converted_reloc_bit);
@@ -4378,7 +4471,7 @@ elf_x86_64_relocate_section (bfd *output
continue;
}
- byte1 = evex_move_r_to_b (byte1);
+ byte1 = evex_move_r_to_b (byte1, false);
bfd_put_8 (output_bfd, byte1, contents + roff - 5);
bfd_put_8 (output_bfd, 0x81, contents + roff - 2);
bfd_put_8 (output_bfd, 0xc0 | reg, contents + roff - 1);
@@ -12928,9 +12928,9 @@ output_disp (fragS *insn_start_frag, off
else if (object_64bit)
continue;
- /* Check for "call/jmp *mem", "mov mem, %reg", "movrs mem, %reg",
- "test %reg, mem" and "binop mem, %reg" where binop
- is one of adc, add, and, cmp, or, sbb, sub, xor
+ /* Check for "call/jmp *mem", "push mem", "mov mem, %reg",
+ "movrs mem, %reg", "test %reg, mem" and "binop mem, %reg" where
+ binop is one of adc, add, and, cmp, or, sbb, sub, xor, or imul
instructions without data prefix. Always generate
R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
unsigned int space = dot_insn () ? i.insn_opcode_space
@@ -12940,7 +12940,7 @@ output_disp (fragS *insn_start_frag, off
|| (i.rm.mode == 0 && i.rm.regmem == 5))
&& ((space == SPACE_BASE
&& i.tm.base_opcode == 0xff
- && (i.rm.reg == 2 || i.rm.reg == 4))
+ && (i.rm.reg == 2 || i.rm.reg == 4 || i.rm.reg == 6))
|| ((space == SPACE_BASE
|| space == SPACE_0F38
|| space == SPACE_MAP4)
@@ -12949,7 +12949,13 @@ output_disp (fragS *insn_start_frag, off
|| space == SPACE_MAP4)
&& (i.tm.base_opcode == 0x85
|| (i.tm.base_opcode
- | (i.operands > 2 ? 0x3a : 0x38)) == 0x3b))))
+ | (i.operands > 2 ? 0x3a : 0x38)) == 0x3b))
+ || (((space == SPACE_0F
+ /* Because of the 0F prefix, no suitable relocation
+ exists for this unless it's REX2-encoded. */
+ && is_apx_rex2_encoding ())
+ || space == SPACE_MAP4)
+ && i.tm.base_opcode == 0xaf)))
{
if (object_64bit)
{
@@ -373,6 +373,8 @@ run_dump_test "load5a"
run_dump_test "load5b"
run_dump_test "load6"
run_dump_test "load7"
+run_dump_test "load8a"
+run_dump_test "load8b"
run_dump_test "pr19175"
run_dump_test "pr19615"
run_dump_test "pr19636-1a"
@@ -0,0 +1,20 @@
+ .data
+ .type bar, @object
+bar:
+ .byte 1
+ .size bar, .-bar
+ .globl foo
+ .type foo, @object
+foo:
+ .byte 1
+ .size foo, .-foo
+ .text
+ .globl _start
+ .type _start, @function
+_start:
+ push bar@GOT(%ecx)
+ push foo@GOT(%edx)
+ .ifndef PIC
+ push foo@GOT
+ .endif
+ .size _start, .-_start
@@ -0,0 +1,14 @@
+#source: load8.s
+#as: --32 -mrelax-relocations=yes
+#ld: -melf_i386 -z noseparate-code
+#objdump: -dw
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+8048074 <_start>:
+[ ]*[a-f0-9]+: 26 68 86 90 04 08 es push \$0x8049086
+[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
+[ ]*[a-f0-9]+: 26 68 87 90 04 08 es push \$0x8049087
+#pass
@@ -0,0 +1,13 @@
+#source: load8.s
+#as: --32 -mshared -mrelax-relocations=yes --defsym PIC=1
+#ld: -melf_i386 -shared -z noseparate-code
+#objdump: -dw
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+[0-9a-f]+ <_start>:
+[ ]*[0-9a-f]+: ff b1 f8 ff ff ff push -0x8\(%ecx\)
+[ ]*[0-9a-f]+: ff b2 fc ff ff ff push -0x4\(%edx\)
+#pass
@@ -118,5 +118,11 @@ _start:
sub %rbp, bar@GOTPCREL(%rip), %r21
xor %rsi, bar@GOTPCREL(%rip), %r22
+ imul bar@GOTPCREL(%rip), %r17
+ {nf} imul bar@GOTPCREL(%rip), %r17
+ imul bar@GOTPCREL(%rip), %r17, %rdx
+ imul bar@GOTPCREL(%rip), %rcx, %r18
+ {rex2} pushq bar@GOTPCREL(%rip)
+
.size _start, .-_start
.p2align 12, 0x90
@@ -115,4 +115,9 @@ Disassembly of section .text:
+[a-f0-9]+: 62 f4 dc 10 19 25 74 0c 20 00 sbb %rsp,0x200c74\(%rip\),%r20 # 602000 <.*>
+[a-f0-9]+: 62 f4 d4 10 29 2d 6a 0c 20 00 sub %rbp,0x200c6a\(%rip\),%r21 # 602000 <.*>
+[a-f0-9]+: 62 f4 cc 10 81 f6 20 20 60 00 xor \$0x602020,%rsi,%r22
+ +[a-f0-9]+: d5 58 69 c9 20 20 60 00 imul \$0x602020,%r17,%r17
+ +[a-f0-9]+: 62 ec fc 0c 69 c9 20 20 60 00 \{nf\} imul \$0x602020,%r17,%r17
+ +[a-f0-9]+: 62 fc fc 08 69 d1 20 20 60 00 imul \$0x602020,%r17,%rdx
+ +[a-f0-9]+: 62 e4 fc 08 69 d1 20 20 60 00 imul \$0x602020,%rcx,%r18
+ +[a-f0-9]+: 26 d5 00 68 20 20 60 00 es \{rex2 0x0\} push \$0x602020
#pass
@@ -108,4 +108,9 @@ Disassembly of section .text:
+[a-f0-9]+: 62 f4 dc 10 19 25 54 0d 20 00 sbb %rsp,0x200d54\(%rip\),%r20 # 2020e0 <.*>
+[a-f0-9]+: 62 f4 d4 10 29 2d 4a 0d 20 00 sub %rbp,0x200d4a\(%rip\),%r21 # 2020e0 <.*>
+[a-f0-9]+: 62 f4 cc 10 31 35 40 0d 20 00 xor %rsi,0x200d40\(%rip\),%r22 # 2020e0 <.*>
+ +[a-f0-9]+: d5 c8 af 0d 38 0d 20 00 imul 0x200d38\(%rip\),%r17 # 2020e0 <.*>
+ +[a-f0-9]+: 62 e4 fc 0c af 0d 2e 0d 20 00 \{nf\} imul 0x200d2e\(%rip\),%r17 # 2020e0 <.*>
+ +[a-f0-9]+: 62 e4 ec 18 af 0d 24 0d 20 00 imul 0x200d24\(%rip\),%r17,%rdx # 2020e0 <.*>
+ +[a-f0-9]+: 62 f4 ec 10 af 0d 1a 0d 20 00 imul 0x200d1a\(%rip\),%rcx,%r18 # 2020e0 <.*>
+ +[a-f0-9]+: d5 00 ff 35 12 0d 20 00 \{rex2 0x0\} push 0x200d12\(%rip\) # 2020e0 <.*>
#pass
@@ -108,4 +108,9 @@ Disassembly of section .text:
+[a-f0-9]+: 62 f4 dc 10 19 25 e4 0c 20 00 sbb %rsp,0x200ce4\(%rip\),%r20 # 202070 <.*>
+[a-f0-9]+: 62 f4 d4 10 29 2d da 0c 20 00 sub %rbp,0x200cda\(%rip\),%r21 # 202070 <.*>
+[a-f0-9]+: 62 f4 cc 10 31 35 d0 0c 20 00 xor %rsi,0x200cd0\(%rip\),%r22 # 202070 <.*>
+ +[a-f0-9]+: d5 c8 af 0d c8 0c 20 00 imul 0x200cc8\(%rip\),%r17 # 202070 <.*>
+ +[a-f0-9]+: 62 e4 fc 0c af 0d be 0c 20 00 \{nf\} imul 0x200cbe\(%rip\),%r17 # 202070 <.*>
+ +[a-f0-9]+: 62 e4 ec 18 af 0d b4 0c 20 00 imul 0x200cb4\(%rip\),%r17,%rdx # 202070 <.*>
+ +[a-f0-9]+: 62 f4 ec 10 af 0d aa 0c 20 00 imul 0x200caa\(%rip\),%rcx,%r18 # 202070 <.*>
+ +[a-f0-9]+: d5 00 ff 35 a2 0c 20 00 \{rex2 0x0\} push 0x200ca2\(%rip\) # 202070 <.*>
#pass
@@ -0,0 +1,17 @@
+ .data
+ .type bar, @object
+bar:
+ .byte 1
+ .size bar, .-bar
+ .globl foo
+ .type foo, @object
+foo:
+ .byte 1
+ .size foo, .-foo
+ .text
+ .globl _start
+ .type _start, @function
+_start:
+ pushq bar@GOTPCREL(%rip)
+ {rex} pushq foo@GOTPCREL(%rip)
+ .size _start, .-_start
@@ -0,0 +1,15 @@
+#source: load5.s
+#as: --64 -mrelax-relocations=yes
+#ld: -melf_x86_64
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+#...
+[a-f0-9]+ <_start>:
+[ ]*[a-f0-9]+: 26 68 ([0-9a-f]{2} ){4} * es push \$0x[a-f0-9]+
+[ ]*[a-f0-9]+: 26 40 68 ([0-9a-f]{2} ){4} * es rex push \$0x[a-f0-9]+
+#pass
@@ -0,0 +1,15 @@
+#source: load5.s
+#as: --64 -mrelax-relocations=yes
+#ld: -shared -melf_x86_64
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+#...
+[a-f0-9]+ <_start>:
+[ ]*[a-f0-9]+: ff 35 ([0-9a-f]{2} ){4} * push +0x[a-f0-9]+\(%rip\) # [a-f0-9]+ <.*>
+[ ]*[a-f0-9]+: 40 ff 35 ([0-9a-f]{2} ){4} * rex push 0x[a-f0-9]+\(%rip\) # [a-f0-9]+ <.*>
+#pass
@@ -654,6 +654,8 @@ run_dump_test "load2"
run_dump_test "load3a"
run_dump_test "load3b"
run_dump_test "load4"
+run_dump_test "load5a"
+run_dump_test "load5b"
run_dump_test "call1a"
run_dump_test "call1b"
run_dump_test "call1c"