[v2] ld/ELF: Add --image-base command line option to the ELF linker

Message ID gTw_y4zQzrQWCgeIfJpHXVBqOv79yYqSmcZlv4xQZLtoM8gj1zhVNpha9jaJ6ycwFMEk0blcJbLZYqJdtOn7TvfdmEhhR6XoVAc3BdkT2WI=@protonmail.com
State New
Headers
Series [v2] ld/ELF: Add --image-base command line option to the ELF linker |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Hakan Candar Oct. 28, 2024, 11:01 a.m. UTC
  LLD has dropped the option -Ttext-segment for specifying image base
addresses, instead forcing the use of the --image-base option for both
ELF and PE targets. As it stands, GNU LD and LLVM LLD are incompatible,
having two different options for the same functionality.

This patch enables the use of --image-base on ELF targets, advancing
consistency and compatibility.

See: https://reviews.llvm.org/D70468
     https://maskray.me/blog/2020-11-15-explain-gnu-linker-options#address-related
     https://sourceware.org/bugzilla/show_bug.cgi?id=25207

Moreover, a new test has been added to ensure that the behaviour of
-z separate-code stays the same when used with -Ttext-segment. When this
combination is used, -Ttext-segment sets the address of the first segment
(R), not the text segment (RX), and, as with -z noseparate-code, no
segments are created below the specified address. If this behaviour were
to change, the first (R) segment of the ELF file would begin at a lower
address than the specified text (RX) segment, breaking the traditional use
of this option for specifying the image base address.

---
Changes in v2:
  - Fix the testcase for failing targets

v1: https://sourceware.org/pipermail/binutils/2024-October/137412.html
---

ld/
	PR 25207
	* emultempl/pe.em: Remove redeclaration of --image-base.
	* emultempl/pep.em: Ditto.
	* emultempl/beos.em: Ditto.
	* ldlex.h (enum option_values): Move OPTION_IMAGE_BASE.
	* lexsup.c (ld_options): Add --image-base.
	(parse_args): On ELF, make --image-base follow -Ttext-segment behaviour.
	* ld.texi: Document the new option and re-define -Ttext-segment
	with established behaviour.
	* testsuite/ld-elf/pr25207.d: New file.
	* testsuite/ld-elf/pr25207.s: New file.
	* NEWS: Mention the new feature.

Signed-off-by: Hakan Candar <hakancandar@protonmail.com>
---
 ld/NEWS                       |  3 +++
 ld/emultempl/beos.em          |  1 -
 ld/emultempl/pe.em            |  1 -
 ld/emultempl/pep.em           |  1 -
 ld/ld.texi                    | 26 ++++++++++++++------------
 ld/ldlex.h                    |  2 +-
 ld/lexsup.c                   |  5 +++++
 ld/testsuite/ld-elf/pr25207.d | 11 +++++++++++
 ld/testsuite/ld-elf/pr25207.s | 13 +++++++++++++
 9 files changed, 47 insertions(+), 16 deletions(-)
 create mode 100644 ld/testsuite/ld-elf/pr25207.d
 create mode 100644 ld/testsuite/ld-elf/pr25207.s
  

Comments

Alan Modra Oct. 29, 2024, 9:10 a.m. UTC | #1
pushed
  
H.J. Lu Oct. 30, 2024, 9:01 p.m. UTC | #2
On Tue, Oct 29, 2024 at 5:10 PM Alan Modra <amodra@gmail.com> wrote:
>
> pushed
>
> --
> Alan Modra

On Linux/x86-64, I got

FAIL: ld-elf/pr25207
  
H.J. Lu Oct. 30, 2024, 9:08 p.m. UTC | #3
On Thu, Oct 31, 2024 at 5:01 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Oct 29, 2024 at 5:10 PM Alan Modra <amodra@gmail.com> wrote:
> >
> > pushed
> >
> > --
> > Alan Modra
>
> On Linux/x86-64, I got
>
> FAIL: ld-elf/pr25207
>

This is what I checked in.
  

Patch

diff --git a/ld/NEWS b/ld/NEWS
index 1f14dd6bc77..f7de85bd3ce 100644
--- a/ld/NEWS
+++ b/ld/NEWS
@@ -2,6 +2,9 @@ 
 
 Changes in 2.44:
 
+* Add --image-base=<ADDR> option to the ELF linker to behave the same
+  as -Ttext-segment for compatibility with LLD.
+
 * Add a "--build-id=xx" option, if built with the xxhash library.  This
   produces a 128-bit hash, 2-4x faster than md5 or sha1.
 
diff --git a/ld/emultempl/beos.em b/ld/emultempl/beos.em
index 29c386c61f8..54d03537de6 100644
--- a/ld/emultempl/beos.em
+++ b/ld/emultempl/beos.em
@@ -82,7 +82,6 @@  gld${EMULATION_NAME}_add_options
     {"dll", no_argument, NULL, OPTION_DLL},
     {"file-alignment", required_argument, NULL, OPTION_FILE_ALIGNMENT},
     {"heap", required_argument, NULL, OPTION_HEAP},
-    {"image-base", required_argument, NULL, OPTION_IMAGE_BASE},
     {"major-image-version", required_argument, NULL, OPTION_MAJOR_IMAGE_VERSION},
     {"major-os-version", required_argument, NULL, OPTION_MAJOR_OS_VERSION},
     {"major-subsystem-version", required_argument, NULL, OPTION_MAJOR_SUBSYSTEM_VERSION},
diff --git a/ld/emultempl/pe.em b/ld/emultempl/pe.em
index 9ab5e6ef481..869ffd3f623 100644
--- a/ld/emultempl/pe.em
+++ b/ld/emultempl/pe.em
@@ -251,7 +251,6 @@  gld${EMULATION_NAME}_add_options
     {"dll", no_argument, NULL, OPTION_DLL},
     {"file-alignment", required_argument, NULL, OPTION_FILE_ALIGNMENT},
     {"heap", required_argument, NULL, OPTION_HEAP},
-    {"image-base", required_argument, NULL, OPTION_IMAGE_BASE},
     {"major-image-version", required_argument, NULL, OPTION_MAJOR_IMAGE_VERSION},
     {"major-os-version", required_argument, NULL, OPTION_MAJOR_OS_VERSION},
     {"major-subsystem-version", required_argument, NULL, OPTION_MAJOR_SUBSYSTEM_VERSION},
diff --git a/ld/emultempl/pep.em b/ld/emultempl/pep.em
index 00c4ea9e15a..c225d052fb8 100644
--- a/ld/emultempl/pep.em
+++ b/ld/emultempl/pep.em
@@ -261,7 +261,6 @@  gld${EMULATION_NAME}_add_options
     {"dll", no_argument, NULL, OPTION_DLL},
     {"file-alignment", required_argument, NULL, OPTION_FILE_ALIGNMENT},
     {"heap", required_argument, NULL, OPTION_HEAP},
-    {"image-base", required_argument, NULL, OPTION_IMAGE_BASE},
     {"major-image-version", required_argument, NULL, OPTION_MAJOR_IMAGE_VERSION},
     {"major-os-version", required_argument, NULL, OPTION_MAJOR_OS_VERSION},
     {"major-subsystem-version", required_argument, NULL, OPTION_MAJOR_SUBSYSTEM_VERSION},
diff --git a/ld/ld.texi b/ld/ld.texi
index b5733df0031..c66ac5de4b2 100644
--- a/ld/ld.texi
+++ b/ld/ld.texi
@@ -2732,6 +2732,19 @@  for compatibility with other linkers, you may omit the leading
 should be no white space between @var{sectionname}, the equals
 sign (``@key{=}''), and @var{org}.
 
+@kindex --image-base=@var{org}
+@item --image-base=@var{org}
+@cindex image base address, cmd line
+When using ELF, same as @option{-Ttext-segment}, with both options effectively
+setting the base address of the ELF executable.
+
+When using PE, use @var{org} as the base address of your program or dll.
+This is the lowest memory location that will be used when your program or
+dll is loaded.  To reduce the need to relocate and improve performance of
+your dlls, each should have a unique base address and not overlap any
+other dlls.  The default is 0x400000 for executables, and 0x10000000
+for dlls.
+
 @kindex -Tbss=@var{org}
 @kindex -Tdata=@var{org}
 @kindex -Ttext=@var{org}
@@ -2746,7 +2759,7 @@  Same as @option{--section-start}, with @code{.bss}, @code{.data} or
 @item -Ttext-segment=@var{org}
 @cindex text segment origin, cmd line
 When creating an ELF executable, it will set the address of the first
-byte of the text segment.  Note that when @option{-pie} is used with
+byte of the first segment.  Note that when @option{-pie} is used with
 @option{-Ttext-segment=@var{org}}, the output executable is marked
 ET_EXEC so that the address of the first byte of the text segment will
 be guaranteed to be @var{org} at run time.
@@ -3392,17 +3405,6 @@  to be used as heap for this program.  The default is 1MB reserved, 4K
 committed.
 [This option is specific to the i386 PE targeted port of the linker]
 
-@cindex image base
-@kindex --image-base
-@item --image-base @var{value}
-Use @var{value} as the base address of your program or dll.  This is
-the lowest memory location that will be used when your program or dll
-is loaded.  To reduce the need to relocate and improve performance of
-your dlls, each should have a unique base address and not overlap any
-other dlls.  The default is 0x400000 for executables, and 0x10000000
-for dlls.
-[This option is specific to the i386 PE targeted port of the linker]
-
 @kindex --kill-at
 @item --kill-at
 If given, the stdcall suffixes (@@@var{nn}) will be stripped from
diff --git a/ld/ldlex.h b/ld/ldlex.h
index defe3fcbbb9..bb431101fb2 100644
--- a/ld/ldlex.h
+++ b/ld/ldlex.h
@@ -67,6 +67,7 @@  enum option_values
   OPTION_SYMBOLIC,
   OPTION_SYMBOLIC_FUNCTIONS,
   OPTION_TASK_LINK,
+  OPTION_IMAGE_BASE,
   OPTION_TBSS,
   OPTION_TDATA,
   OPTION_TTEXT,
@@ -325,7 +326,6 @@  enum option_values
   /* Used by emultempl/pe.em, emultempl/pep.em and emultempl/beos.em.  */
   OPTION_DLL,
   OPTION_FILE_ALIGNMENT,
-  OPTION_IMAGE_BASE,
   OPTION_MAJOR_IMAGE_VERSION,
   OPTION_MAJOR_OS_VERSION,
   OPTION_MAJOR_SUBSYSTEM_VERSION,
diff --git a/ld/lexsup.c b/ld/lexsup.c
index 37d746652ca..533535c2b89 100644
--- a/ld/lexsup.c
+++ b/ld/lexsup.c
@@ -510,6 +510,8 @@  static const struct ld_option ld_options[] =
   { {"section-start", required_argument, NULL, OPTION_SECTION_START},
     '\0', N_("SECTION=ADDRESS"), N_("Set address of named section"),
     TWO_DASHES },
+  { {"image-base", required_argument, NULL, OPTION_IMAGE_BASE},
+    '\0', N_("ADDRESS"), N_("Set image base address"), TWO_DASHES },
   { {"Tbss", required_argument, NULL, OPTION_TBSS},
     '\0', N_("ADDRESS"), N_("Set address of .bss section"), ONE_DASH },
   { {"Tdata", required_argument, NULL, OPTION_TDATA},
@@ -1477,6 +1479,9 @@  parse_args (unsigned argc, char **argv)
 	case OPTION_TTEXT:
 	  set_segment_start (".text", optarg);
 	  break;
+	case OPTION_IMAGE_BASE:
+	  /* Unless PE, --image-base and -Ttext-segment behavior is the same
+	     PE-specific functionality is implemented in emultempl/{pe, pep, beos}.em  */
 	case OPTION_TTEXT_SEGMENT:
 	  set_segment_start (".text-segment", optarg);
 	  break;
diff --git a/ld/testsuite/ld-elf/pr25207.d b/ld/testsuite/ld-elf/pr25207.d
new file mode 100644
index 00000000000..edec7774e60
--- /dev/null
+++ b/ld/testsuite/ld-elf/pr25207.d
@@ -0,0 +1,11 @@ 
+#source: pr25207.s
+#ld: -z separate-code -Ttext-segment=0x120000 -z max-page-size=0x10000
+#readelf: -l --wide
+#target: *-*-linux* *-*-gnu* arm*-*-uclinuxfdpiceabi
+# changing -Ttext-segment behaviour will break --image-base (pr25207)
+# -Ttext-segment=<ADDR> should set the first segment address,
+# not necessarily the first executable segment.
+
+#...
+  LOAD +0x0+ 0x0*120000 0x0*120000 0x[0-9a-f]+ 0x[0-9a-f]+ R   .*
+#pass
diff --git a/ld/testsuite/ld-elf/pr25207.s b/ld/testsuite/ld-elf/pr25207.s
new file mode 100644
index 00000000000..9f7af218db3
--- /dev/null
+++ b/ld/testsuite/ld-elf/pr25207.s
@@ -0,0 +1,13 @@ 
+        .section .text, "ax"
+	.globl  main
+	.globl  start
+	.globl  _start
+	.globl  __start
+main:
+start:
+_start:
+__start:
+	.byte 0
+
+	.section .rodata
+	.byte 0