diff mbox series

install: Delete scripts/output-format.sed and support lld

Message ID 20200411232340.135070-1-maskray@google.com
State New
Headers show
Series install: Delete scripts/output-format.sed and support lld | expand

Commit Message

Fangrui Song April 11, 2020, 11:23 p.m. UTC
GNU ld and gold have supported --print-output-format since 2011. glibc
requires binutils>=2.25 (2015), so if LD is GNU ld or gold, we can
assume the option is supported.

lld is by default a cross linker supporting multiple targets. It auto
detects the file format and does not need OUTPUT_FORMAT. It does not
support --print-output-format, so we can repurpose the
output-format=unknown branch to make format.lds empty.

GNU ld and gold can skip a script with incompatible OUTPUT_FORMAT and
find the next script in the search path. So libc.so linked with lld
cannot be skipped automatically. However, this is considered a minor
functionality loss by lld maintainers.
---
 Makerules                                     | 12 +++----
 .../strcoll-inputs/filelist#en_US.UTF-8       |  1 -
 scripts/output-format.sed                     | 35 -------------------
 3 files changed, 4 insertions(+), 44 deletions(-)
 delete mode 100644 scripts/output-format.sed

Comments

Fangrui Song April 30, 2020, 5:39 a.m. UTC | #1
On 2020-04-11, Fangrui Song wrote:
>GNU ld and gold have supported --print-output-format since 2011. glibc
>requires binutils>=2.25 (2015), so if LD is GNU ld or gold, we can
>assume the option is supported.
>
>lld is by default a cross linker supporting multiple targets. It auto
>detects the file format and does not need OUTPUT_FORMAT. It does not
>support --print-output-format, so we can repurpose the
>output-format=unknown branch to make format.lds empty.
>
>GNU ld and gold can skip a script with incompatible OUTPUT_FORMAT and
>find the next script in the search path. So libc.so linked with lld
>cannot be skipped automatically. However, this is considered a minor
>functionality loss by lld maintainers.
>---
> Makerules                                     | 12 +++----
> .../strcoll-inputs/filelist#en_US.UTF-8       |  1 -
> scripts/output-format.sed                     | 35 -------------------
> 3 files changed, 4 insertions(+), 44 deletions(-)
> delete mode 100644 scripts/output-format.sed
>
>diff --git a/Makerules b/Makerules
>index 1e9c18f0d8..6a6e5579ae 100644
>--- a/Makerules
>+++ b/Makerules
>@@ -1067,18 +1067,14 @@ install: $(inst_slibdir)/libc.so$(libc.so-version)
> # for the configuration we are building.  We put this statement into
> # the linker scripts we install for -lc et al so that they will not be
> # used by a link for a different format on a multi-architecture system.
>-$(common-objpfx)format.lds: $(..)scripts/output-format.sed \
>-			    $(common-objpfx)config.make \
>+$(common-objpfx)format.lds: $(common-objpfx)config.make \
> 			    $(common-objpfx)config.h $(..)Makerules
> ifneq (unknown,$(output-format))
> 	echo > $@.new 'OUTPUT_FORMAT($(output-format))'
> else
>-	$(LINK.o) -shared $(sysdep-LDFLAGS) $(rtld-LDFLAGS) \
>-		  $(LDFLAGS.so) $(LDFLAGS-lib.so) \
>-		  -x c /dev/null -o $@.so -Wl,--verbose -v 2>&1 \
>-	| sed -n -f $< > $@.new
>-	test -s $@.new
>-	rm -f $@.so
>+# This branch is taken by lld, which auto detects the file format and does not
>+# need OUTPUT_FORMAT.
>+	echo > $@.new
> endif
> 	mv -f $@.new $@
> common-generated += format.lds
>diff --git a/benchtests/strcoll-inputs/filelist#en_US.UTF-8 b/benchtests/strcoll-inputs/filelist#en_US.UTF-8
>index aa44107ad6..cb981de88c 100644
>--- a/benchtests/strcoll-inputs/filelist#en_US.UTF-8
>+++ b/benchtests/strcoll-inputs/filelist#en_US.UTF-8
>@@ -9450,7 +9450,6 @@ move-if-change
> check-execstack.awk
> pylint
> pylintrc
>-output-format.sed
> merge-test-results.sh
> update-copyrights
> config-uname.sh
>diff --git a/scripts/output-format.sed b/scripts/output-format.sed
>deleted file mode 100644
>index 364f52059f..0000000000
>--- a/scripts/output-format.sed
>+++ /dev/null
>@@ -1,35 +0,0 @@
>-/ld.*[ 	]-E[BL]/b f
>-/collect.*[ 	]-E[BL]/b f
>-/OUTPUT_FORMAT[^)]*$/{N
>-s/\n[	 ]*/ /
>-}
>-t o
>-: o
>-s/^.*OUTPUT_FORMAT(\([^,]*\), \1, \1).*$/OUTPUT_FORMAT(\1)/
>-t q
>-s/^.*OUTPUT_FORMAT(\([^,]*\), \([^,]*\), \([^,]*\)).*$/\1,\2,\3/
>-t s
>-s/^.*OUTPUT_FORMAT(\([^,)]*\).*$)/OUTPUT_FORMAT(\1)/
>-t q
>-d
>-: s
>-s/"//g
>-G
>-s/\n//
>-s/^\([^,]*\),\([^,]*\),\([^,]*\),B/OUTPUT_FORMAT(\2)/p
>-s/^\([^,]*\),\([^,]*\),\([^,]*\),L/OUTPUT_FORMAT(\3)/p
>-s/^\([^,]*\),\([^,]*\),\([^,]*\)/OUTPUT_FORMAT(\1)/p
>-/,/s|^|*** BUG in libc/scripts/output-format.sed *** |p
>-q
>-: q
>-s/"//g
>-p
>-q
>-: f
>-s/^.*[ 	]-E\([BL]\)[ 	].*$/,\1/
>-t h
>-s/^.*[ 	]-E\([BL]\)$/,\1/
>-t h
>-d
>-: h
>-h
>-- 
>2.26.0.110.g2183baf09c-goog
>
Ping :)
Florian Weimer May 3, 2020, 8:30 p.m. UTC | #2
* Fangrui Song via Libc-alpha:

> GNU ld and gold can skip a script with incompatible OUTPUT_FORMAT and
> find the next script in the search path. So libc.so linked with lld
> cannot be skipped automatically. However, this is considered a minor
> functionality loss by lld maintainers.

That completely depends on how typical installations set up their
linker search paths.  Figuring that out is likely more work than
providing the correct OUTPUT_FORMAT information in the linker script,
so I'm not sure if the patch goes in the right direction.

In any case, the choice of linker for building glibc should result in
as little semantic difference in the produced artifacts, to avoid
inflating the compatibility matrix.  If OUTPUT_FORMAT is indeed
unnecessary (based on the evaluation I mentioned), it should be
removed unconditionally.

But as I said, it's probably better to find an alternative way to
generate the right OUTPUT_FORMAT directive for the linker script.
Fangrui Song May 3, 2020, 8:53 p.m. UTC | #3
On 2020-05-03, Florian Weimer wrote:
>* Fangrui Song via Libc-alpha:
>
>> GNU ld and gold can skip a script with incompatible OUTPUT_FORMAT and
>> find the next script in the search path. So libc.so linked with lld
>> cannot be skipped automatically. However, this is considered a minor
>> functionality loss by lld maintainers.
>
>That completely depends on how typical installations set up their
>linker search paths.  Figuring that out is likely more work than
>providing the correct OUTPUT_FORMAT information in the linker script,
>so I'm not sure if the patch goes in the right direction.

For typical installations, OUTPUT_FORMAT is not useful.
Even in multilib installations, OUTPUT_FORMAT is not useful unless, for
example, /usr/lib is in both -m32 and -m64's library search paths (this
is likely a misconfiguration).

`OUTPUT_FORMAT(elf64-x86-64)` can provide some protection for GNU ld and
gold.

>But as I said, it's probably better to find an alternative way to
>generate the right OUTPUT_FORMAT directive for the linker script.

We (LLD developers) consider OUTPUT_FORMAT with GNU ld/gold semantic has
very little value. See https://bugs.llvm.org/show_bug.cgi?id=37432 and
https://bugs.llvm.org/show_bug.cgi?id=43740

>In any case, the choice of linker for building glibc should result in
>as little semantic difference in the produced artifacts, to avoid
>inflating the compatibility matrix.  If OUTPUT_FORMAT is indeed
>unnecessary (based on the evaluation I mentioned), it should be
>removed unconditionally.

If there is concern about the aforementioned value, then we probably
should not drop OUTPUT_FORMAT for now for GNU ld/gold. If people think
we can remove OUTPUT_FORMAT as well, I will be happy to create a
follow-up patch.
H.J. Lu May 3, 2020, 9:11 p.m. UTC | #4
On Sun, May 3, 2020 at 1:54 PM Fangrui Song via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> On 2020-05-03, Florian Weimer wrote:
> >* Fangrui Song via Libc-alpha:
> >
> >> GNU ld and gold can skip a script with incompatible OUTPUT_FORMAT and
> >> find the next script in the search path. So libc.so linked with lld
> >> cannot be skipped automatically. However, this is considered a minor
> >> functionality loss by lld maintainers.
> >
> >That completely depends on how typical installations set up their
> >linker search paths.  Figuring that out is likely more work than
> >providing the correct OUTPUT_FORMAT information in the linker script,
> >so I'm not sure if the patch goes in the right direction.
>
> For typical installations, OUTPUT_FORMAT is not useful.
> Even in multilib installations, OUTPUT_FORMAT is not useful unless, for
> example, /usr/lib is in both -m32 and -m64's library search paths (this
> is likely a misconfiguration).
>
> `OUTPUT_FORMAT(elf64-x86-64)` can provide some protection for GNU ld and
> gold.
>
> >But as I said, it's probably better to find an alternative way to
> >generate the right OUTPUT_FORMAT directive for the linker script.
>
> We (LLD developers) consider OUTPUT_FORMAT with GNU ld/gold semantic has
> very little value. See https://bugs.llvm.org/show_bug.cgi?id=37432 and
> https://bugs.llvm.org/show_bug.cgi?id=43740
>
> >In any case, the choice of linker for building glibc should result in
> >as little semantic difference in the produced artifacts, to avoid
> >inflating the compatibility matrix.  If OUTPUT_FORMAT is indeed
> >unnecessary (based on the evaluation I mentioned), it should be
> >removed unconditionally.
>
> If there is concern about the aforementioned value, then we probably
> should not drop OUTPUT_FORMAT for now for GNU ld/gold. If people think
> we can remove OUTPUT_FORMAT as well, I will be happy to create a
> follow-up patch.

Historical usage of OUTPUT_FORMAT was used to link object files of
non-Linux/x86 formats and generate Linux/x86 output.  It was very useful
during the days when many binary only libraries weren't available for
Linux/x86.  It is still somewhat useful to link ELF object files for a different
OS.  In both cases, the format of the first input file isn't the format of the
output.  That is where OUTPUT_FORMAT comes in.
diff mbox series

Patch

diff --git a/Makerules b/Makerules
index 1e9c18f0d8..6a6e5579ae 100644
--- a/Makerules
+++ b/Makerules
@@ -1067,18 +1067,14 @@  install: $(inst_slibdir)/libc.so$(libc.so-version)
 # for the configuration we are building.  We put this statement into
 # the linker scripts we install for -lc et al so that they will not be
 # used by a link for a different format on a multi-architecture system.
-$(common-objpfx)format.lds: $(..)scripts/output-format.sed \
-			    $(common-objpfx)config.make \
+$(common-objpfx)format.lds: $(common-objpfx)config.make \
 			    $(common-objpfx)config.h $(..)Makerules
 ifneq (unknown,$(output-format))
 	echo > $@.new 'OUTPUT_FORMAT($(output-format))'
 else
-	$(LINK.o) -shared $(sysdep-LDFLAGS) $(rtld-LDFLAGS) \
-		  $(LDFLAGS.so) $(LDFLAGS-lib.so) \
-		  -x c /dev/null -o $@.so -Wl,--verbose -v 2>&1 \
-	| sed -n -f $< > $@.new
-	test -s $@.new
-	rm -f $@.so
+# This branch is taken by lld, which auto detects the file format and does not
+# need OUTPUT_FORMAT.
+	echo > $@.new
 endif
 	mv -f $@.new $@
 common-generated += format.lds
diff --git a/benchtests/strcoll-inputs/filelist#en_US.UTF-8 b/benchtests/strcoll-inputs/filelist#en_US.UTF-8
index aa44107ad6..cb981de88c 100644
--- a/benchtests/strcoll-inputs/filelist#en_US.UTF-8
+++ b/benchtests/strcoll-inputs/filelist#en_US.UTF-8
@@ -9450,7 +9450,6 @@  move-if-change
 check-execstack.awk
 pylint
 pylintrc
-output-format.sed
 merge-test-results.sh
 update-copyrights
 config-uname.sh
diff --git a/scripts/output-format.sed b/scripts/output-format.sed
deleted file mode 100644
index 364f52059f..0000000000
--- a/scripts/output-format.sed
+++ /dev/null
@@ -1,35 +0,0 @@ 
-/ld.*[ 	]-E[BL]/b f
-/collect.*[ 	]-E[BL]/b f
-/OUTPUT_FORMAT[^)]*$/{N
-s/\n[	 ]*/ /
-}
-t o
-: o
-s/^.*OUTPUT_FORMAT(\([^,]*\), \1, \1).*$/OUTPUT_FORMAT(\1)/
-t q
-s/^.*OUTPUT_FORMAT(\([^,]*\), \([^,]*\), \([^,]*\)).*$/\1,\2,\3/
-t s
-s/^.*OUTPUT_FORMAT(\([^,)]*\).*$)/OUTPUT_FORMAT(\1)/
-t q
-d
-: s
-s/"//g
-G
-s/\n//
-s/^\([^,]*\),\([^,]*\),\([^,]*\),B/OUTPUT_FORMAT(\2)/p
-s/^\([^,]*\),\([^,]*\),\([^,]*\),L/OUTPUT_FORMAT(\3)/p
-s/^\([^,]*\),\([^,]*\),\([^,]*\)/OUTPUT_FORMAT(\1)/p
-/,/s|^|*** BUG in libc/scripts/output-format.sed *** |p
-q
-: q
-s/"//g
-p
-q
-: f
-s/^.*[ 	]-E\([BL]\)[ 	].*$/,\1/
-t h
-s/^.*[ 	]-E\([BL]\)$/,\1/
-t h
-d
-: h
-h