diff mbox series

powerpc: Define USE_PPC64_NOTOC iff compiler and linker also supports it

Message ID 20211108113316.8867-1-adhemerval.zanella@linaro.org
State Committed
Headers show
Series powerpc: Define USE_PPC64_NOTOC iff compiler and linker also supports it | expand

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Adhemerval Zanella Nov. 8, 2021, 11:33 a.m. UTC
The @ntoc usage only yields any advantage on ISA 3.1+ machine (power10)
and for ld.bfd also when it see pcrel relocations used on the code
(generated if compiler targets ISA 3.1+).  On bfd case ISA 3.1+
instruction on stubs are used iff linker also sees the new pc-relative
relocations (for instance R_PPC64_D34), otherwise it generates default
stubs (ppc64_elf_check_relocs:4700).

This patch also help on linkers that do not implement this optimization,
since building for older ISA (such as 3.0 / power9) will also trigger
power10 stubs generation in the assembly code uses the NOTOC imacro.

Checked on powerpc64le-linux-gnu.
---
 sysdeps/powerpc/powerpc64/configure    | 42 ++++++++++++++++----------
 sysdeps/powerpc/powerpc64/configure.ac | 25 ++++++++++-----
 2 files changed, 43 insertions(+), 24 deletions(-)

Comments

Fangrui Song Nov. 8, 2021, 10:48 p.m. UTC | #1
On 2021-11-08, Adhemerval Zanella wrote:
>The @ntoc usage only yields any advantage on ISA 3.1+ machine (power10)
>and for ld.bfd also when it see pcrel relocations used on the code
>(generated if compiler targets ISA 3.1+).  On bfd case ISA 3.1+
>instruction on stubs are used iff linker also sees the new pc-relative
>relocations (for instance R_PPC64_D34), otherwise it generates default
>stubs (ppc64_elf_check_relocs:4700).
>
>This patch also help on linkers that do not implement this optimization,
>since building for older ISA (such as 3.0 / power9) will also trigger
>power10 stubs generation in the assembly code uses the NOTOC imacro.
>
>Checked on powerpc64le-linux-gnu.
>---
> sysdeps/powerpc/powerpc64/configure    | 42 ++++++++++++++++----------
> sysdeps/powerpc/powerpc64/configure.ac | 25 ++++++++++-----
> 2 files changed, 43 insertions(+), 24 deletions(-)
>
>diff --git a/sysdeps/powerpc/powerpc64/configure b/sysdeps/powerpc/powerpc64/configure
>index 5ce77af631..db18791a55 100644
>--- a/sysdeps/powerpc/powerpc64/configure
>+++ b/sysdeps/powerpc/powerpc64/configure
>@@ -32,26 +32,36 @@ if test x$libc_cv_overlapping_opd = xyes; then
>
> fi
>
>-# @notoc started to be supported in GNU Binutils 2.31.
>-
>-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler supports @notoc" >&5
>-$as_echo_n "checking if the assembler supports @notoc... " >&6; }
>+# We check if compiler supports @notoc generation since there is no
>+# gain by enabling it if it will be optimized away by the linker.
>+# It also helps linkers that might not optimize it and end up
>+# generating stubs with ISA 3.1 instruction even targetting older ISA.
>+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the compiler supports @notoc" >&5
>+$as_echo_n "checking if the compiler supports @notoc... " >&6; }
> if ${libc_cv_ppc64_notoc+:} false; then :
>   $as_echo_n "(cached) " >&6
> else
>-
>-	       cat confdefs.h - <<_ACEOF >conftest.$ac_ext
>-/* end confdefs.h.  */
>-
>-void foo (void) {asm("b foo@notoc");}
>-
>-_ACEOF
>-if ac_fn_c_try_compile "$LINENO"; then :
>-  libc_cv_ppc64_notoc=yes
>-else
>+    cat > conftest.c <<EOF
>+int bar (void);
>+int foo (void) { return bar () + 1; }
>+EOF
>   libc_cv_ppc64_notoc=no
>-fi
>-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
>+  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c'
>+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
>+  (eval $ac_try) 2>&5
>+  ac_status=$?
>+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
>+  test $ac_status = 0; }; } \
>+     && { ac_try='grep -q -E 'bar@notoc' conftest.s'
>+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
>+  (eval $ac_try) 2>&5
>+  ac_status=$?
>+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
>+  test $ac_status = 0; }; }
>+  then
>+    libc_cv_ppc64_notoc=yes
>+  fi
>+  rm -rf conftest.*
> fi
> { $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc64_notoc" >&5
> $as_echo "$libc_cv_ppc64_notoc" >&6; }
>diff --git a/sysdeps/powerpc/powerpc64/configure.ac b/sysdeps/powerpc/powerpc64/configure.ac
>index b77156f696..05f0000807 100644
>--- a/sysdeps/powerpc/powerpc64/configure.ac
>+++ b/sysdeps/powerpc/powerpc64/configure.ac
>@@ -22,13 +22,22 @@ if test x$libc_cv_overlapping_opd = xyes; then
>   AC_DEFINE(USE_PPC64_OVERLAPPING_OPD)
> fi
>
>-# @notoc started to be supported in GNU Binutils 2.31.
>-AC_CACHE_CHECK([if the assembler supports @notoc],
>-	       libc_cv_ppc64_notoc, [
>-	       AC_COMPILE_IFELSE([AC_LANG_SOURCE([
>-void foo (void) {asm("b foo@notoc");}
>-		  ])],
>-		  [libc_cv_ppc64_notoc=yes],
>-		  [libc_cv_ppc64_notoc=no])])
>+# We check if compiler supports @notoc generation since there is no
>+# gain by enabling it if it will be optimized away by the linker.
>+# It also helps linkers that might not optimize it and end up
>+# generating stubs with ISA 3.1 instruction even targetting older ISA.
>+AC_CACHE_CHECK([if the compiler supports @notoc],
>+	       libc_cv_ppc64_notoc, [dnl
>+  cat > conftest.c <<EOF
>+int bar (void);
>+int foo (void) { return bar () + 1; }
>+EOF
>+  libc_cv_ppc64_notoc=no
>+  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c]) \
>+     && AC_TRY_COMMAND([grep -q -E 'bar@notoc' conftest.s])
>+  then
>+    libc_cv_ppc64_notoc=yes
>+  fi
>+  rm -rf conftest.*])
> AS_IF([test x$libc_cv_ppc64_notoc = xyes],
>       [AC_DEFINE(USE_PPC64_NOTOC)])
>-- 
>2.32.0
>

I think the original fix to https://sourceware.org/bugzilla/show_bug.cgi?id=26173
complicated things. A simpler fix is to just check whether CC CFLAGS
generates @notoc and use that to guide whether assembly code needs @notoc.

That will fix ld.lld 13.0.0 build which doesn't have the heuristic GNU
ld currently has.

AIUI If the jump target (e.g. __syscall_error) is non-preemptible and
doesn't use PC-relative, we will not hit `lacks nop, can't restore toc`.
Alan Modra Nov. 9, 2021, 11:05 a.m. UTC | #2
I have a question about powerpc64/ppc-mcount.S.  Why is the assembly
wrapper using ENTRY and a NOTOC call?  By using ENTRY you are saying
that calls to _mcount must have r2 valid.  Given that r2 is valid, why
then use NOTOC?
Adhemerval Zanella Nov. 9, 2021, 1:03 p.m. UTC | #3
On 08/11/2021 19:48, Fangrui Song wrote:
> On 2021-11-08, Adhemerval Zanella wrote:
>> The @ntoc usage only yields any advantage on ISA 3.1+ machine (power10)
>> and for ld.bfd also when it see pcrel relocations used on the code
>> (generated if compiler targets ISA 3.1+).  On bfd case ISA 3.1+
>> instruction on stubs are used iff linker also sees the new pc-relative
>> relocations (for instance R_PPC64_D34), otherwise it generates default
>> stubs (ppc64_elf_check_relocs:4700).
>>
>> This patch also help on linkers that do not implement this optimization,
>> since building for older ISA (such as 3.0 / power9) will also trigger
>> power10 stubs generation in the assembly code uses the NOTOC imacro.
>>
>> Checked on powerpc64le-linux-gnu.
>> ---
>> sysdeps/powerpc/powerpc64/configure    | 42 ++++++++++++++++----------
>> sysdeps/powerpc/powerpc64/configure.ac | 25 ++++++++++-----
>> 2 files changed, 43 insertions(+), 24 deletions(-)
>>
>> diff --git a/sysdeps/powerpc/powerpc64/configure b/sysdeps/powerpc/powerpc64/configure
>> index 5ce77af631..db18791a55 100644
>> --- a/sysdeps/powerpc/powerpc64/configure
>> +++ b/sysdeps/powerpc/powerpc64/configure
>> @@ -32,26 +32,36 @@ if test x$libc_cv_overlapping_opd = xyes; then
>>
>> fi
>>
>> -# @notoc started to be supported in GNU Binutils 2.31.
>> -
>> -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler supports @notoc" >&5
>> -$as_echo_n "checking if the assembler supports @notoc... " >&6; }
>> +# We check if compiler supports @notoc generation since there is no
>> +# gain by enabling it if it will be optimized away by the linker.
>> +# It also helps linkers that might not optimize it and end up
>> +# generating stubs with ISA 3.1 instruction even targetting older ISA.
>> +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the compiler supports @notoc" >&5
>> +$as_echo_n "checking if the compiler supports @notoc... " >&6; }
>> if ${libc_cv_ppc64_notoc+:} false; then :
>>   $as_echo_n "(cached) " >&6
>> else
>> -
>> -           cat confdefs.h - <<_ACEOF >conftest.$ac_ext
>> -/* end confdefs.h.  */
>> -
>> -void foo (void) {asm("b foo@notoc");}
>> -
>> -_ACEOF
>> -if ac_fn_c_try_compile "$LINENO"; then :
>> -  libc_cv_ppc64_notoc=yes
>> -else
>> +    cat > conftest.c <<EOF
>> +int bar (void);
>> +int foo (void) { return bar () + 1; }
>> +EOF
>>   libc_cv_ppc64_notoc=no
>> -fi
>> -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
>> +  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c'
>> +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
>> +  (eval $ac_try) 2>&5
>> +  ac_status=$?
>> +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
>> +  test $ac_status = 0; }; } \
>> +     && { ac_try='grep -q -E 'bar@notoc' conftest.s'
>> +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
>> +  (eval $ac_try) 2>&5
>> +  ac_status=$?
>> +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
>> +  test $ac_status = 0; }; }
>> +  then
>> +    libc_cv_ppc64_notoc=yes
>> +  fi
>> +  rm -rf conftest.*
>> fi
>> { $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc64_notoc" >&5
>> $as_echo "$libc_cv_ppc64_notoc" >&6; }
>> diff --git a/sysdeps/powerpc/powerpc64/configure.ac b/sysdeps/powerpc/powerpc64/configure.ac
>> index b77156f696..05f0000807 100644
>> --- a/sysdeps/powerpc/powerpc64/configure.ac
>> +++ b/sysdeps/powerpc/powerpc64/configure.ac
>> @@ -22,13 +22,22 @@ if test x$libc_cv_overlapping_opd = xyes; then
>>   AC_DEFINE(USE_PPC64_OVERLAPPING_OPD)
>> fi
>>
>> -# @notoc started to be supported in GNU Binutils 2.31.
>> -AC_CACHE_CHECK([if the assembler supports @notoc],
>> -           libc_cv_ppc64_notoc, [
>> -           AC_COMPILE_IFELSE([AC_LANG_SOURCE([
>> -void foo (void) {asm("b foo@notoc");}
>> -          ])],
>> -          [libc_cv_ppc64_notoc=yes],
>> -          [libc_cv_ppc64_notoc=no])])
>> +# We check if compiler supports @notoc generation since there is no
>> +# gain by enabling it if it will be optimized away by the linker.
>> +# It also helps linkers that might not optimize it and end up
>> +# generating stubs with ISA 3.1 instruction even targetting older ISA.
>> +AC_CACHE_CHECK([if the compiler supports @notoc],
>> +           libc_cv_ppc64_notoc, [dnl
>> +  cat > conftest.c <<EOF
>> +int bar (void);
>> +int foo (void) { return bar () + 1; }
>> +EOF
>> +  libc_cv_ppc64_notoc=no
>> +  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c]) \
>> +     && AC_TRY_COMMAND([grep -q -E 'bar@notoc' conftest.s])
>> +  then
>> +    libc_cv_ppc64_notoc=yes
>> +  fi
>> +  rm -rf conftest.*])
>> AS_IF([test x$libc_cv_ppc64_notoc = xyes],
>>       [AC_DEFINE(USE_PPC64_NOTOC)])
>> -- 
>> 2.32.0
>>
> 
> I think the original fix to https://sourceware.org/bugzilla/show_bug.cgi?id=26173
> complicated things. A simpler fix is to just check whether CC CFLAGS
> generates @notoc and use that to guide whether assembly code needs @notoc.

This is exactly what this patch does in fact: it check if $CC $CFLAGS
generates @notoc and defines USE_PPC64_NOTOC which is used internally
on NOTOC macro.

The title is misleading in fact, is should be "powerpc: Define USE_PPC64_NOTOC 
iff compiler supports it"
Fangrui Song Nov. 9, 2021, 5:44 p.m. UTC | #4
On 2021-11-09, Adhemerval Zanella wrote:
>
>
>On 08/11/2021 19:48, Fangrui Song wrote:
>> On 2021-11-08, Adhemerval Zanella wrote:
>>> The @ntoc usage only yields any advantage on ISA 3.1+ machine (power10)

typo: @notoc

any => an ?

>>> and for ld.bfd also when it see pcrel relocations used on the code

sees

>>> (generated if compiler targets ISA 3.1+).  On bfd case ISA 3.1+
>>> instruction on stubs are used iff linker also sees the new pc-relative
>>> relocations (for instance R_PPC64_D34), otherwise it generates default
>>> stubs (ppc64_elf_check_relocs:4700).
>>>
>>> This patch also help on linkers that do not implement this optimization,
>>> since building for older ISA (such as 3.0 / power9) will also trigger
>>> power10 stubs generation in the assembly code uses the NOTOC imacro.
>>>
>>> Checked on powerpc64le-linux-gnu.
>>> ---
>>> sysdeps/powerpc/powerpc64/configure    | 42 ++++++++++++++++----------
>>> sysdeps/powerpc/powerpc64/configure.ac | 25 ++++++++++-----
>>> 2 files changed, 43 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/sysdeps/powerpc/powerpc64/configure b/sysdeps/powerpc/powerpc64/configure
>>> index 5ce77af631..db18791a55 100644
>>> --- a/sysdeps/powerpc/powerpc64/configure
>>> +++ b/sysdeps/powerpc/powerpc64/configure
>>> @@ -32,26 +32,36 @@ if test x$libc_cv_overlapping_opd = xyes; then
>>>
>>> fi
>>>
>>> -# @notoc started to be supported in GNU Binutils 2.31.
>>> -
>>> -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler supports @notoc" >&5
>>> -$as_echo_n "checking if the assembler supports @notoc... " >&6; }
>>> +# We check if compiler supports @notoc generation since there is no
>>> +# gain by enabling it if it will be optimized away by the linker.
>>> +# It also helps linkers that might not optimize it and end up
>>> +# generating stubs with ISA 3.1 instruction even targetting older ISA.
>>> +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the compiler supports @notoc" >&5
>>> +$as_echo_n "checking if the compiler supports @notoc... " >&6; }
>>> if ${libc_cv_ppc64_notoc+:} false; then :
>>>   $as_echo_n "(cached) " >&6
>>> else
>>> -
>>> -           cat confdefs.h - <<_ACEOF >conftest.$ac_ext
>>> -/* end confdefs.h.  */
>>> -
>>> -void foo (void) {asm("b foo@notoc");}
>>> -
>>> -_ACEOF
>>> -if ac_fn_c_try_compile "$LINENO"; then :
>>> -  libc_cv_ppc64_notoc=yes
>>> -else
>>> +    cat > conftest.c <<EOF
>>> +int bar (void);
>>> +int foo (void) { return bar () + 1; }
>>> +EOF
>>>   libc_cv_ppc64_notoc=no
>>> -fi
>>> -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
>>> +  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c'
>>> +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
>>> +  (eval $ac_try) 2>&5
>>> +  ac_status=$?
>>> +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
>>> +  test $ac_status = 0; }; } \
>>> +     && { ac_try='grep -q -E 'bar@notoc' conftest.s'
>>> +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
>>> +  (eval $ac_try) 2>&5
>>> +  ac_status=$?
>>> +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
>>> +  test $ac_status = 0; }; }
>>> +  then
>>> +    libc_cv_ppc64_notoc=yes
>>> +  fi
>>> +  rm -rf conftest.*
>>> fi
>>> { $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc64_notoc" >&5
>>> $as_echo "$libc_cv_ppc64_notoc" >&6; }
>>> diff --git a/sysdeps/powerpc/powerpc64/configure.ac b/sysdeps/powerpc/powerpc64/configure.ac
>>> index b77156f696..05f0000807 100644
>>> --- a/sysdeps/powerpc/powerpc64/configure.ac
>>> +++ b/sysdeps/powerpc/powerpc64/configure.ac
>>> @@ -22,13 +22,22 @@ if test x$libc_cv_overlapping_opd = xyes; then
>>>   AC_DEFINE(USE_PPC64_OVERLAPPING_OPD)
>>> fi
>>>
>>> -# @notoc started to be supported in GNU Binutils 2.31.
>>> -AC_CACHE_CHECK([if the assembler supports @notoc],
>>> -           libc_cv_ppc64_notoc, [
>>> -           AC_COMPILE_IFELSE([AC_LANG_SOURCE([
>>> -void foo (void) {asm("b foo@notoc");}
>>> -          ])],
>>> -          [libc_cv_ppc64_notoc=yes],
>>> -          [libc_cv_ppc64_notoc=no])])
>>> +# We check if compiler supports @notoc generation since there is no
>>> +# gain by enabling it if it will be optimized away by the linker.
>>> +# It also helps linkers that might not optimize it and end up
>>> +# generating stubs with ISA 3.1 instruction even targetting older ISA.
>>> +AC_CACHE_CHECK([if the compiler supports @notoc],
>>> +           libc_cv_ppc64_notoc, [dnl
>>> +  cat > conftest.c <<EOF
>>> +int bar (void);
>>> +int foo (void) { return bar () + 1; }
>>> +EOF
>>> +  libc_cv_ppc64_notoc=no
>>> +  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c]) \
>>> +     && AC_TRY_COMMAND([grep -q -E 'bar@notoc' conftest.s])
>>> +  then
>>> +    libc_cv_ppc64_notoc=yes
>>> +  fi
>>> +  rm -rf conftest.*])
>>> AS_IF([test x$libc_cv_ppc64_notoc = xyes],
>>>       [AC_DEFINE(USE_PPC64_NOTOC)])
>>> -- 
>>> 2.32.0
>>>
>>
>> I think the original fix to https://sourceware.org/bugzilla/show_bug.cgi?id=26173
>> complicated things. A simpler fix is to just check whether CC CFLAGS
>> generates @notoc and use that to guide whether assembly code needs @notoc.
>
>This is exactly what this patch does in fact: it check if $CC $CFLAGS
>generates @notoc and defines USE_PPC64_NOTOC which is used internally
>on NOTOC macro.
>
>The title is misleading in fact, is should be "powerpc: Define USE_PPC64_NOTOC
>iff compiler supports it"

Thanks for the clarification. The new title looks good to me.

Alan has a question about an assembly usage, but that's orthogonal to
the configure.ac fix.

Reviewed-by: Fangrui Song <maskray@google.com>
Adhemerval Zanella Nov. 9, 2021, 6:44 p.m. UTC | #5
On 09/11/2021 14:44, Fangrui Song wrote:
> On 2021-11-09, Adhemerval Zanella wrote:
>>
>>
>> On 08/11/2021 19:48, Fangrui Song wrote:
>>> On 2021-11-08, Adhemerval Zanella wrote:
>>>> The @ntoc usage only yields any advantage on ISA 3.1+ machine (power10)
> 
> typo: @notoc
> 
> any => an ?

Ack.

> 
>>>> and for ld.bfd also when it see pcrel relocations used on the code
> 
> sees

Ack.

> 
>>>> (generated if compiler targets ISA 3.1+).  On bfd case ISA 3.1+
>>>> instruction on stubs are used iff linker also sees the new pc-relative
>>>> relocations (for instance R_PPC64_D34), otherwise it generates default
>>>> stubs (ppc64_elf_check_relocs:4700).
>>>>
>>>> This patch also help on linkers that do not implement this optimization,
>>>> since building for older ISA (such as 3.0 / power9) will also trigger
>>>> power10 stubs generation in the assembly code uses the NOTOC imacro.
>>>>
>>>> Checked on powerpc64le-linux-gnu.
>>>> ---
>>>> sysdeps/powerpc/powerpc64/configure    | 42 ++++++++++++++++----------
>>>> sysdeps/powerpc/powerpc64/configure.ac | 25 ++++++++++-----
>>>> 2 files changed, 43 insertions(+), 24 deletions(-)
>>>>
>>>> diff --git a/sysdeps/powerpc/powerpc64/configure b/sysdeps/powerpc/powerpc64/configure
>>>> index 5ce77af631..db18791a55 100644
>>>> --- a/sysdeps/powerpc/powerpc64/configure
>>>> +++ b/sysdeps/powerpc/powerpc64/configure
>>>> @@ -32,26 +32,36 @@ if test x$libc_cv_overlapping_opd = xyes; then
>>>>
>>>> fi
>>>>
>>>> -# @notoc started to be supported in GNU Binutils 2.31.
>>>> -
>>>> -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler supports @notoc" >&5
>>>> -$as_echo_n "checking if the assembler supports @notoc... " >&6; }
>>>> +# We check if compiler supports @notoc generation since there is no
>>>> +# gain by enabling it if it will be optimized away by the linker.
>>>> +# It also helps linkers that might not optimize it and end up
>>>> +# generating stubs with ISA 3.1 instruction even targetting older ISA.
>>>> +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the compiler supports @notoc" >&5
>>>> +$as_echo_n "checking if the compiler supports @notoc... " >&6; }
>>>> if ${libc_cv_ppc64_notoc+:} false; then :
>>>>   $as_echo_n "(cached) " >&6
>>>> else
>>>> -
>>>> -           cat confdefs.h - <<_ACEOF >conftest.$ac_ext
>>>> -/* end confdefs.h.  */
>>>> -
>>>> -void foo (void) {asm("b foo@notoc");}
>>>> -
>>>> -_ACEOF
>>>> -if ac_fn_c_try_compile "$LINENO"; then :
>>>> -  libc_cv_ppc64_notoc=yes
>>>> -else
>>>> +    cat > conftest.c <<EOF
>>>> +int bar (void);
>>>> +int foo (void) { return bar () + 1; }
>>>> +EOF
>>>>   libc_cv_ppc64_notoc=no
>>>> -fi
>>>> -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
>>>> +  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c'
>>>> +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
>>>> +  (eval $ac_try) 2>&5
>>>> +  ac_status=$?
>>>> +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
>>>> +  test $ac_status = 0; }; } \
>>>> +     && { ac_try='grep -q -E 'bar@notoc' conftest.s'
>>>> +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
>>>> +  (eval $ac_try) 2>&5
>>>> +  ac_status=$?
>>>> +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
>>>> +  test $ac_status = 0; }; }
>>>> +  then
>>>> +    libc_cv_ppc64_notoc=yes
>>>> +  fi
>>>> +  rm -rf conftest.*
>>>> fi
>>>> { $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc64_notoc" >&5
>>>> $as_echo "$libc_cv_ppc64_notoc" >&6; }
>>>> diff --git a/sysdeps/powerpc/powerpc64/configure.ac b/sysdeps/powerpc/powerpc64/configure.ac
>>>> index b77156f696..05f0000807 100644
>>>> --- a/sysdeps/powerpc/powerpc64/configure.ac
>>>> +++ b/sysdeps/powerpc/powerpc64/configure.ac
>>>> @@ -22,13 +22,22 @@ if test x$libc_cv_overlapping_opd = xyes; then
>>>>   AC_DEFINE(USE_PPC64_OVERLAPPING_OPD)
>>>> fi
>>>>
>>>> -# @notoc started to be supported in GNU Binutils 2.31.
>>>> -AC_CACHE_CHECK([if the assembler supports @notoc],
>>>> -           libc_cv_ppc64_notoc, [
>>>> -           AC_COMPILE_IFELSE([AC_LANG_SOURCE([
>>>> -void foo (void) {asm("b foo@notoc");}
>>>> -          ])],
>>>> -          [libc_cv_ppc64_notoc=yes],
>>>> -          [libc_cv_ppc64_notoc=no])])
>>>> +# We check if compiler supports @notoc generation since there is no
>>>> +# gain by enabling it if it will be optimized away by the linker.
>>>> +# It also helps linkers that might not optimize it and end up
>>>> +# generating stubs with ISA 3.1 instruction even targetting older ISA.
>>>> +AC_CACHE_CHECK([if the compiler supports @notoc],
>>>> +           libc_cv_ppc64_notoc, [dnl
>>>> +  cat > conftest.c <<EOF
>>>> +int bar (void);
>>>> +int foo (void) { return bar () + 1; }
>>>> +EOF
>>>> +  libc_cv_ppc64_notoc=no
>>>> +  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c]) \
>>>> +     && AC_TRY_COMMAND([grep -q -E 'bar@notoc' conftest.s])
>>>> +  then
>>>> +    libc_cv_ppc64_notoc=yes
>>>> +  fi
>>>> +  rm -rf conftest.*])
>>>> AS_IF([test x$libc_cv_ppc64_notoc = xyes],
>>>>       [AC_DEFINE(USE_PPC64_NOTOC)])
>>>> -- 
>>>> 2.32.0
>>>>
>>>
>>> I think the original fix to https://sourceware.org/bugzilla/show_bug.cgi?id=26173
>>> complicated things. A simpler fix is to just check whether CC CFLAGS
>>> generates @notoc and use that to guide whether assembly code needs @notoc.
>>
>> This is exactly what this patch does in fact: it check if $CC $CFLAGS
>> generates @notoc and defines USE_PPC64_NOTOC which is used internally
>> on NOTOC macro.
>>
>> The title is misleading in fact, is should be "powerpc: Define USE_PPC64_NOTOC
>> iff compiler supports it"
> 
> Thanks for the clarification. The new title looks good to me.
> 
> Alan has a question about an assembly usage, but that's orthogonal to
> the configure.ac fix.

I will let Tulio answer it because he was the one that actually pushed
for the change.  I haven't investigate it why we need to NOTOC on the
code, maybe we can simplify it.

> 
> Reviewed-by: Fangrui Song <maskray@google.com>
Tulio Magno Quites Machado Filho Nov. 16, 2021, 7:42 p.m. UTC | #6
Alan Modra via Libc-alpha <libc-alpha@sourceware.org> writes:

> I have a question about powerpc64/ppc-mcount.S.  Why is the assembly
> wrapper using ENTRY and a NOTOC call?  By using ENTRY you are saying
> that calls to _mcount must have r2 valid.  Given that r2 is valid, why
> then use NOTOC?

I wouldn't be surprised if that was a misinterpretation of the ABI on my part.

Looking at table 2.20 "Protocols for Local Function Calls", I think this is a
scenario that falls to the last row of this table, where we have:

- Local call method
- nop is not needed
- Relocation is R_PPC64_REL24_NOTOC.

If this interpretation is incorrect, what is the recommended solution for this
case? Use ENTRY_TOCLESS?
Tulio Magno Quites Machado Filho Nov. 16, 2021, 8:20 p.m. UTC | #7
Tulio Magno Quites Machado Filho via Libc-alpha <libc-alpha@sourceware.org> writes:

> Alan Modra via Libc-alpha <libc-alpha@sourceware.org> writes:
>
>> I have a question about powerpc64/ppc-mcount.S.  Why is the assembly
>> wrapper using ENTRY and a NOTOC call?  By using ENTRY you are saying
>> that calls to _mcount must have r2 valid.  Given that r2 is valid, why
>> then use NOTOC?
>
> I wouldn't be surprised if that was a misinterpretation of the ABI on my part.
>
> Looking at table 2.20 "Protocols for Local Function Calls", I think this is a
> scenario that falls to the last row of this table, where we have:

OK, I should be looking at table 2.19 because this is not a call within the
same compilation unit.

I guess that answers my previous question and all these calls should have nop
after the branch.
Florian Weimer Nov. 16, 2021, 8:48 p.m. UTC | #8
* Tulio Magno Quites Machado Filho via Libc-alpha:

> Tulio Magno Quites Machado Filho via Libc-alpha <libc-alpha@sourceware.org> writes:
>
>> Alan Modra via Libc-alpha <libc-alpha@sourceware.org> writes:
>>
>>> I have a question about powerpc64/ppc-mcount.S.  Why is the assembly
>>> wrapper using ENTRY and a NOTOC call?  By using ENTRY you are saying
>>> that calls to _mcount must have r2 valid.  Given that r2 is valid, why
>>> then use NOTOC?
>>
>> I wouldn't be surprised if that was a misinterpretation of the ABI on my part.
>>
>> Looking at table 2.20 "Protocols for Local Function Calls", I think this is a
>> scenario that falls to the last row of this table, where we have:
>
> OK, I should be looking at table 2.19 because this is not a call within the
> same compilation unit.

I'm not entirely sure about that.  I think “local” in this context means
“the caller has complete information about the callee”, and this is true
for most direct calls within libc.so.6.  It's also necessary to make
LTO-based calling convention optimizations effective.

Thanks,
Florian
Fangrui Song Nov. 16, 2021, 10:12 p.m. UTC | #9
On Tue, Nov 16, 2021 at 12:49 PM Florian Weimer via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> * Tulio Magno Quites Machado Filho via Libc-alpha:
>
> > Tulio Magno Quites Machado Filho via Libc-alpha <libc-alpha@sourceware.org> writes:
> >
> >> Alan Modra via Libc-alpha <libc-alpha@sourceware.org> writes:
> >>
> >>> I have a question about powerpc64/ppc-mcount.S.  Why is the assembly
> >>> wrapper using ENTRY and a NOTOC call?  By using ENTRY you are saying
> >>> that calls to _mcount must have r2 valid.  Given that r2 is valid, why
> >>> then use NOTOC?
> >>
> >> I wouldn't be surprised if that was a misinterpretation of the ABI on my part.
> >>
> >> Looking at table 2.20 "Protocols for Local Function Calls", I think this is a
> >> scenario that falls to the last row of this table, where we have:
> >
> > OK, I should be looking at table 2.19 because this is not a call within the
> > same compilation unit.
>
> I'm not entirely sure about that.  I think “local” in this context means
> “the caller has complete information about the callee”, and this is true
> for most direct calls within libc.so.6.  It's also necessary to make
> LTO-based calling convention optimizations effective.
>
> Thanks,
> Florian
>

I think "external" in the context means the symbol may be preemptible
at link time (and therefore at run time).
"local" in the context means it must be non-preemptible.

STV_DEFAULT STB_GLOBAL (-shared or non-definition) symbols may be
preemptible (the compiler needs to be conservative and assumes that
the linker option --version-script, --dynamic-list,
-Bsymbolic-functions may not be specified) and use the external call
protocol.
Cornea, Marius via Libc-alpha Nov. 16, 2021, 10:46 p.m. UTC | #10
On 11/16/21 2:48 PM, Florian Weimer via Libc-alpha wrote:
> * Tulio Magno Quites Machado Filho via Libc-alpha:
>
>> Tulio Magno Quites Machado Filho via Libc-alpha <libc-alpha@sourceware.org> writes:
>>
>>> Alan Modra via Libc-alpha <libc-alpha@sourceware.org> writes:
>>>
>>>> I have a question about powerpc64/ppc-mcount.S.  Why is the assembly
>>>> wrapper using ENTRY and a NOTOC call?  By using ENTRY you are saying
>>>> that calls to _mcount must have r2 valid.  Given that r2 is valid, why
>>>> then use NOTOC?
>>> I wouldn't be surprised if that was a misinterpretation of the ABI on my part.
>>>
>>> Looking at table 2.20 "Protocols for Local Function Calls", I think this is a
>>> scenario that falls to the last row of this table, where we have:
>> OK, I should be looking at table 2.19 because this is not a call within the
>> same compilation unit.
> I'm not entirely sure about that.  I think “local” in this context means
> “the caller has complete information about the callee”, and this is true
> for most direct calls within libc.so.6.  It's also necessary to make
> LTO-based calling convention optimizations effective.


See the text preceding these tables.  "A local function call is one where the 
callee is known and visible within the unit of code being compiled or assembled."

Some optimizations may occur when program portions are statically linked, but
this is the rule the compiler and/or assembly writer should follow.

Bill

>
> Thanks,
> Florian
>
Alan Modra Nov. 17, 2021, 2:16 a.m. UTC | #11
On Tue, Nov 16, 2021 at 04:42:15PM -0300, Tulio Magno Quites Machado Filho wrote:
> Alan Modra via Libc-alpha <libc-alpha@sourceware.org> writes:
> 
> > I have a question about powerpc64/ppc-mcount.S.  Why is the assembly
> > wrapper using ENTRY and a NOTOC call?  By using ENTRY you are saying
> > that calls to _mcount must have r2 valid.  Given that r2 is valid, why
> > then use NOTOC?
> 
> I wouldn't be surprised if that was a misinterpretation of the ABI on my part.
> 
> Looking at table 2.20 "Protocols for Local Function Calls", I think this is a
> scenario that falls to the last row of this table, where we have:

As Bill said, the ABI does define what is meant by local.  However, it
is true that if you have enough control over the full build process
(you never do when !SHARED in glibc) and know what you're doing, you
may use a wider definition of "local".  It's something like "I know
this function linkage will be exactly the same as a static function in
the current compilation unit".  Which implies known local definition
at link time that cannot be overridden, *and* same toc group.  The
latter condition is often forgotten, and bites people using ELFv1
-mcmodel=small code when the TOC exceeds 64k.

> - Local call method
> - nop is not needed
> - Relocation is R_PPC64_REL24_NOTOC.
> 
> If this interpretation is incorrect, what is the recommended solution for this
> case? Use ENTRY_TOCLESS?

If you use ENTRY_TOCLESS you are saying that the function neither uses
nor changes r2.  This situation is not covered by table 2.19 or 2.20
in the ABI, because ENTRY_TOCLESS functions generally do not make
calls!  There is a note saying that it is not forbidden but then *you*
must ensure that the call does not change r2.  Now it might be that
__mcount_internal currently does not change r2, but that won't
necessarily hold for all future compilers.  Power10 pcrel code may use
r2 as a scratch register..

I recommend losing the NOTOC on the call.  You already have the nop
after the call when !SHARED, which is correct and necessary for a
normal call.  You also likely have enough control over SHARED builds
to omit the nop in that case.

Note that having a localentry:8 _mcount is not ideal if someone wants
to build -mcpu=power10 glibc, but _mcount is quite a way down on the
priority list of removing localentry:8 functions for -mcpu=power10.
Alan Modra Nov. 17, 2021, 2:30 a.m. UTC | #12
On Wed, Nov 17, 2021 at 12:46:08PM +1030, Alan Modra wrote:
> I recommend losing the NOTOC on the call.  You already have the nop
> after the call when !SHARED, which is correct and necessary for a
> normal call.  You also likely have enough control over SHARED builds
> to omit the nop in that case.

Heh, I should have tried a compile first before saying the nop could
be omitted for SHARED.  It can't for -mcpu=power10, where you need a
toc save stub to go from _mcount to __mcount_internal.
Tulio Magno Quites Machado Filho Nov. 19, 2021, 3:52 p.m. UTC | #13
Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> writes:

> diff --git a/sysdeps/powerpc/powerpc64/configure.ac b/sysdeps/powerpc/powerpc64/configure.ac
> index b77156f696..05f0000807 100644
> --- a/sysdeps/powerpc/powerpc64/configure.ac
> +++ b/sysdeps/powerpc/powerpc64/configure.ac
> @@ -22,13 +22,22 @@ if test x$libc_cv_overlapping_opd = xyes; then
>    AC_DEFINE(USE_PPC64_OVERLAPPING_OPD)
>  fi
>  
> -# @notoc started to be supported in GNU Binutils 2.31.
> -AC_CACHE_CHECK([if the assembler supports @notoc],
> -	       libc_cv_ppc64_notoc, [
> -	       AC_COMPILE_IFELSE([AC_LANG_SOURCE([
> -void foo (void) {asm("b foo@notoc");}
> -		  ])],
> -		  [libc_cv_ppc64_notoc=yes],
> -		  [libc_cv_ppc64_notoc=no])])
> +# We check if compiler supports @notoc generation since there is no
> +# gain by enabling it if it will be optimized away by the linker.
> +# It also helps linkers that might not optimize it and end up
> +# generating stubs with ISA 3.1 instruction even targetting older ISA.
> +AC_CACHE_CHECK([if the compiler supports @notoc],
> +	       libc_cv_ppc64_notoc, [dnl
> +  cat > conftest.c <<EOF
> +int bar (void);
> +int foo (void) { return bar () + 1; }
> +EOF
> +  libc_cv_ppc64_notoc=no
> +  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c]) \

In order to use the -mcpu flag set via --with-cpu=power10, this command should be:

    ${CC-cc} $libc_cv_cc_submachine $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c

LGTM with that fix.

Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>

Notice that I'm still not able to complete building the tests using lld 13.0 on
powerpc64le after this patch.  I've seen 2 issues:

1. Some distributions modify the version output, causing an error when parsing
   LLD's version, e.g.

    $ ld.lld --version
    Debian LLD 13.0.0 (compatible with GNU linkers)

The following change to the case test in configure fixes this issue:

-  "LLD"*)
+  *"LLD"*)

Is this a GNU Bash-ism?


2. LLD itself crashes while linking the tests.
   I need to collect more details.
Adhemerval Zanella Nov. 22, 2021, 5:54 p.m. UTC | #14
On 19/11/2021 12:52, Tulio Magno Quites Machado Filho wrote:
> Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> writes:
> 
>> diff --git a/sysdeps/powerpc/powerpc64/configure.ac b/sysdeps/powerpc/powerpc64/configure.ac
>> index b77156f696..05f0000807 100644
>> --- a/sysdeps/powerpc/powerpc64/configure.ac
>> +++ b/sysdeps/powerpc/powerpc64/configure.ac
>> @@ -22,13 +22,22 @@ if test x$libc_cv_overlapping_opd = xyes; then
>>    AC_DEFINE(USE_PPC64_OVERLAPPING_OPD)
>>  fi
>>  
>> -# @notoc started to be supported in GNU Binutils 2.31.
>> -AC_CACHE_CHECK([if the assembler supports @notoc],
>> -	       libc_cv_ppc64_notoc, [
>> -	       AC_COMPILE_IFELSE([AC_LANG_SOURCE([
>> -void foo (void) {asm("b foo@notoc");}
>> -		  ])],
>> -		  [libc_cv_ppc64_notoc=yes],
>> -		  [libc_cv_ppc64_notoc=no])])
>> +# We check if compiler supports @notoc generation since there is no
>> +# gain by enabling it if it will be optimized away by the linker.
>> +# It also helps linkers that might not optimize it and end up
>> +# generating stubs with ISA 3.1 instruction even targetting older ISA.
>> +AC_CACHE_CHECK([if the compiler supports @notoc],
>> +	       libc_cv_ppc64_notoc, [dnl
>> +  cat > conftest.c <<EOF
>> +int bar (void);
>> +int foo (void) { return bar () + 1; }
>> +EOF
>> +  libc_cv_ppc64_notoc=no
>> +  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c]) \
> 
> In order to use the -mcpu flag set via --with-cpu=power10, this command should be:
> 
>     ${CC-cc} $libc_cv_cc_submachine $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c
> 
> LGTM with that fix.
> 
> Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>

Ack.

> 
> Notice that I'm still not able to complete building the tests using lld 13.0 on
> powerpc64le after this patch.  I've seen 2 issues:
> 
> 1. Some distributions modify the version output, causing an error when parsing
>    LLD's version, e.g.
> 
>     $ ld.lld --version
>     Debian LLD 13.0.0 (compatible with GNU linkers)

I am using the provided package from llvm.org itself, where it returns:

$ ld.lld --version
LLD 13.0.0 (https://github.com/llvm/llvm-project/ 24c8eaec9467b2aaf70b0db33a4e4dd415139a50) (compatible with GNU linkers)

And I could build everything, including tests, for default (no --with-cpu),
and with --with-cpu=power8 and --with-cpu=power9.  The --with-cpu=power10
still shows same issues while linking the loader:

ld.lld: error: ../sysdeps/powerpc/powerpc64/dl-trampoline.S:55:(.text+0x29AE4): call to save__dl_fixup lacks nop, can't restore toc
ld.lld: error: ../sysdeps/powerpc/powerpc64/dl-trampoline.S:286:(.text+0x29C58): call to save__dl_profile_fixup lacks nop, can't restore toc
ld.lld: error: ../sysdeps/powerpc/powerpc64/dl-trampoline.S:455:(.text+0x29E78): call to save__dl_call_pltexit lacks nop, can't restore toc
collect2: error: ld returned 1 exit status

Which I am not sure it is something related to lld or something we should
fix on glibc.

> 
> The following change to the case test in configure fixes this issue:
> 
> -  "LLD"*)
> +  *"LLD"*)
> 
> Is this a GNU Bash-ism?

I think Debian is patching LLD, so we will need to support anything before
"LLD" string.

> 
> 
> 2. LLD itself crashes while linking the tests.
>    I need to collect more details.
> 

Do you have a log of what is happening? Could it be something related to the
debian provided LLD? Could you check with the official package?
Tulio Magno Quites Machado Filho Nov. 25, 2021, 5:02 p.m. UTC | #15
Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> writes:

> On 19/11/2021 12:52, Tulio Magno Quites Machado Filho wrote:
>> 2. LLD itself crashes while linking the tests.
>>    I need to collect more details.
>
> Do you have a log of what is happening? Could it be something related to the
> debian provided LLD? Could you check with the official package?

Using the official package and --with-cpu=power9, I'm able to build and run
the tests. I get:

FAIL: elf/ifuncmain1pic
FAIL: elf/ifuncmain1pie
FAIL: elf/ifuncmain1vis
FAIL: elf/ifuncmain1vispic
FAIL: elf/ifuncmain1vispie
FAIL: elf/ifuncmain3
FAIL: elf/ifuncmain6pie
XPASS: elf/tst-protected1a
XPASS: elf/tst-protected1b
FAIL: elf/tst-tlsopt-powerpc
UNSUPPORTED: math/test-fesetexcept-traps
UNSUPPORTED: math/test-fexcept-traps
UNSUPPORTED: misc/tst-adjtimex
UNSUPPORTED: misc/tst-clock_adjtime
UNSUPPORTED: misc/tst-ntp_adjtime
UNSUPPORTED: misc/tst-pkey
FAIL: nptl/tst-execstack
UNSUPPORTED: time/tst-clock_settime
UNSUPPORTED: time/tst-settimeofday
Summary of test results:
      9 FAIL
   4663 PASS
      8 UNSUPPORTED
     16 XFAIL
      2 XPASS

All the 9 failures are unexpected.

I had to kill elf/ifuncmain1pie by hand because it was stuck in an infinite
loop in foo_protected, constantly loading its own address and jumping to it.

The contents of the function are not what I expected.  It's almost identical to
its __plt_ counterpart except that it doesn't save r2:

0000000000010d90 <foo_protected>:
   10d90:       01 00 82 3d     addis   r12,r2,1
   10d94:       a0 80 8c e9     ld      r12,-32608(r12)
   10d98:       a6 03 89 7d     mtctr   r12
   10d9c:       20 04 80 4e     bctr

=======

Going back to the discussion around the crashes, I can only reproduce them with
the Debian build and using a high number of jobs (e.g -j150).  That generates an
unusable log that I can only distinguish that a few lld processes received
SIGABRT.
If I use a lower -j value, e.g. -j10, the crashes do not happen.

This is what I managed to parse:

terminate called after throwing an instance of 'std::system_error'
what():  Resource temporarily unavailable
PLEASE submit a bug report to https://bugs.llvm.org/ and include the crash backtrace.
/usr/lib/powerpc64le-linux-gnu/libLLVM-13.so.1(_ZN4llvm3sys15PrintStackTraceERNS_11raw_ostreamEi+0x4
c)[0x7fffb262c50c]
... <= Hard to understand what is going on here
collect2: fatal error: ld terminated with signal 6 [Aborted]
compilation terminated.
make[2]: *** [../Rules:277: /build/glibc-build.lld/math/test-signgam-ullong-static] Error 1
make[2]: *** [../Rules:277: /build/glibc-build.lld/math/atest-exp] Error 1
make[2]: *** [../Rules:277: /build/glibc-build.lld/math/atest-sincos] Error 1
make[2]: *** [../Rules:230: /build/glibc-build.lld/math/test-matherr] Error 1
make[2]: *** [../Rules:230: /build/glibc-build.lld/math/test-matherr-2] Error 1
make[2]: *** [../Rules:230: /build/glibc-build.lld/math/test-math-isinff] Error 1
make[2]: *** [../Rules:277: /build/glibc-build.lld/math/test-signgam-ullong-init-static] Error 1
Adhemerval Zanella Nov. 25, 2021, 5:16 p.m. UTC | #16
On 25/11/2021 14:02, Tulio Magno Quites Machado Filho wrote:
> Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> writes:
> 
>> On 19/11/2021 12:52, Tulio Magno Quites Machado Filho wrote:
>>> 2. LLD itself crashes while linking the tests.
>>>    I need to collect more details.
>>
>> Do you have a log of what is happening? Could it be something related to the
>> debian provided LLD? Could you check with the official package?
> 
> Using the official package and --with-cpu=power9, I'm able to build and run
> the tests. I get:
> 
> FAIL: elf/ifuncmain1pic
> FAIL: elf/ifuncmain1pie
> FAIL: elf/ifuncmain1vis
> FAIL: elf/ifuncmain1vispic
> FAIL: elf/ifuncmain1vispie
> FAIL: elf/ifuncmain3
> FAIL: elf/ifuncmain6pie
> XPASS: elf/tst-protected1a
> XPASS: elf/tst-protected1b
> FAIL: elf/tst-tlsopt-powerpc
> UNSUPPORTED: math/test-fesetexcept-traps
> UNSUPPORTED: math/test-fexcept-traps
> UNSUPPORTED: misc/tst-adjtimex
> UNSUPPORTED: misc/tst-clock_adjtime
> UNSUPPORTED: misc/tst-ntp_adjtime
> UNSUPPORTED: misc/tst-pkey
> FAIL: nptl/tst-execstack
> UNSUPPORTED: time/tst-clock_settime
> UNSUPPORTED: time/tst-settimeofday
> Summary of test results:
>       9 FAIL
>    4663 PASS
>       8 UNSUPPORTED
>      16 XFAIL
>       2 XPASS
> 
> All the 9 failures are unexpected.

That's what I saw on a power9 machine as well.

> 
> I had to kill elf/ifuncmain1pie by hand because it was stuck in an infinite
> loop in foo_protected, constantly loading its own address and jumping to it.

Yeah, I will fix it by using libsupport for elf/ifuncmain1.c. 

> 
> The contents of the function are not what I expected.  It's almost identical to
> its __plt_ counterpart except that it doesn't save r2:
> 
> 0000000000010d90 <foo_protected>:
>    10d90:       01 00 82 3d     addis   r12,r2,1
>    10d94:       a0 80 8c e9     ld      r12,-32608(r12)
>    10d98:       a6 03 89 7d     mtctr   r12
>    10d9c:       20 04 80 4e     bctr
> 
> =======

This does seem a lld issue and I also see some issues with ifunc and protected
functions on armhf.  I spent some time to debug, and it could get them to work
by forcing the creation of canonical entries on ELF/Relocations.cpp:1501). I 
have not dig into to understand what is happening exactly on lld side.

> 
> Going back to the discussion around the crashes, I can only reproduce them with
> the Debian build and using a high number of jobs (e.g -j150).  That generates an
> unusable log that I can only distinguish that a few lld processes received
> SIGABRT.
> If I use a lower -j value, e.g. -j10, the crashes do not happen.
> 
> This is what I managed to parse:
> 
> terminate called after throwing an instance of 'std::system_error'
> what():  Resource temporarily unavailable
> PLEASE submit a bug report to https://bugs.llvm.org/ and include the crash backtrace.
> /usr/lib/powerpc64le-linux-gnu/libLLVM-13.so.1(_ZN4llvm3sys15PrintStackTraceERNS_11raw_ostreamEi+0x4
> c)[0x7fffb262c50c]
> ... <= Hard to understand what is going on here
> collect2: fatal error: ld terminated with signal 6 [Aborted]
> compilation terminated.
> make[2]: *** [../Rules:277: /build/glibc-build.lld/math/test-signgam-ullong-static] Error 1
> make[2]: *** [../Rules:277: /build/glibc-build.lld/math/atest-exp] Error 1
> make[2]: *** [../Rules:277: /build/glibc-build.lld/math/atest-sincos] Error 1
> make[2]: *** [../Rules:230: /build/glibc-build.lld/math/test-matherr] Error 1
> make[2]: *** [../Rules:230: /build/glibc-build.lld/math/test-matherr-2] Error 1
> make[2]: *** [../Rules:230: /build/glibc-build.lld/math/test-math-isinff] Error 1
> make[2]: *** [../Rules:277: /build/glibc-build.lld/math/test-signgam-ullong-init-static] Error 1
> 

I think this is an unrelated issue and due the fact lld by default uses parallel
algorithms on some operations and they scale based on the system core count.
So running multiple lld will create a *lot* of threads (for instance, -j150 on 
a 128 core machine can potentially create 19200 threads) which might reach some 
system enforced limit (for instance through systemd).

I usually build llvm with -DLLVM_ENABLE_THREADS=false to avoid it, since I also
did not see much speed up by using threads with lld and glibc.
diff mbox series

Patch

diff --git a/sysdeps/powerpc/powerpc64/configure b/sysdeps/powerpc/powerpc64/configure
index 5ce77af631..db18791a55 100644
--- a/sysdeps/powerpc/powerpc64/configure
+++ b/sysdeps/powerpc/powerpc64/configure
@@ -32,26 +32,36 @@  if test x$libc_cv_overlapping_opd = xyes; then
 
 fi
 
-# @notoc started to be supported in GNU Binutils 2.31.
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler supports @notoc" >&5
-$as_echo_n "checking if the assembler supports @notoc... " >&6; }
+# We check if compiler supports @notoc generation since there is no
+# gain by enabling it if it will be optimized away by the linker.
+# It also helps linkers that might not optimize it and end up
+# generating stubs with ISA 3.1 instruction even targetting older ISA.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the compiler supports @notoc" >&5
+$as_echo_n "checking if the compiler supports @notoc... " >&6; }
 if ${libc_cv_ppc64_notoc+:} false; then :
   $as_echo_n "(cached) " >&6
 else
-
-	       cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-void foo (void) {asm("b foo@notoc");}
-
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-  libc_cv_ppc64_notoc=yes
-else
+    cat > conftest.c <<EOF
+int bar (void);
+int foo (void) { return bar () + 1; }
+EOF
   libc_cv_ppc64_notoc=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; } \
+     && { ac_try='grep -q -E 'bar@notoc' conftest.s'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+  then
+    libc_cv_ppc64_notoc=yes
+  fi
+  rm -rf conftest.*
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ppc64_notoc" >&5
 $as_echo "$libc_cv_ppc64_notoc" >&6; }
diff --git a/sysdeps/powerpc/powerpc64/configure.ac b/sysdeps/powerpc/powerpc64/configure.ac
index b77156f696..05f0000807 100644
--- a/sysdeps/powerpc/powerpc64/configure.ac
+++ b/sysdeps/powerpc/powerpc64/configure.ac
@@ -22,13 +22,22 @@  if test x$libc_cv_overlapping_opd = xyes; then
   AC_DEFINE(USE_PPC64_OVERLAPPING_OPD)
 fi
 
-# @notoc started to be supported in GNU Binutils 2.31.
-AC_CACHE_CHECK([if the assembler supports @notoc],
-	       libc_cv_ppc64_notoc, [
-	       AC_COMPILE_IFELSE([AC_LANG_SOURCE([
-void foo (void) {asm("b foo@notoc");}
-		  ])],
-		  [libc_cv_ppc64_notoc=yes],
-		  [libc_cv_ppc64_notoc=no])])
+# We check if compiler supports @notoc generation since there is no
+# gain by enabling it if it will be optimized away by the linker.
+# It also helps linkers that might not optimize it and end up
+# generating stubs with ISA 3.1 instruction even targetting older ISA.
+AC_CACHE_CHECK([if the compiler supports @notoc],
+	       libc_cv_ppc64_notoc, [dnl
+  cat > conftest.c <<EOF
+int bar (void);
+int foo (void) { return bar () + 1; }
+EOF
+  libc_cv_ppc64_notoc=no
+  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -S -o conftest.s conftest.c]) \
+     && AC_TRY_COMMAND([grep -q -E 'bar@notoc' conftest.s])
+  then
+    libc_cv_ppc64_notoc=yes
+  fi
+  rm -rf conftest.*])
 AS_IF([test x$libc_cv_ppc64_notoc = xyes],
       [AC_DEFINE(USE_PPC64_NOTOC)])