[v2,3/3] powerpc64le: Add glibc-hwcaps support

Message ID e1b80826a62d3f03e1a70bd191f82ee31548b4e9.1602515612.git.fweimer@redhat.com
State Deferred
Headers
Series glibc-hwcaps support for LD_LIBRARY_PATH |

Commit Message

Florian Weimer Oct. 12, 2020, 3:22 p.m. UTC
  The "power10" and "power9" subdirectories are selected.
---
 sysdeps/powerpc/powerpc64/le/Makefile         | 22 ++++++++
 .../powerpc/powerpc64/le/dl-hwcaps-subdirs.c  | 39 ++++++++++++++
 .../powerpc/powerpc64/le/tst-glibc-hwcaps.c   | 54 +++++++++++++++++++
 3 files changed, 115 insertions(+)
 create mode 100644 sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
 create mode 100644 sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
  

Comments

Paul A. Clarke Oct. 13, 2020, 4:36 p.m. UTC | #1
On Mon, Oct 12, 2020 at 05:22:11PM +0200, Florian Weimer via Libc-alpha wrote:
> The "power10" and "power9" subdirectories are selected.
> ---
[snip]
> diff --git a/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c b/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
> new file mode 100644
> index 0000000000..1fa3735a8c
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
> @@ -0,0 +1,39 @@
> +/* Architecture-specific glibc-hwcaps subdirectories.  powerpc64le version.
> +   Copyright (C) 2020 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <dl-hwcaps.h>
> +#include <ldsodefs.h>
> +
> +const char _dl_hwcaps_subdirs[] = "power10:power9";
> +enum { subdirs_count = 2 };
> +
> +uint32_t
> +_dl_hwcaps_subdirs_active (void)
> +{
> +  int active = 0;
> +
> +  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0)
> +    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);

Since active == 0, this is just "return 0", but it's consistent with
below... OK.

> +  ++active;
> +
> +  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_1) == 0)
> +    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
> +  ++active;
> +
> +  return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
> +}

OK.

The maintainers will need to be careful with ordering, in both the 
_dl_hwcaps_subdirs string and the code within the function.
The string must be in priority order and the code stanzas must be in
reverse priority order, and the result must be a contiguous range.

Comments might help.

LGTM.

PC
  
Paul A. Clarke Oct. 20, 2020, 5:23 p.m. UTC | #2
On Mon, Oct 12, 2020 at 05:22:11PM +0200, Florian Weimer via Libc-alpha wrote:
> The "power10" and "power9" subdirectories are selected.
> ---
[snip]
> diff --git a/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c b/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
> new file mode 100644
> index 0000000000..1fa3735a8c
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
> @@ -0,0 +1,39 @@
> +/* Architecture-specific glibc-hwcaps subdirectories.  powerpc64le version.
> +   Copyright (C) 2020 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <dl-hwcaps.h>
> +#include <ldsodefs.h>
> +
> +const char _dl_hwcaps_subdirs[] = "power10:power9";
> +enum { subdirs_count = 2 };
> +
> +uint32_t
> +_dl_hwcaps_subdirs_active (void)
> +{
> +  int active = 0;
> +
> +  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0)
> +    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
> +  ++active;
> +
> +  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_1) == 0)
> +    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
> +  ++active;
> +
> +  return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
> +}
[snip]

LGTM

PC
  
Florian Weimer Oct. 29, 2020, 4:26 p.m. UTC | #3
* Florian Weimer via Libc-alpha:

> diff --git a/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c b/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
> new file mode 100644
> index 0000000000..1fa3735a8c
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
> @@ -0,0 +1,39 @@
> +/* Architecture-specific glibc-hwcaps subdirectories.  powerpc64le version.
> +   Copyright (C) 2020 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <dl-hwcaps.h>
> +#include <ldsodefs.h>
> +
> +const char _dl_hwcaps_subdirs[] = "power10:power9";
> +enum { subdirs_count = 2 };
> +
> +uint32_t
> +_dl_hwcaps_subdirs_active (void)
> +{
> +  int active = 0;
> +
> +  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0)
> +    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
> +  ++active;
> +
> +  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_1) == 0)
> +    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
> +  ++active;
> +
> +  return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
> +}

This patch came up in an off-list discussion.  There were some concerns
with it:

* The bits PPC_FEATURE2_ARCH_3_00 and PPC_FEATURE2_ARCH_3_1 may not be
  sufficient to select the "power9" and "power10" subdirectories.  Or
  worded differently, -mcpu=power9 and -mcpu=power10 in GCC enable more
  than that.  For -mcpu=power9, I see _ARCH_PWR9, __FLOAT128_HARDWARE__,
  __POWER9_VECTOR__.  For -mcpu=power10, I see _ARCH_PWR10, __MMA__,
  __PCREL__.  This suggests to me that we need to check additional
  AT_HWCAP/AT_HWCAP2 bits for these directories.

* The names "power9" and "power10" may be too implementation-specific in
  the future.

I would like to postpone this patch for now, but I'd like to resolve
both issues.  Any suggestions how to proceed?

Thanks,
Florian
  
Tulio Magno Quites Machado Filho Oct. 30, 2020, 11:10 p.m. UTC | #4
Florian Weimer via Libc-alpha <libc-alpha@sourceware.org> writes:

> The "power10" and "power9" subdirectories are selected.

Tested on power10.

Should this patch also include modification to
glibc-hwcaps-first-subdirs-for-tests ?

Is it intentional that other architectures end up with the following file?

    elf/glibc-hwcaps/x86-64-v2/markermod1.so

> diff --git a/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c b/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
> new file mode 100644
> index 0000000000..1fa3735a8c
> --- /dev/null
> +++ b/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
> @@ -0,0 +1,39 @@
>...
> +
> +uint32_t
> +_dl_hwcaps_subdirs_active (void)
> +{
> +  int active = 0;
> +
> +  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0)
> +    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
> +  ++active;
> +
> +  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_1) == 0)
> +    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);

This is the tricky part.  I like your proposal to match with the behavior
of -mcpu.
In that case we would have:

  power9:
    ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0
     || (GLRO (dl_hwcap2) & PPC_FEATURE2_HAS_IEEE128) == 0
     || (GLRO (dl_hwcap) & PPC_FEATURE_HAS_ALTIVEC) == 0
     || (GLRO (dl_hwcap) & PPC_FEATURE_HAS_VSX) == 0)

  power10:
    /* power10 also requires altivec, vsx and ieee128 availability, but these
       features have already been tested.  */
    ((GLRO (dl_hwcap2) & (PPC_FEATURE2_ARCH_3_1 | PPC_FEATURE2_MMA)) == 0)

This would mean that a processor that implements POWER ISA 3.0, but does not
implement altivec, would not be able to benefit from that particular library
build, falling back to the general build (power8), e.g. microwatt would fall
in this category right now.

>   * The names "power9" and "power10" may be too implementation-specific in
>     the future.

I do agree, but I don't have a better suggestion.
It's hard to be future-proof here.
I don't think that using the POWER ISA level would help much though,
because new processors may decide to not implement a particular feature
in the future that we believe is essential right now.
  
Florian Weimer Nov. 2, 2020, 10:15 a.m. UTC | #5
* Tulio Magno Quites Machado Filho:

> Florian Weimer via Libc-alpha <libc-alpha@sourceware.org> writes:
>
>> The "power10" and "power9" subdirectories are selected.
>
> Tested on power10.
>
> Should this patch also include modification to
> glibc-hwcaps-first-subdirs-for-tests ?

Yes, it should.  In the posted version, it depended on power9 being
included in the first patch tht added the generic test.

> Is it intentional that other architectures end up with the following file?
>
>     elf/glibc-hwcaps/x86-64-v2/markermod1.so

Yes, it doesn't hurt, and it allows us to keep the test generic.

>> diff --git a/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c b/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
>> new file mode 100644
>> index 0000000000..1fa3735a8c
>> --- /dev/null
>> +++ b/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
>> @@ -0,0 +1,39 @@
>>...
>> +
>> +uint32_t
>> +_dl_hwcaps_subdirs_active (void)
>> +{
>> +  int active = 0;
>> +
>> +  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0)
>> +    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
>> +  ++active;
>> +
>> +  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_1) == 0)
>> +    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
>
> This is the tricky part.  I like your proposal to match with the behavior
> of -mcpu.
> In that case we would have:
>
>   power9:
>     ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0
>      || (GLRO (dl_hwcap2) & PPC_FEATURE2_HAS_IEEE128) == 0
>      || (GLRO (dl_hwcap) & PPC_FEATURE_HAS_ALTIVEC) == 0
>      || (GLRO (dl_hwcap) & PPC_FEATURE_HAS_VSX) == 0)
>
>   power10:
>     /* power10 also requires altivec, vsx and ieee128 availability, but these
>        features have already been tested.  */
>     ((GLRO (dl_hwcap2) & (PPC_FEATURE2_ARCH_3_1 | PPC_FEATURE2_MMA)) == 0)
>
> This would mean that a processor that implements POWER ISA 3.0, but does not
> implement altivec, would not be able to benefit from that particular library
> build, falling back to the general build (power8), e.g. microwatt would fall
> in this category right now.

I think we need documentation what it means for a processor to implement
ISA 3.0, and not altivec.  Or for that matter, what an implementation of
powerpc64le-*-linux-gnu without altivec looks like.  Presumably, it will
be different yet again from the original hardware used during
architecture bring-up.

Once we have no-altivec support for powerpc64le in the GNU toolchain, we
probably should add a power8 subdirectory that stores libraries that use
altivec/vsx.  (This directory would be searched even on systems which
are built to use altivec because they use the POWER8 baseline.)

>>   * The names "power9" and "power10" may be too implementation-specific in
>>     the future.
>
> I do agree, but I don't have a better suggestion.
> It's hard to be future-proof here.
> I don't think that using the POWER ISA level would help much though,
> because new processors may decide to not implement a particular feature
> in the future that we believe is essential right now.

One way to address this is to restrict the selected ISA features for
each level to something that has the most benefit and is unlikely to go
away.

ISA features that cannot automatically and pervasively used by compilers
can be excluded as well.  MMA could be in that category, and I think
cryptography-related instructions generally are.

Thanks,
Florian
  
Tulio Magno Quites Machado Filho Nov. 3, 2020, 3:14 p.m. UTC | #6
Florian Weimer <fweimer@redhat.com> writes:

> I think we need documentation what it means for a processor to implement
> ISA 3.0, and not altivec.  Or for that matter, what an implementation of
> powerpc64le-*-linux-gnu without altivec looks like.  Presumably, it will
> be different yet again from the original hardware used during
> architecture bring-up.

I think we already have this documented in a couple of places in different
documents from the OpenPOWER Foundation.

Up to POWER ISA 2.07 (included) [1], there existed categories of features that
processors may not implement.  Category Vector (aka. altivec) and category
Vector-Scalar Extension (aka. vsx) exist there and are listed in the
OpenPOWER Instruction Set Architecture Profile specification v1.1.0 [2] as
required in the OpenPOWER chip architecture.

POWER ISA 3.0 [3] dropped support for categories, but the OpenPOWER ISA
Compliance Definition 2.0 [4] lists specific sections of the POWER ISA document
that are required, with references to the Vector and Vector-Scalar chapters.

POWER ISA 3.1 [5] added the concept of OpenISA compliancy subset (page vi).
In this new concept, both Vector and Vector-Scalar are required for Linux
compliancy (aka. SIMD in ISA 3.1).

Furthermore, the POWER 64-bit ELF V2 ABI 1.5 [6], which is under public review,
states that:

    It expects an OpenPOWER-compliant processor to implement at least Power ISA
    V2.07B with all OpenPOWER Architecture instruction categories as well as
    OpenPOWER-defined implementation characteristics for some
    implementation-specific features.

It's also worth mentioning that it's removing the list of categories that was
duplicated from [2] which used to mention Vector and Vector-Scalar.

With all that said, it's clear to notice these requirements add barriers for
new processors to start booting Linux. In order to minimize that, I've been
working on some ifunc functions in glibc so that they do not make assumptions
based on IBM processors and are either adapted to work on new processors or
are correctly ignored.

[1] https://openpowerfoundation.org/?resource_lib=ibm-power-isa-version-2-07-b
[2] http://cdn.openpowerfoundation.org/wp-content/uploads/resources/isa-profile/content/ch_profile.html
[3] https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0
[4] http://cdn.openpowerfoundation.org/wp-content/uploads/resources/openpower-isa-thts-V2-1/content/_Ref436814652.html
[5] https://ibm.box.com/s/hhjfw0x0lrbtyzmiaffnbxh2fuo0fog0
[6] https://openpowerfoundation.org/?resource_lib=64-bit-elf-v2-abi-specification-review-draft

> Once we have no-altivec support for powerpc64le in the GNU toolchain, we
> probably should add a power8 subdirectory that stores libraries that use
> altivec/vsx.  (This directory would be searched even on systems which
> are built to use altivec because they use the POWER8 baseline.)

Per my previous explanation, I don't think this is necessary for
OpenPOWER-compliant systems.

>>>   * The names "power9" and "power10" may be too implementation-specific in
>>>     the future.
>>
>> I do agree, but I don't have a better suggestion.
>> It's hard to be future-proof here.
>> I don't think that using the POWER ISA level would help much though,
>> because new processors may decide to not implement a particular feature
>> in the future that we believe is essential right now.
>
> One way to address this is to restrict the selected ISA features for
> each level to something that has the most benefit and is unlikely to go
> away.

As I've just reviewed the OpenPOWER ISA Compliance documents [2] [4], they do
make the usage of the terms "power8" and "power9".  So, I feel better in using
them.  Notice that a definition for power10 is not available yet, except
for the OpenISA compliancy subset from the POWER ISA 3.1, which does not
mention power10.

> ISA features that cannot automatically and pervasively used by compilers
> can be excluded as well.  MMA could be in that category, and I think
> cryptography-related instructions generally are.

MMA is indeed optional for Linux.
AFAICS, cryptography-related instructions are part of SIMD and should be
required for Linux.
  
Florian Weimer Nov. 3, 2020, 4:29 p.m. UTC | #7
* Tulio Magno Quites Machado Filho:

> Florian Weimer <fweimer@redhat.com> writes:
>
>> I think we need documentation what it means for a processor to implement
>> ISA 3.0, and not altivec.  Or for that matter, what an implementation of
>> powerpc64le-*-linux-gnu without altivec looks like.  Presumably, it will
>> be different yet again from the original hardware used during
>> architecture bring-up.
>
> I think we already have this documented in a couple of places in different
> documents from the OpenPOWER Foundation.

Yes, but all those documents say that Altivec + VSX are required for
powerpc64le-*-linux-gnu.  Your summary below seems to re-confirm that.

My point was that if we want powerpc64le-*-linux-gnu to stand for
something different (without Altivec/VSX), we need (new) documentation
that says what it means.

>> ISA features that cannot automatically and pervasively used by compilers
>> can be excluded as well.  MMA could be in that category, and I think
>> cryptography-related instructions generally are.
>
> MMA is indeed optional for Linux.
> AFAICS, cryptography-related instructions are part of SIMD and should be
> required for Linux.

Then I think we should change GCC not to enable MMA with -mcpu=power10.

The other change is that I should check for PPC_FEATURE2_HAS_IEEE128 for
power9, and add a comment that ALTIVEC and VSX are implied by the place
in the source tree (I deliberately made all this specific to
powerpc64le-*-linux-gnu on the glibc side, like we didn't define new ABI
levels for i386).

Thanks,
Florian
  
Segher Boessenkool Nov. 3, 2020, 11:02 p.m. UTC | #8
Hi!

(Cc: Bill)

On Tue, Nov 03, 2020 at 05:29:08PM +0100, Florian Weimer via Libc-alpha wrote:
> * Tulio Magno Quites Machado Filho:
> > Florian Weimer <fweimer@redhat.com> writes:
> >> I think we need documentation what it means for a processor to implement
> >> ISA 3.0, and not altivec.  Or for that matter, what an implementation of
> >> powerpc64le-*-linux-gnu without altivec looks like.  Presumably, it will
> >> be different yet again from the original hardware used during
> >> architecture bring-up.
> >
> > I think we already have this documented in a couple of places in different
> > documents from the OpenPOWER Foundation.
> 
> Yes, but all those documents say that Altivec + VSX are required for
> powerpc64le-*-linux-gnu.  Your summary below seems to re-confirm that.
> 
> My point was that if we want powerpc64le-*-linux-gnu to stand for
> something different (without Altivec/VSX), we need (new) documentation
> that says what it means.

Not going to happen.  A new powerpc64le-linux-novec triple (or whatever
naming, this is just an example) can be made of course, but the existing
name will keep standing for the existing ABI!

> >> ISA features that cannot automatically and pervasively used by compilers
> >> can be excluded as well.  MMA could be in that category, and I think
> >> cryptography-related instructions generally are.
> >
> > MMA is indeed optional for Linux.
> > AFAICS, cryptography-related instructions are part of SIMD and should be
> > required for Linux.
> 
> Then I think we should change GCC not to enable MMA with -mcpu=power10.

No.

-mcpu=power10 enables MMA.  If you do not want all Power10 features, you
should not use -mcpu=power10.  It is that simple.

Since powerpc64le-linux requires Power8 or later, you always have VMX
and VSX enabled there.  In exactly that same way.

GCC never generates anything MMA that the user did not explicitly ask
for in the source code (with builtins, say), so this is not an issue.
Compare this with hardware DFP.

> The other change is that I should check for PPC_FEATURE2_HAS_IEEE128 for
> power9, and add a comment that ALTIVEC and VSX are implied by the place
> in the source tree (I deliberately made all this specific to
> powerpc64le-*-linux-gnu on the glibc side, like we didn't define new ABI
> levels for i386).

All the VMX, VSX, QP float, MMA, whatever stuff is all the same on *all*
GCC Power targets, not just those that are powerpc64le-linux.  If you
use -mcpu=whatever, the resulting program can only be run on CPUs with
all features that <whatever> has.  This is what -mcpu= means.


Segher
  
Florian Weimer Nov. 4, 2020, 8:28 a.m. UTC | #9
* Segher Boessenkool:

> Not going to happen.  A new powerpc64le-linux-novec triple (or whatever
> naming, this is just an example) can be made of course, but the existing
> name will keep standing for the existing ABI!

Fine with me.

>> >> ISA features that cannot automatically and pervasively used by compilers
>> >> can be excluded as well.  MMA could be in that category, and I think
>> >> cryptography-related instructions generally are.
>> >
>> > MMA is indeed optional for Linux.
>> > AFAICS, cryptography-related instructions are part of SIMD and should be
>> > required for Linux.
>> 
>> Then I think we should change GCC not to enable MMA with -mcpu=power10.
>
> No.
>
> -mcpu=power10 enables MMA.  If you do not want all Power10 features, you
> should not use -mcpu=power10.  It is that simple.

Then we need a different name, or require MMA for the "power10"
glibc-hwcaps subdirectory.

> Since powerpc64le-linux requires Power8 or later, you always have VMX
> and VSX enabled there.  In exactly that same way.
>
> GCC never generates anything MMA that the user did not explicitly ask
> for in the source code (with builtins, say), so this is not an issue.
> Compare this with hardware DFP.

GCC defines __MMA__ for -mcpu=power10, and source code will evenually be
sensitive to that macro.

I think it is important that -mcpu=power10 and the "power10"
subdirectory mean the same thing.

Thanks,
Florian
  
Segher Boessenkool Nov. 4, 2020, 7:36 p.m. UTC | #10
Hi Florian,

On Wed, Nov 04, 2020 at 09:28:33AM +0100, Florian Weimer wrote:
> * Segher Boessenkool:
> 
> > Not going to happen.  A new powerpc64le-linux-novec triple (or whatever
> > naming, this is just an example) can be made of course, but the existing
> > name will keep standing for the existing ABI!
> 
> Fine with me.
> 
> >> >> ISA features that cannot automatically and pervasively used by compilers
> >> >> can be excluded as well.  MMA could be in that category, and I think
> >> >> cryptography-related instructions generally are.
> >> >
> >> > MMA is indeed optional for Linux.
> >> > AFAICS, cryptography-related instructions are part of SIMD and should be
> >> > required for Linux.
> >> 
> >> Then I think we should change GCC not to enable MMA with -mcpu=power10.
> >
> > No.
> >
> > -mcpu=power10 enables MMA.  If you do not want all Power10 features, you
> > should not use -mcpu=power10.  It is that simple.
> 
> Then we need a different name, or require MMA for the "power10"
> glibc-hwcaps subdirectory.

Or do nothing.  Glibc doesn't use any MMA code, does it?  This is never
generated automatically, you need to really ask for it in your source
code.

> > Since powerpc64le-linux requires Power8 or later, you always have VMX
> > and VSX enabled there.  In exactly that same way.
> >
> > GCC never generates anything MMA that the user did not explicitly ask
> > for in the source code (with builtins, say), so this is not an issue.
> > Compare this with hardware DFP.
> 
> GCC defines __MMA__ for -mcpu=power10, and source code will evenually be
> sensitive to that macro.

That macro simply says that source code can use MMA builtins and the
like.  Is it important for glibc whether user code uses MMA?

> I think it is important that -mcpu=power10 and the "power10"
> subdirectory mean the same thing.

-mcpu=power10 means "generate code optimised for power10" (and: "it will
probably not run on cpus not compatible to power10").

Is that what that subdir holds as well?


Segher
  
Florian Weimer Nov. 4, 2020, 7:56 p.m. UTC | #11
* Segher Boessenkool:

>> > -mcpu=power10 enables MMA.  If you do not want all Power10 features, you
>> > should not use -mcpu=power10.  It is that simple.
>> 
>> Then we need a different name, or require MMA for the "power10"
>> glibc-hwcaps subdirectory.
>
> Or do nothing.  Glibc doesn't use any MMA code, does it?  This is never
> generated automatically, you need to really ask for it in your source
> code.

glibc's internal use does not matter in this context.  Programmers must
be able to drop their own libraries built with -mcpu=power10 into the
power10 subdirectory.  If GCC turns on MMA by default for this switch
and glibc selects the power10 subdirectory without checking for MMA
support, then this isn't guaranteed to work.

We have been through this with x86-64 already.  I don't want to produce
the same bug.

>> > Since powerpc64le-linux requires Power8 or later, you always have VMX
>> > and VSX enabled there.  In exactly that same way.
>> >
>> > GCC never generates anything MMA that the user did not explicitly ask
>> > for in the source code (with builtins, say), so this is not an issue.
>> > Compare this with hardware DFP.
>> 
>> GCC defines __MMA__ for -mcpu=power10, and source code will evenually be
>> sensitive to that macro.
>
> That macro simply says that source code can use MMA builtins and the
> like.  Is it important for glibc whether user code uses MMA?

Yes, we can only load code that is built to use MMA unconditionally
(potentially) if the system supports MMA.

And in the future, GCC might recognize common code patterns with
-ftree-loop-vectorize and replace them with MMA intrinsics.

>> I think it is important that -mcpu=power10 and the "power10"
>> subdirectory mean the same thing.
>
> -mcpu=power10 means "generate code optimised for power10" (and: "it will
> probably not run on cpus not compatible to power10").
>
> Is that what that subdir holds as well?

Yes, that's the idea.  The programmer can also drop a -mcpu=power9
library into the power9 subdirectory.  The difference to the existing
AT_PLATFORM subdirectory is that on POWER10, both subdirectories (power9
and power10) are searched.  With AT_PLATFORM on POWER10, only power10
would be searched.  (And we're also fixing a silent on-disk format
change in the way AT_PLATFORM libraries are represented in ld.so.cache.)

Thanks,
Florian
  
Segher Boessenkool Nov. 4, 2020, 9:58 p.m. UTC | #12
On Wed, Nov 04, 2020 at 08:56:05PM +0100, Florian Weimer wrote:
> * Segher Boessenkool:
> 
> >> > -mcpu=power10 enables MMA.  If you do not want all Power10 features, you
> >> > should not use -mcpu=power10.  It is that simple.
> >> 
> >> Then we need a different name, or require MMA for the "power10"
> >> glibc-hwcaps subdirectory.
> >
> > Or do nothing.  Glibc doesn't use any MMA code, does it?  This is never
> > generated automatically, you need to really ask for it in your source
> > code.
> 
> glibc's internal use does not matter in this context.  Programmers must
> be able to drop their own libraries built with -mcpu=power10 into the
> power10 subdirectory.  If GCC turns on MMA by default for this switch
> and glibc selects the power10 subdirectory without checking for MMA
> support, then this isn't guaranteed to work.

Are you saying that it is *normal* for people to put very different code
into libc like this?  Wow.

> We have been through this with x86-64 already.  I don't want to produce
> the same bug.

No code in libc should ever use MMA, imnsho.

> >> > Since powerpc64le-linux requires Power8 or later, you always have VMX
> >> > and VSX enabled there.  In exactly that same way.
> >> >
> >> > GCC never generates anything MMA that the user did not explicitly ask
> >> > for in the source code (with builtins, say), so this is not an issue.
> >> > Compare this with hardware DFP.
> >> 
> >> GCC defines __MMA__ for -mcpu=power10, and source code will evenually be
> >> sensitive to that macro.
> >
> > That macro simply says that source code can use MMA builtins and the
> > like.  Is it important for glibc whether user code uses MMA?
> 
> Yes, we can only load code that is built to use MMA unconditionally
> (potentially) if the system supports MMA.
> 
> And in the future, GCC might recognize common code patterns with
> -ftree-loop-vectorize and replace them with MMA intrinsics.

No, not really.  Maybe 20 years from now though, sure.

MMA uses its own register set.  Moves to other regs are expensive.  You
cannot pass those MMA registers around at all, either.

So sure, only load modules with MMA code if the hwcap says you have MMA,
but I don't see why you would refuse power10 code without it.  But, ask
Tulio, of course -- I just don't see the point of having a separate
hwcap for it at all, if you do not use it!

> >> I think it is important that -mcpu=power10 and the "power10"
> >> subdirectory mean the same thing.
> >
> > -mcpu=power10 means "generate code optimised for power10" (and: "it will
> > probably not run on cpus not compatible to power10").
> >
> > Is that what that subdir holds as well?
> 
> Yes, that's the idea.  The programmer can also drop a -mcpu=power9
> library into the power9 subdirectory.  The difference to the existing
> AT_PLATFORM subdirectory is that on POWER10, both subdirectories (power9
> and power10) are searched.  With AT_PLATFORM on POWER10, only power10
> would be searched.  (And we're also fixing a silent on-disk format
> change in the way AT_PLATFORM libraries are represented in ld.so.cache.)

HtH, cheers,


Segher
  
Florian Weimer Nov. 5, 2020, 11:40 a.m. UTC | #13
* Segher Boessenkool:

> On Wed, Nov 04, 2020 at 08:56:05PM +0100, Florian Weimer wrote:
>> * Segher Boessenkool:
>> 
>> >> > -mcpu=power10 enables MMA.  If you do not want all Power10 features, you
>> >> > should not use -mcpu=power10.  It is that simple.
>> >> 
>> >> Then we need a different name, or require MMA for the "power10"
>> >> glibc-hwcaps subdirectory.
>> >
>> > Or do nothing.  Glibc doesn't use any MMA code, does it?  This is never
>> > generated automatically, you need to really ask for it in your source
>> > code.
>> 
>> glibc's internal use does not matter in this context.  Programmers must
>> be able to drop their own libraries built with -mcpu=power10 into the
>> power10 subdirectory.  If GCC turns on MMA by default for this switch
>> and glibc selects the power10 subdirectory without checking for MMA
>> support, then this isn't guaranteed to work.
>
> Are you saying that it is *normal* for people to put very different code
> into libc like this?  Wow.
>
>> We have been through this with x86-64 already.  I don't want to produce
>> the same bug.
>
> No code in libc should ever use MMA, imnsho.

Oh, I see now.  I think we don't agree about the scope of the
glibc-hwcaps feature.

It's going to be used to ELF multilibs in general, not just glibc
components.  So a BLAS implementation could use it and drop its
implementation DSOs into the appropriate directories.

That's why vector features such as MMA matter in this context.

Given this additional context, I hope we can agree that a rule for
programmers like “build with -mcpu=power10 for the power10 glibc-hwcaps
subdirectory“ has a lot of value.

Thanks,
Florian
  
Segher Boessenkool Nov. 5, 2020, 9:42 p.m. UTC | #14
Hi Florian,

On Thu, Nov 05, 2020 at 12:40:29PM +0100, Florian Weimer wrote:
> * Segher Boessenkool:
> > Are you saying that it is *normal* for people to put very different code
> > into libc like this?  Wow.
> >
> >> We have been through this with x86-64 already.  I don't want to produce
> >> the same bug.
> >
> > No code in libc should ever use MMA, imnsho.
> 
> Oh, I see now.  I think we don't agree about the scope of the
> glibc-hwcaps feature.

I did not know about this new feature at all, I thought this was about
the exiting power10/ etc. directories :-)

> It's going to be used to ELF multilibs in general, not just glibc
> components.  So a BLAS implementation could use it and drop its
> implementation DSOs into the appropriate directories.
> 
> That's why vector features such as MMA matter in this context.

Sure.

> Given this additional context, I hope we can agree that a rule for
> programmers like “build with -mcpu=power10 for the power10 glibc-hwcaps
> subdirectory“ has a lot of value.

But you cannot change what -mcpu=power10 means...  It should keep the
same scheme as we have used for very long already.

If there is anything else GCC can do that is *not* a huge problem for
all our users, we can talk about that of course (but on the GCC lists!)


Segher
  
Florian Weimer Nov. 9, 2020, 6:32 p.m. UTC | #15
* Segher Boessenkool:

> Hi Florian,
>
> On Thu, Nov 05, 2020 at 12:40:29PM +0100, Florian Weimer wrote:
>> * Segher Boessenkool:
>> > Are you saying that it is *normal* for people to put very different code
>> > into libc like this?  Wow.
>> >
>> >> We have been through this with x86-64 already.  I don't want to produce
>> >> the same bug.
>> >
>> > No code in libc should ever use MMA, imnsho.
>> 
>> Oh, I see now.  I think we don't agree about the scope of the
>> glibc-hwcaps feature.
>
> I did not know about this new feature at all, I thought this was about
> the exiting power10/ etc. directories :-)
>
>> It's going to be used to ELF multilibs in general, not just glibc
>> components.  So a BLAS implementation could use it and drop its
>> implementation DSOs into the appropriate directories.
>> 
>> That's why vector features such as MMA matter in this context.
>
> Sure.
>
>> Given this additional context, I hope we can agree that a rule for
>> programmers like “build with -mcpu=power10 for the power10 glibc-hwcaps
>> subdirectory“ has a lot of value.
>
> But you cannot change what -mcpu=power10 means...  It should keep the
> same scheme as we have used for very long already.

I don't have a problem with selecting the power10 subdirectory for CPUs
which support ISA 3.1 *and* MMA (plus ISA 3.0 plus float128 in
hardware).  This would align glibc with the current GCC option.

The new glibc scheme allows us to add something between power10 and
power9 easily once the need arises (without backwards-incompatible
/etc/ld.so.cache format changes *cough* *cough*).

Thanks,
Florian
  
Tulio Magno Quites Machado Filho Nov. 16, 2020, 2:51 p.m. UTC | #16
Florian Weimer <fweimer@redhat.com> writes:

> GCC defines __MMA__ for -mcpu=power10, and source code will evenually be
> sensitive to that macro.
>
> I think it is important that -mcpu=power10 and the "power10"
> subdirectory mean the same thing.

I agree to stick with -mcpu values.

It was a bad interpretation of my part when I used OpenPOWER Foundation's
documents to explain the availability of features on GCC.  These are different
usages.
GCC uses -mcpu to:

    ... specify a specific processor.  Code generated under those options
    runs best on that processor, and may not run at all on others.

So, -mcpu=power10 is the IBM POWER10 processor that supports MMA.

But then, we're back to this point you had raised:

> I think we need documentation what it means for a processor to implement
> ISA 3.0, and not altivec.

Unfortunately, I think this documentation will only exist after a new -mcpu
value is created.
  
Segher Boessenkool Nov. 16, 2020, 7:35 p.m. UTC | #17
Hi!

On Mon, Nov 16, 2020 at 11:51:27AM -0300, Tulio Magno Quites Machado Filho wrote:
> But then, we're back to this point you had raised:
> 
> > I think we need documentation what it means for a processor to implement
> > ISA 3.0, and not altivec.
> 
> Unfortunately, I think this documentation will only exist after a new -mcpu
> value is created.

Nothing that uses the vector registers can work, and disabling all
instructions that touch a vector register gets you 99.9% there.  This is
completely analoguous with -msoft-float (which really means "no FPRs").

With -mno-altivec you also have no VSCR and VRSAVE registers, just like
you lose FPSCR with -msoft-float.

But yes, this should be documented in the ABI, it matters for the
calling sequences etc.

A "normal" compilation may use the vector registers if the -mcpu= you
used allows that, so it cannot run on hardware without vector regs.  The
compiler can also use vector registers if you did not explicitly ask for
them (again analogous to the floating point registers), if you do not
explicitly forbid it of course.


Segher
  
Florian Weimer Nov. 23, 2020, 10:20 a.m. UTC | #18
* Segher Boessenkool:

> On Mon, Nov 16, 2020 at 11:51:27AM -0300, Tulio Magno Quites Machado Filho wrote:
>> But then, we're back to this point you had raised:
>> 
>> > I think we need documentation what it means for a processor to implement
>> > ISA 3.0, and not altivec.
>> 
>> Unfortunately, I think this documentation will only exist after a new -mcpu
>> value is created.
>
> Nothing that uses the vector registers can work, and disabling all
> instructions that touch a vector register gets you 99.9% there.  This is
> completely analoguous with -msoft-float (which really means "no FPRs").
>
> With -mno-altivec you also have no VSCR and VRSAVE registers, just like
> you lose FPSCR with -msoft-float.
>
> But yes, this should be documented in the ABI, it matters for the
> calling sequences etc.
>
> A "normal" compilation may use the vector registers if the -mcpu= you
> used allows that, so it cannot run on hardware without vector regs.  The
> compiler can also use vector registers if you did not explicitly ask for
> them (again analogous to the floating point registers), if you do not
> explicitly forbid it of course.

Fair enough.  I take it that the subdirectory logic in the patch is
correct.  If changes are needed, they will come in the form of new
subdirectories.

Thanks,
Florian
  

Patch

diff --git a/sysdeps/powerpc/powerpc64/le/Makefile b/sysdeps/powerpc/powerpc64/le/Makefile
index 033dc77b01..74715677ed 100644
--- a/sysdeps/powerpc/powerpc64/le/Makefile
+++ b/sysdeps/powerpc/powerpc64/le/Makefile
@@ -188,3 +188,25 @@  ifeq ($(subdir),nptl)
 CFLAGS-tst-thread_local1.cc += -mno-float128
 CFLAGS-tst-minstack-throw.cc += -mno-float128
 endif
+
+ifeq ($(subdir),elf)
+$(objpfx)tst-glibc-hwcaps: \
+  $(objpfx)markermod2-1.so $(objpfx)markermod3-1.so
+$(objpfx)tst-glibc-hwcaps.out: \
+  $(objpfx)markermod2.so \
+    $(objpfx)glibc-hwcaps/power9/markermod2.so \
+  $(objpfx)markermod3.so \
+    $(objpfx)glibc-hwcaps/power9/markermod3.so \
+    $(objpfx)glibc-hwcaps/power10/markermod3.so \
+
+$(objpfx)glibc-hwcaps/power9/markermod2.so: $(objpfx)markermod2-2.so
+	$(make-target-directory)
+	cp $< $@
+$(objpfx)glibc-hwcaps/power9/markermod3.so: $(objpfx)markermod3-2.so
+	$(make-target-directory)
+	cp $< $@
+$(objpfx)glibc-hwcaps/power10/markermod3.so: $(objpfx)markermod3-3.so
+	$(make-target-directory)
+	cp $< $@
+
+endif # $(subdir) == elf
diff --git a/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c b/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
new file mode 100644
index 0000000000..1fa3735a8c
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/dl-hwcaps-subdirs.c
@@ -0,0 +1,39 @@ 
+/* Architecture-specific glibc-hwcaps subdirectories.  powerpc64le version.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <dl-hwcaps.h>
+#include <ldsodefs.h>
+
+const char _dl_hwcaps_subdirs[] = "power10:power9";
+enum { subdirs_count = 2 };
+
+uint32_t
+_dl_hwcaps_subdirs_active (void)
+{
+  int active = 0;
+
+  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_00) == 0)
+    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
+  ++active;
+
+  if ((GLRO (dl_hwcap2) & PPC_FEATURE2_ARCH_3_1) == 0)
+    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
+  ++active;
+
+  return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
+}
diff --git a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
new file mode 100644
index 0000000000..e510fca80a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c
@@ -0,0 +1,54 @@ 
+/* glibc-hwcaps subdirectory test.  powerpc64le version.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
+#include <sys/auxv.h>
+#include <sys/param.h>
+
+extern int marker2 (void);
+extern int marker3 (void);
+
+/* Return the POWER level, 8 for the baseline.  */
+static int
+compute_level (void)
+{
+  const char *platform = (const char *) getauxval (AT_PLATFORM);
+  if (strcmp (platform, "power8") == 0)
+    return 8;
+  if (strcmp (platform, "power9") == 0)
+    return 9;
+  if (strcmp (platform, "power10") == 0)
+    return 10;
+  printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform);
+  /* Assume that the new platform supports POWER10.  */
+  return 10;
+}
+
+static int
+do_test (void)
+{
+  int level = compute_level ();
+  printf ("info: detected POWER level: %d\n", level);
+  TEST_COMPARE (marker2 (), MIN (level - 7, 2));
+  TEST_COMPARE (marker3 (), MIN (level - 7, 3));
+  return 0;
+}
+
+#include <support/test-driver.c>