[12/11] s390x: Add Add glibc-hwcaps support

Message ID 87a6v4lzir.fsf@oldenburg2.str.redhat.com
State Superseded
Headers
Series None |

Commit Message

Florian Weimer Nov. 26, 2020, 6:54 p.m. UTC
  Subdirectories z13, z14, z15 can be selected, mostly based on the
level of support for vector instructions.

Compared to the earlier version, this adds tests, slightly more
elaborate AT_HWCAP tests, and _dl_hwcaps_subdirs_build_bitmask is used.

I'm open to using different selection logic.  I couldn't quite figure
out what GCC-generated code actually requires when using, say,
-march=z14.

The tests pass on a z15 and a zEC12 system, both with z/VM.

The patch is also available on the fw/glibc-hwcaps branch.  The
elf/tst-glibc-hwcaps-cache.script patch won't apply against the posted
series.

Thanks,
Florian
---
 elf/Makefile                             |  2 +-
 elf/tst-glibc-hwcaps-cache.script        | 10 ++++
 sysdeps/s390/s390-64/Makefile            | 39 ++++++++++++++++
 sysdeps/s390/s390-64/dl-hwcaps-subdirs.c | 53 ++++++++++++++++++++++
 sysdeps/s390/s390-64/tst-glibc-hwcaps.c  | 78 ++++++++++++++++++++++++++++++++
 5 files changed, 181 insertions(+), 1 deletion(-)
  

Comments

Florian Weimer Dec. 7, 2020, 8:16 a.m. UTC | #1
* Florian Weimer via Libc-alpha:

> diff --git a/elf/tst-glibc-hwcaps-cache.script b/elf/tst-glibc-hwcaps-cache.script
> index 6a4675f9bd..19b06d0adc 100644
> --- a/elf/tst-glibc-hwcaps-cache.script
> +++ b/elf/tst-glibc-hwcaps-cache.script
> @@ -11,6 +11,16 @@ mkdirp 0770 $L/glibc-hwcaps/power10
>  cp $B/elf/libmarkermod3-2.so $L/glibc-hwcaps/power9/libmarkermod3.so
>  cp $B/elf/libmarkermod3-3.so $L/glibc-hwcaps/power10/libmarkermod3.so
>  
> +mkdirp 0770 $L/glibc-hwcaps/z13
> +cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/z13/libmarkermod2.so
> +mkdirp 0770 $L/glibc-hwcaps/z14
> +cp $B/elf/libmarkermod3-2.so $L/glibc-hwcaps/z14/libmarkermod3.so
> +cp $B/elf/libmarkermod3-3.so $L/glibc-hwcaps/z14/libmarkermod3.so
> +mkdirp 0770 $L/glibc-hwcaps/z15
> +cp $B/elf/libmarkermod4-2.so $L/glibc-hwcaps/z15/libmarkermod4.so
> +cp $B/elf/libmarkermod4-3.so $L/glibc-hwcaps/z15/libmarkermod4.so
> +cp $B/elf/libmarkermod4-4.so $L/glibc-hwcaps/z15/libmarkermod4.so
> +
>  mkdirp 0770 $L/glibc-hwcaps/x86-64-v2
>  cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/x86-64-v2/libmarkermod2.so
>  mkdirp 0770 $L/glibc-hwcaps/x86-64-v3

Stefan pointed out that this should be:

mkdirp 0770 $L/glibc-hwcaps/z13
cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/z13/libmarkermod2.so
mkdirp 0770 $L/glibc-hwcaps/z14
cp $B/elf/libmarkermod3-2.so $L/glibc-hwcaps/z13/libmarkermod3.so
cp $B/elf/libmarkermod3-3.so $L/glibc-hwcaps/z14/libmarkermod3.so
mkdirp 0770 $L/glibc-hwcaps/z15
cp $B/elf/libmarkermod4-2.so $L/glibc-hwcaps/z13/libmarkermod4.so
cp $B/elf/libmarkermod4-3.so $L/glibc-hwcaps/z14/libmarkermod4.so
cp $B/elf/libmarkermod4-4.so $L/glibc-hwcaps/z15/libmarkermod4.so

I have fixed it on the fw/glibc-hwcaps branch.

Thanks,
Florian
  
Stefan Liebler Dec. 8, 2020, 3:47 p.m. UTC | #2
On 12/7/20 9:16 AM, Florian Weimer via Libc-alpha wrote:
> * Florian Weimer via Libc-alpha:
> 
>> diff --git a/elf/tst-glibc-hwcaps-cache.script b/elf/tst-glibc-hwcaps-cache.script
>> index 6a4675f9bd..19b06d0adc 100644
>> --- a/elf/tst-glibc-hwcaps-cache.script
>> +++ b/elf/tst-glibc-hwcaps-cache.script
>> @@ -11,6 +11,16 @@ mkdirp 0770 $L/glibc-hwcaps/power10
>>  cp $B/elf/libmarkermod3-2.so $L/glibc-hwcaps/power9/libmarkermod3.so
>>  cp $B/elf/libmarkermod3-3.so $L/glibc-hwcaps/power10/libmarkermod3.so
>>  
>> +mkdirp 0770 $L/glibc-hwcaps/z13
>> +cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/z13/libmarkermod2.so
>> +mkdirp 0770 $L/glibc-hwcaps/z14
>> +cp $B/elf/libmarkermod3-2.so $L/glibc-hwcaps/z14/libmarkermod3.so
>> +cp $B/elf/libmarkermod3-3.so $L/glibc-hwcaps/z14/libmarkermod3.so
>> +mkdirp 0770 $L/glibc-hwcaps/z15
>> +cp $B/elf/libmarkermod4-2.so $L/glibc-hwcaps/z15/libmarkermod4.so
>> +cp $B/elf/libmarkermod4-3.so $L/glibc-hwcaps/z15/libmarkermod4.so
>> +cp $B/elf/libmarkermod4-4.so $L/glibc-hwcaps/z15/libmarkermod4.so
>> +
>>  mkdirp 0770 $L/glibc-hwcaps/x86-64-v2
>>  cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/x86-64-v2/libmarkermod2.so
>>  mkdirp 0770 $L/glibc-hwcaps/x86-64-v3
> 
> Stefan pointed out that this should be:
> 
> mkdirp 0770 $L/glibc-hwcaps/z13
> cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/z13/libmarkermod2.so
> mkdirp 0770 $L/glibc-hwcaps/z14
> cp $B/elf/libmarkermod3-2.so $L/glibc-hwcaps/z13/libmarkermod3.so
> cp $B/elf/libmarkermod3-3.so $L/glibc-hwcaps/z14/libmarkermod3.so
> mkdirp 0770 $L/glibc-hwcaps/z15
> cp $B/elf/libmarkermod4-2.so $L/glibc-hwcaps/z13/libmarkermod4.so
> cp $B/elf/libmarkermod4-3.so $L/glibc-hwcaps/z14/libmarkermod4.so
> cp $B/elf/libmarkermod4-4.so $L/glibc-hwcaps/z15/libmarkermod4.so
> 
> I have fixed it on the fw/glibc-hwcaps branch.
> 
> Thanks,
> Florian
> 

Hi Florian,

I've had a look at your patches. Could you please adjust a few lines? Then
this patch is okay for s390x:
- The commit subject-line contains "Add Add"


- If e.g. a machine newer-than-z15 does not have HWCAP_S390_SORT, then
it would fall back to z14:

diff --git a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
index fa8d2ce1f1..3673808a45 100644
--- a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
+++ b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
@@ -41,11 +41,12 @@ _dl_hwcaps_subdirs_active (void)
     return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
   ++active;

-  /* z15.  */
+  /* z15.
+     Note: We do not list HWCAP_S390_SORT and HWCAP_S390_DFLT here as,
+     according to the Principles of Operation, those may be replaced or removed
+     in future.  */
   if (!((GLRO (dl_hwcap) & HWCAP_S390_VXRS_EXT2)
-        && (GLRO (dl_hwcap) & HWCAP_S390_VXRS_PDE)
-        && (GLRO (dl_hwcap) & HWCAP_S390_SORT)
-        && (GLRO (dl_hwcap) & HWCAP_S390_DFLT)))
+        && (GLRO (dl_hwcap) & HWCAP_S390_VXRS_PDE)))
     return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
   ++active;




- I've asked the kernel-guys regarding AT_PLATFORM: The list is
complete, but it never contains archXYZ. This is only available for
binutils/gcc -march=archXYZ.
- When running on e.g. z196 or older, tst-glibc-hwcaps will always fail,
as level would be <= 9, which leads to failures in:
TEST_COMPARE (marker2 (), MIN (level - 9, 2));
Therefore compute_level should always return the baseline for older
platforms.
- If we are running on z13 or newer and the kernel was booted with novx,
then AT_PLATFORM is z13 or newer, but _dl_hwcaps_subdirs_active will
return zero and the _dl_hwcaps_subdirs are not searched as HWCAP_S390_VX
and all the other VX.. flags are not set. This leads to a test failure.

diff --git a/sysdeps/s390/s390-64/tst-glibc-hwcaps.c
b/sysdeps/s390/s390-64/tst-glibc-hwcaps.c
index 39f56d0c81..690f0d5fab 100644
--- a/sysdeps/s390/s390-64/tst-glibc-hwcaps.c
+++ b/sysdeps/s390/s390-64/tst-glibc-hwcaps.c
@@ -26,33 +26,37 @@ extern int marker2 (void);
 extern int marker3 (void);
 extern int marker4 (void);

-/* Return the POWER level, 8 for the baseline.  */
+/* Return the arch level, 10 for the baseline libmarkermod*.so's.  */
 static int
 compute_level (void)
 {
   const char *platform = (const char *) getauxval (AT_PLATFORM);

-  int result;
-  if (sscanf (platform, "arch%d", &result) == 1)
-     return result;
-
   /* The arch* versions refer to the edition of the Principles of
      Operation, and they are off by two when compared with the recent
      product names.  (The code below should not be considered an
      accurate mapping to Principles of Operation editions for earlier
      AT_PLATFORM strings).  */
   if (strcmp (platform, "z900") == 0)
-    return 5;
+    return 10;
   if (strcmp (platform, "z990") == 0)
-    return 6;
+    return 10;
   if (strcmp (platform, "z9-109") == 0)
-    return 7;
+    return 10;
   if (strcmp (platform, "z10") == 0)
-    return 8;
+    return 10;
   if (strcmp (platform, "z196") == 0)
-    return 9;
+    return 10;
   if (strcmp (platform, "zEC12") == 0)
     return 10;
+
+  /* If we are running on z13 or newer and the kernel was booted with novx,
+     then AT_PLATFORM is z13 or newer, but _dl_hwcaps_subdirs_active will
+     return zero and the _dl_hwcaps_subdirs are not searched.  */
+  const unsigned long int hwcap = getauxval (AT_HWCAP);
+  if ((hwcap & HWCAP_S390_VX) == 0)
+    return 10;
+
   if (strcmp (platform, "z13") == 0)
     return 11;
   if (strcmp (platform, "z14") == 0)


I've also noticed that when building with gcc 6.5.0, I get test failures:
elf/tst-glibc-hwcaps-cache
elf/tst-glibc-hwcaps-prepend-cache
elf/tst-ldconfig-X
elf/tst-ldconfig-bad-aux-cache
elf/tst-ldconfig-ld_so_conf-update
elf/tst-stringtable

It seems that it always fails with "String table is too large".
I've debugged elf/tst-stringtable into elf/stringtable:185:
else if (__builtin_add_overflow (previous->offset,
                                 previous->length + 1,
                                 &current->offset))
                error (EXIT_FAILURE, 0, _("String table is too large"));

It seems that gcc 6.5.0's __builtin_add_overflow is buggy:
al  %r12,12(%r9)
st  %r12,12(%r7)
jhe 0x1002762 <stringtable_finalize+274>
larl        %r4,0x10050d6
lghi        %r3,0
lghi        %r2,1
brasl       %r14,0x1001bf8 <error@plt>
=> The jump should jump away in case of no overflow.

(gdb) p	*previous (== r9)
$2 = {next = 0x0, length = 3, offset = 7, string = 0x3fffdc0b800 "999"}
(gdb) p	*current (== r7)
$3 = {next = 0x0, length = 10, offset = 11, string = 0x3fffdfbf5c0
"prefix/899"}
(gdb) p	*(uint32_t*) (12 + $r9)
$1 = 7
(gdb) i r r12 cc
r12            0x3ff0000000b	   4393751543819
cc             0x1                 1
=> cc is 1, but "jhe" jumps away for cc==0 or cc==2.
Resulting Condition Code (of al instruction):
0 Result zero; no carry
1 Result not zero; no carry
2 Result zero; carry
3 Result not zero; carry

When building with other GCC versions, I don't see this bug.
Have you noticed such a bug on other architectures?


While viewing your previous patches, I've found the following "if" in
commit "elf: Add glibc-hwcaps subdirectory support to ld.so cache
processing":
elf/dl-hwcaps.c:
static void
+sort_priorities_by_name (void)
+{
...
+	int cmp = memcmp (current->name, previous->name, to_compare);
+	if (cmp >= 0
+	    || (cmp == 0 && current->name_length >= previous->name_length))
+	  break;
Is this condition intended? The second part "cmp == 0" will never be
evaluated as in this case, the first part "cmp >= 0" is already true.

Thanks,
Stefan
  
Florian Weimer Dec. 9, 2020, 6:52 p.m. UTC | #3
* Stefan Liebler:

> I've had a look to your patches. Can you please adjust some lines. Then
> this patch is okay for s390x:
> - The commit subject-line contains "Add Add"

Oops, fixed.

> - If e.g. a machine newer-than-z15 does not have HWCAP_S390_SORT, then
> it would fall back to z14:
>
> diff --git a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
> b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
> index fa8d2ce1f1..3673808a45 100644
> --- a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
> +++ b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
> @@ -41,11 +41,12 @@ _dl_hwcaps_subdirs_active (void)
>      return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
>    ++active;
>
> -  /* z15.  */
> +  /* z15.
> +     Note: We do not list HWCAP_S390_SORT and HWCAP_S390_DFLT here as,
> +     according to the Principles of Operation, those may be replaced or
> removed
> +     in future.  */
>    if (!((GLRO (dl_hwcap) & HWCAP_S390_VXRS_EXT2)
> -        && (GLRO (dl_hwcap) & HWCAP_S390_VXRS_PDE)
> -        && (GLRO (dl_hwcap) & HWCAP_S390_SORT)
> -        && (GLRO (dl_hwcap) & HWCAP_S390_DFLT)))
> +        && (GLRO (dl_hwcap) & HWCAP_S390_VXRS_PDE)))
>      return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
>    ++active;

Does -march=z15 imply SORT and DFLT?  That bit wasn't clear to me.
If it does not, we should not test for it.

> - I've asked the kernel-guys regarding AT_PLATFORM: The list is
> complete, but it never contains archXYZ. This is only available for
> binutils/gcc -march=archXYZ.

Okay, I went with the current kernel sources and did not check the
history there.

> - If running e.g. on z196 or older, tst-glibc-hwcaps will always fail,
> as level would be <= 9 which leads to fails:
> TEST_COMPARE (marker2 (), MIN (level - 9, 2));
> Therefore compute_level should always return the baseline for older
> platforms.

Oh, good point.

> - If we are running on z13 or newer and the kernel was booted with novx,
> then AT_PLATFORM is z13 or newer, but _dl_hwcaps_subdirs_active will
> return zero and the _dl_hwcaps_subdirs are not searched as HWCAP_S390_VX
> and all the other VX.. flags are not set. This leads to a test fail.

That's quite bad because it breaks the existing AT_PLATFORM subdirectory
for z13 and newer.  (I expect the reason to have such a directory is to
put vectorized code there.)  Given that this is an unsupported
configuration for glibc, maybe the test failure is acceptable.  On the
other hand, if we deprecate the old mechanism (separate discussion),
this becomes a supported configuration, so adapting the test makes
sense.

> I've also recognized that if build with gcc 6.5.0, I'll get test-fails:
> elf/tst-glibc-hwcaps-cache
> elf/tst-glibc-hwcaps-prepend-cache
> elf/tst-ldconfig-X
> elf/tst-ldconfig-bad-aux-cache
> elf/tst-ldconfig-ld_so_conf-update
> elf/tst-stringtable
>
> It seems as it always fails with "String table is too large".
> I've debugged elf/tst-stringtable into elf/stringtable:185:
> else if (__builtin_add_overflow (previous->offset,
>                                  previous->length + 1,
>                                  &current->offset))
>                 error (EXIT_FAILURE, 0, _("String table is too large"));
>
> It seems as gcc 6.5.0 __builtin_add_overflow is buggy:

Sorry, I do not recall a discussion of this particular bug.  I do not
see changes to s390.md that would correspond to this (but then I'm not a
GCC developer …).

I haven't seen such a failure on other architectures.

> While viewing your previous patches, I've found the following "if" in
> commit "elf: Add glibc-hwcaps subdirectory support to ld.so cache
> processing":
> elf/dl-hwcaps.c:
> static void
> +sort_priorities_by_name (void)
> +{
> ...
> +	int cmp = memcmp (current->name, previous->name, to_compare);
> +	if (cmp >= 0
> +	    || (cmp == 0 && current->name_length >= previous->name_length))
> +	  break;
> Is this condition intended? The second part "cmp == 0" will never be
> evaluated as in this case, the first part "cmp >= 0" is already true.

Thanks, I've pushed a fix for that.

I'll post an update of the s390x patch soon.

Florian
  
Stefan Liebler Dec. 10, 2020, 10:22 a.m. UTC | #4
On 12/9/20 7:52 PM, Florian Weimer wrote:
> * Stefan Liebler:
> 
>> I've had a look to your patches. Can you please adjust some lines. Then
>> this patch is okay for s390x:
>> - The commit subject-line contains "Add Add"
> 
> Oops, fixed.
> 
>> - If e.g. a machine newer-than-z15 does not have HWCAP_S390_SORT, then
>> it would fall back to z14:
>>
>> diff --git a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
>> b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
>> index fa8d2ce1f1..3673808a45 100644
>> --- a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
>> +++ b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
>> @@ -41,11 +41,12 @@ _dl_hwcaps_subdirs_active (void)
>>      return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
>>    ++active;
>>
>> -  /* z15.  */
>> +  /* z15.
>> +     Note: We do not list HWCAP_S390_SORT and HWCAP_S390_DFLT here as,
>> +     according to the Principles of Operation, those may be replaced or
>> removed
>> +     in future.  */
>>    if (!((GLRO (dl_hwcap) & HWCAP_S390_VXRS_EXT2)
>> -        && (GLRO (dl_hwcap) & HWCAP_S390_VXRS_PDE)
>> -        && (GLRO (dl_hwcap) & HWCAP_S390_SORT)
>> -        && (GLRO (dl_hwcap) & HWCAP_S390_DFLT)))
>> +        && (GLRO (dl_hwcap) & HWCAP_S390_VXRS_PDE)))
>>      return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
>>    ++active;
> 
> Does -march=z15 imply SORT and DFLT?  That bit wasn't clear to me.
> If it does not, we should net test for it.
No, the corresponding instructions are not emitted by gcc -march=z15.
E.g. dfltcc is manually added to zlib. There is a runtime check if this
facility is available.
> 
>> - I've asked the kernel-guys regarding AT_PLATFORM: The list is
>> complete, but it never contains archXYZ. This is only available for
>> binutils/gcc -march=archXYZ.
> 
> Okay, I went with the current kernel sources and did not check the
> history there.
> 
>> - If running e.g. on z196 or older, tst-glibc-hwcaps will always fail,
>> as level would be <= 9 which leads to fails:
>> TEST_COMPARE (marker2 (), MIN (level - 9, 2));
>> Therefore compute_level should always return the baseline for older
>> platforms.
> 
> Oh, good point.
> 
>> - If we are running on z13 or newer and the kernel was booted with novx,
>> then AT_PLATFORM is z13 or newer, but _dl_hwcaps_subdirs_active will
>> return zero and the _dl_hwcaps_subdirs are not searched as HWCAP_S390_VX
>> and all the other VX.. flags are not set. This leads to a test fail.
> 
> That's quite bad because it breaks the existing AT_PLATFORM subdirectory
> for z13 and newer.  (I expect the reason to have such a directory is to
> put vectorized code there.)  Given that this is an unsupported
> configuration for glibc, maybe the test failure is acceptable.  On the
> other hand, if we deprecate the old mechanism (separate discussion),
> this becomes a supported configuration, so adapting the test makes
> sense.
I assume novx is usually used only for testing purposes. If e.g. RHEL 8
is booted with novx, it would fail on the first vector instruction as
the ALS includes those.

As far as I know, the dfp subdirectory was used for libdfp in the
past. Now the distros require a minimum architecture level which always
has support for dfp and the base libdfp library is built with hardware
dfp instructions.

There are also different flavors for libatlas. As far as I know, those
are located in the corresponding vector-hwcap subdirectories instead of
z13/z14/z15.

Do you have an estimate of when you plan to deprecate/remove the old mechanism?

> 
>> I've also recognized that if build with gcc 6.5.0, I'll get test-fails:
>> elf/tst-glibc-hwcaps-cache
>> elf/tst-glibc-hwcaps-prepend-cache
>> elf/tst-ldconfig-X
>> elf/tst-ldconfig-bad-aux-cache
>> elf/tst-ldconfig-ld_so_conf-update
>> elf/tst-stringtable
>>
>> It seems as it always fails with "String table is too large".
>> I've debugged elf/tst-stringtable into elf/stringtable:185:
>> else if (__builtin_add_overflow (previous->offset,
>>                                  previous->length + 1,
>>                                  &current->offset))
>>                 error (EXIT_FAILURE, 0, _("String table is too large"));
>>
>> It seems as gcc 6.5.0 __builtin_add_overflow is buggy:
> 
> Sorry, I do not recall a discussion of this particular bug.  I do not
> see changes s390.md that would correspond to this (but then I'm not GCC
> developer …).
> 
> I haven't seen such a failure on other architectures.
I assume there won't be a further gcc 6 release in the future which could
fix it. As ldconfig would fail with "String table is too large" if built
with gcc 6.5, would you recommend using gcc 7.1 as the minimum on
s390/s390x? Currently gcc 6.2 or newer is required in <glibc>/configure.ac.

> 
>> While viewing your previous patches, I've found the following "if" in
>> commit "elf: Add glibc-hwcaps subdirectory support to ld.so cache
>> processing":
>> elf/dl-hwcaps.c:
>> static void
>> +sort_priorities_by_name (void)
>> +{
>> ...
>> +	int cmp = memcmp (current->name, previous->name, to_compare);
>> +	if (cmp >= 0
>> +	    || (cmp == 0 && current->name_length >= previous->name_length))
>> +	  break;
>> Is this condition intended? The second part "cmp == 0" will never be
>> evaluated as in this case, the first part "cmp >= 0" is already true.
> 
> Thanks, I've pushed a fix for that.
> 
> I'll post an update of the s390x patch soon.
> 
> Florian
> 
Thanks,
Stefan
  
Florian Weimer Dec. 10, 2020, 2:54 p.m. UTC | #5
* Stefan Liebler via Libc-alpha:

>> That's quite bad because it breaks the existing AT_PLATFORM subdirectory
>> for z13 and newer.  (I expect the reason to have such a directory is to
>> put vectorized code there.)  Given that this is an unsupported
>> configuration for glibc, maybe the test failure is acceptable.  On the
>> other hand, if we deprecate the old mechanism (separate discussion),
>> this becomes a supported configuration, so adapting the test makes
>> sense.

> I assume novx is usually used only for testing purposes. If e.g. RHEL 8
> is booted with novx, it would fail on the first vector instruction as
> the ALS includes those.
>
> Currently I know, that the dfp-subdirectory was used for libdfp in the
> past. Now the distros require a minimum architecture level which always
> has support for dfp and the base libdfp library is built with hardware
> dfp instructions.
>
> There are also different flavors for libatlas. As far as I know, those
> are located in the corresponding vector-hwcap subdirectories instead of
> z13/z14/z15.
>
> Do you have an outlook when you plan to deprecate/remove the old mechanism?

I'll start a separate thread for this topic once I've dealt with some
urgent priority work on my end.

Thanks,
Florian
  

Patch

diff --git a/elf/Makefile b/elf/Makefile
index 37d3af593c..2b8458a18f 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -1828,7 +1828,7 @@  $(objpfx)argv0test.out: tst-rtld-argv0.sh $(objpfx)ld.so \
 
 # Most likely search subdirectories, for each supported architecture.
 # Used to obtain test coverage wide test coverage.
-glibc-hwcaps-first-subdirs-for-tests = power9 x86-64-v2
+glibc-hwcaps-first-subdirs-for-tests = power9 x86-64-v2 z13
 
 # The test modules are parameterized by preprocessor macros.
 LDFLAGS-libmarkermod1-1.so += -Wl,-soname,libmarkermod1.so
diff --git a/elf/tst-glibc-hwcaps-cache.script b/elf/tst-glibc-hwcaps-cache.script
index 6a4675f9bd..19b06d0adc 100644
--- a/elf/tst-glibc-hwcaps-cache.script
+++ b/elf/tst-glibc-hwcaps-cache.script
@@ -11,6 +11,16 @@  mkdirp 0770 $L/glibc-hwcaps/power10
 cp $B/elf/libmarkermod3-2.so $L/glibc-hwcaps/power9/libmarkermod3.so
 cp $B/elf/libmarkermod3-3.so $L/glibc-hwcaps/power10/libmarkermod3.so
 
+mkdirp 0770 $L/glibc-hwcaps/z13
+cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/z13/libmarkermod2.so
+mkdirp 0770 $L/glibc-hwcaps/z14
+cp $B/elf/libmarkermod3-2.so $L/glibc-hwcaps/z14/libmarkermod3.so
+cp $B/elf/libmarkermod3-3.so $L/glibc-hwcaps/z14/libmarkermod3.so
+mkdirp 0770 $L/glibc-hwcaps/z15
+cp $B/elf/libmarkermod4-2.so $L/glibc-hwcaps/z15/libmarkermod4.so
+cp $B/elf/libmarkermod4-3.so $L/glibc-hwcaps/z15/libmarkermod4.so
+cp $B/elf/libmarkermod4-4.so $L/glibc-hwcaps/z15/libmarkermod4.so
+
 mkdirp 0770 $L/glibc-hwcaps/x86-64-v2
 cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/x86-64-v2/libmarkermod2.so
 mkdirp 0770 $L/glibc-hwcaps/x86-64-v3
diff --git a/sysdeps/s390/s390-64/Makefile b/sysdeps/s390/s390-64/Makefile
index b4d793bb3d..e5da26871c 100644
--- a/sysdeps/s390/s390-64/Makefile
+++ b/sysdeps/s390/s390-64/Makefile
@@ -6,4 +6,43 @@  ifeq ($(subdir),elf)
 CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused
 CFLAGS-dl-load.c += -Wno-unused
 CFLAGS-dl-reloc.c += -Wno-unused
+
+$(objpfx)tst-glibc-hwcaps: $(objpfx)libmarkermod2-1.so \
+  $(objpfx)libmarkermod3-1.so $(objpfx)libmarkermod4-1.so
+$(objpfx)tst-glibc-hwcaps.out: \
+  $(objpfx)libmarkermod2.so \
+    $(objpfx)glibc-hwcaps/z13/libmarkermod2.so \
+  $(objpfx)libmarkermod3.so \
+    $(objpfx)glibc-hwcaps/z13/libmarkermod3.so \
+    $(objpfx)glibc-hwcaps/z14/libmarkermod3.so \
+  $(objpfx)libmarkermod4.so \
+    $(objpfx)glibc-hwcaps/z13/libmarkermod4.so \
+    $(objpfx)glibc-hwcaps/z14/libmarkermod4.so \
+    $(objpfx)glibc-hwcaps/z15/libmarkermod4.so \
+
+$(objpfx)glibc-hwcaps/z13/libmarkermod2.so: $(objpfx)libmarkermod2-2.so
+	$(make-target-directory)
+	cp $< $@
+$(objpfx)glibc-hwcaps/z13/libmarkermod3.so: $(objpfx)libmarkermod3-2.so
+	$(make-target-directory)
+	cp $< $@
+$(objpfx)glibc-hwcaps/z14/libmarkermod3.so: $(objpfx)libmarkermod3-3.so
+	$(make-target-directory)
+	cp $< $@
+$(objpfx)glibc-hwcaps/z13/libmarkermod4.so: $(objpfx)libmarkermod4-2.so
+	$(make-target-directory)
+	cp $< $@
+$(objpfx)glibc-hwcaps/z14/libmarkermod4.so: $(objpfx)libmarkermod4-3.so
+	$(make-target-directory)
+	cp $< $@
+$(objpfx)glibc-hwcaps/z15/libmarkermod4.so: $(objpfx)libmarkermod4-4.so
+	$(make-target-directory)
+	cp $< $@
+
+ifeq (no,$(build-hardcoded-path-in-tests))
+# This is an ld.so.cache test, and RPATH/RUNPATH in the executable
+# interferes with its test objectives.
+tests-container += tst-glibc-hwcaps-cache
 endif
+
+endif # $(subdir) == elf
diff --git a/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
new file mode 100644
index 0000000000..fa8d2ce1f1
--- /dev/null
+++ b/sysdeps/s390/s390-64/dl-hwcaps-subdirs.c
@@ -0,0 +1,53 @@ 
+/* Architecture-specific glibc-hwcaps subdirectories.  s390x version.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <dl-hwcaps.h>
+#include <ldsodefs.h>
+
+const char _dl_hwcaps_subdirs[] = "z15:z14:z13";
+enum { subdirs_count = 3 }; /* Number of components in _dl_hwcaps_subdirs.  */
+
+uint32_t
+_dl_hwcaps_subdirs_active (void)
+{
+  int active = 0;
+
+  /* Test in reverse preference order.  */
+
+  /* z13.  */
+  if (!(GLRO (dl_hwcap) & HWCAP_S390_VX))
+    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
+  ++active;
+
+  /* z14.  */
+  if (!((GLRO (dl_hwcap) & HWCAP_S390_VXD)
+        && (GLRO (dl_hwcap) & HWCAP_S390_VXE)
+        && (GLRO (dl_hwcap) & HWCAP_S390_GS)))
+    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
+  ++active;
+
+  /* z15.  */
+  if (!((GLRO (dl_hwcap) & HWCAP_S390_VXRS_EXT2)
+        && (GLRO (dl_hwcap) & HWCAP_S390_VXRS_PDE)
+        && (GLRO (dl_hwcap) & HWCAP_S390_SORT)
+        && (GLRO (dl_hwcap) & HWCAP_S390_DFLT)))
+    return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
+  ++active;
+
+  return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
+}
diff --git a/sysdeps/s390/s390-64/tst-glibc-hwcaps.c b/sysdeps/s390/s390-64/tst-glibc-hwcaps.c
new file mode 100644
index 0000000000..39f56d0c81
--- /dev/null
+++ b/sysdeps/s390/s390-64/tst-glibc-hwcaps.c
@@ -0,0 +1,78 @@ 
+/* glibc-hwcaps subdirectory test.  s390x version.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
+#include <sys/auxv.h>
+#include <sys/param.h>
+
+extern int marker2 (void);
+extern int marker3 (void);
+extern int marker4 (void);
+
+/* Return the POWER level, 8 for the baseline.  */
+static int
+compute_level (void)
+{
+  const char *platform = (const char *) getauxval (AT_PLATFORM);
+
+  int result;
+  if (sscanf (platform, "arch%d", &result) == 1)
+     return result;
+
+  /* The arch* versions refer to the edition of the Principles of
+     Operation, and they are off by two when compared with the recent
+     product names.  (The code below should not be considered an
+     accurate mapping to Principles of Operation editions for earlier
+     AT_PLATFORM strings).  */
+  if (strcmp (platform, "z900") == 0)
+    return 5;
+  if (strcmp (platform, "z990") == 0)
+    return 6;
+  if (strcmp (platform, "z9-109") == 0)
+    return 7;
+  if (strcmp (platform, "z10") == 0)
+    return 8;
+  if (strcmp (platform, "z196") == 0)
+    return 9;
+  if (strcmp (platform, "zEC12") == 0)
+    return 10;
+  if (strcmp (platform, "z13") == 0)
+    return 11;
+  if (strcmp (platform, "z14") == 0)
+    return 12;
+  if (strcmp (platform, "z15") == 0)
+    return 13;
+  printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform);
+  /* Assume that the new platform supports z15.  */
+  return 13;
+}
+
+static int
+do_test (void)
+{
+  int level = compute_level ();
+  printf ("info: detected architecture level: arch%d\n", level);
+  TEST_COMPARE (marker2 (), MIN (level - 9, 2));
+  TEST_COMPARE (marker3 (), MIN (level - 9, 3));
+  TEST_COMPARE (marker4 (), MIN (level - 9, 4));
+  return 0;
+}
+
+#include <support/test-driver.c>