[16/28] elf: Add glibc-hwcaps support for LD_LIBRARY_PATH

Message ID 47cb6998ed91f70f122de115b2e03ea5e82e5884.1601569371.git.fweimer@redhat.com
State Superseded
Headers
Series glibc-hwcaps support |

Commit Message

Florian Weimer Oct. 1, 2020, 4:33 p.m. UTC
  This hacks non-power-set processing into _dl_important_hwcaps.
Once the legacy hwcaps handling goes away, the subdirectory
handling needs to be reworked, but it is premature to do this
while both approaches are still supported.
---
 elf/Makefile               |   5 +-
 elf/dl-hwcaps-subdirs.c    |  29 ++++++++
 elf/dl-hwcaps.c            | 138 +++++++++++++++++++++++++++++++-----
 elf/dl-hwcaps.h            |  83 ++++++++++++++++++++++
 elf/dl-hwcaps_split.c      |  77 ++++++++++++++++++++
 elf/dl-load.c              |   7 +-
 elf/dl-main.h              |  11 ++-
 elf/dl-support.c           |   5 +-
 elf/dl-usage.c             |  68 +++++++++++++++++-
 elf/rtld.c                 |  18 +++++
 elf/tst-dl-hwcaps_split.c  | 139 +++++++++++++++++++++++++++++++++++++
 sysdeps/generic/ldsodefs.h |  20 ++++--
 12 files changed, 570 insertions(+), 30 deletions(-)
 create mode 100644 elf/dl-hwcaps-subdirs.c
 create mode 100644 elf/dl-hwcaps_split.c
 create mode 100644 elf/tst-dl-hwcaps_split.c
  

Comments

Szabolcs Nagy Oct. 8, 2020, 10:13 a.m. UTC | #1
The 10/01/2020 18:33, Florian Weimer via Libc-alpha wrote:
> This hacks non-power-set processing into _dl_important_hwcaps.
> Once the legacy hwcaps handling goes away, the subdirectory
> handling needs to be reworked, but it is premature to do this
> while both approaches are still supported.
...
> +/* Returns a bitmap of active subdirectories in _dl_hwcaps_subdirs.
> +   Bit 0 (the LSB) corresponds to the first substring in
> +   _dl_hwcaps_subdirs, bit 1 to the second substring, and so on.
> +   There is no direct correspondence between HWCAP bitmasks and this
> +   bitmask.  */
> +int32_t _dl_hwcaps_subdirs_active (void) attribute_hidden;

if this returns a bitmask i would use uint32_t

to avoid overflowing shifts and have 32bits available.
  
Paul A. Clarke Oct. 8, 2020, 11:16 p.m. UTC | #2
just a couple of nits found while browsing...

On Thu, Oct 01, 2020 at 06:33:16PM +0200, Florian Weimer via Libc-alpha wrote:
> diff --git a/elf/dl-hwcaps.c b/elf/dl-hwcaps.c
> index 44dbac099f..4de94759a2 100644
> --- a/elf/dl-hwcaps.c
> +++ b/elf/dl-hwcaps.c
> @@ -26,20 +26,97 @@
>  #include <dl-procinfo.h>
>  #include <dl-hwcaps.h>
> 
> +/* This is the result of counting the substrings in a colon-separated
> +   hwcaps string.  */
> +struct count_hwcaps
> +{
> +  /* Number of substrings.  */
> +  size_t count;
> +
> +  /* Sum of the individual substring lengths (without separates or

separators?

> +     null terminators).  */
> +  size_t total_length;
> +
> +  /* Maximum length of an individual substring.  */
> +  size_t maximum_length;
> +};
> +
> +/* Update *COUNTS according to the contents of HWCAPS.  Skip over
> +   entries whose bit is not set in MASK.  */
> +static void
> +count_hwcaps (struct count_hwcaps *counts, const char *hwcaps,
> +	      int32_t bitmask, const char *mask)
> +{
> +  struct dl_hwcaps_split_masked sp;
> +  _dl_hwcaps_split_masked_init (&sp, hwcaps, bitmask, mask);
> +  while (_dl_hwcaps_split_masked (&sp))
> +    {
> +      ++counts->count;
> +      counts->total_length += sp.split.length;
> +      if (sp.split.length > counts->maximum_length)
> +	counts->maximum_length = sp.split.length;
> +    }
> +}
> +
> +/* State for copy_hwcaps.  Must be initialized to point to
> +   the storage areas for the array and the strings themselves.  */
> +struct copy_hwcaps
> +{
> +  struct r_strlenpair *next_pair;
> +  char *next_string;
> +};
> +
> +/* Copy HWCAPS into the string pairs and strings, advancing *TARGET.
> +   Skip over entries whose bit is not set in MASK.  */
> +static void
> +copy_hwcaps (struct copy_hwcaps *target, const char *hwcaps,
> +	     int32_t bitmask, const char *mask)
> +{
> +  struct dl_hwcaps_split_masked sp;
> +  _dl_hwcaps_split_masked_init (&sp, hwcaps, bitmask, mask);
> +  while (_dl_hwcaps_split_masked (&sp))
> +    {
> +      target->next_pair->str = target->next_string;
> +      char *slash = __mempcpy (__mempcpy (target->next_string,
> +					  GLIBC_HWCAPS_PREFIX,
> +					  strlen (GLIBC_HWCAPS_PREFIX)),
> +			       sp.split.segment, sp.split.length);
> +      *slash = '/';
> +      target->next_pair->len
> +	= strlen (GLIBC_HWCAPS_PREFIX) + sp.split.length + 1;
> +      ++target->next_pair;
> +      target->next_string = slash + 1;
> +    }
> +}
> +
>  /* Return an array of useful/necessary hardware capability names.  */
>  const struct r_strlenpair *
> -_dl_important_hwcaps (size_t *sz, size_t *max_capstrlen)
> +_dl_important_hwcaps (const char *glibc_hwcaps_prepend,
> +		      const char *glibc_hwcaps_mask,
> +		      size_t *sz, size_t *max_capstrlen)
>  {
>    uint64_t hwcap_mask = GET_HWCAP_MASK();
>    /* Determine how many important bits are set.  */
>    uint64_t masked = GLRO(dl_hwcap) & hwcap_mask;
>    size_t cnt = GLRO (dl_platform) != NULL;
>    size_t n, m;
> -  size_t total;
>    struct r_strlenpair *result;
>    struct r_strlenpair *rp;
>    char *cp;
> 
> +  /* glibc-hwcaps subdirectories.  These are exempted from the power
> +     set construction below below.  */

remove one "below" :-)

> +  int32_t hwcaps_subdirs_active = _dl_hwcaps_subdirs_active ();
> +  struct count_hwcaps hwcaps_counts =  { 0, };
> +  count_hwcaps (&hwcaps_counts, glibc_hwcaps_prepend, -1, NULL);
> +  count_hwcaps (&hwcaps_counts, _dl_hwcaps_subdirs, hwcaps_subdirs_active,
> +		glibc_hwcaps_mask);
> +
> +  /* Each hwcaps subdirectory has a GLIBC_HWCAPS_PREFIX string prefix
> +     and a "/" suffix once stored in the result.  */
> +  size_t total = (hwcaps_counts.count * (strlen (GLIBC_HWCAPS_PREFIX) + 1)
> +		  + hwcaps_counts.total_length);
> +
>    /* Count the number of bits set in the masked value.  */
>    for (n = 0; (~((1ULL << n) - 1) & masked) != 0; ++n)
>      if ((masked & (1ULL << n)) != 0)

> diff --git a/elf/dl-hwcaps.h b/elf/dl-hwcaps.h
> index b66da59b89..a6453f15f3 100644
> --- a/elf/dl-hwcaps.h
> +++ b/elf/dl-hwcaps.h
> @@ -28,3 +33,81 @@
>  #  define GET_HWCAP_MASK() (0)
>  # endif
>  #endif
> +
> +#define GLIBC_HWCAPS_SUBDIRECTORY "glibc-hwcaps"
> +#define GLIBC_HWCAPS_PREFIX GLIBC_HWCAPS_SUBDIRECTORY "/"
> +
> +/* Used by _dl_hwcaps_split below, to split strings at ':'
> +   separators.  */
> +struct dl_hwcaps_split
> +{
> +  const char *segment;          /* Start of the current segment.  */
> +  size_t length;                /* Number of bytes until ':' or NUL.  */
> +};
> +
> +/* Prepare *S to parse SUBJECT, for future _dl_hwcaps_split calls.  If
> +   SUBJECT is NULL, it is treated as the empty string.  */
> +static inline void
> +_dl_hwcaps_split_init (struct dl_hwcaps_split *s, const char *subject)
> +{
> +  s->segment = subject;
> +  /* The initial call to _dl_hwcaps_split will not skip anything.  */
> +  s->length = 0;
> +}
> +
> +/* Extract the next non-empty string segment, up to ':' or the null
> +   terminator.  Return true if one more segment was found, or false if
> +   the end of the string was reached.  On success, S->segment is the
> +   start of the segment found, and S->length is its length.
> +   (Typically, S->segment[S->length] is not null.)  */
> +_Bool _dl_hwcaps_split (struct dl_hwcaps_split *s) attribute_hidden;
> +
> +/* Similar to dl_hwcaps_split, but with bit-based and name-based
> +   masking.  */
> +struct dl_hwcaps_split_masked
> +{
> +  struct dl_hwcaps_split split;
> +
> +  /* For used by the iterator implementation.  */
> +  const char *mask;
> +  int32_t bitmask;
> +};
> +
> +/* Prepare *S for iteration with _dl_hwcaps_split_masked.  Only HWCAP
> +   names in SUBJECT whose bit is set in BITMASK and whose ane is in

s/ane/name/ ?

> +   MASK will be returned.  SUBJECT must not contain empty HWCAP names.
> +   If MASK is NULL, no name-based masking is applied.  Likewise for
> +   BITMASK if BITMASK is -1 (infinite number of bits).  */
> +static inline void
> +_dl_hwcaps_split_masked_init (struct dl_hwcaps_split_masked *s,
> +                              const char *subject,
> +                              int32_t bitmask, const char *mask)
> +{
> +  _dl_hwcaps_split_init (&s->split, subject);
> +  s->bitmask = bitmask;
> +  s->mask = mask;
> +}
> +
> +/* Like _dl_hwcaps_split, but apply masking.  */
> +_Bool _dl_hwcaps_split_masked (struct dl_hwcaps_split_masked *s)
> +  attribute_hidden;
> +
> +/* Returns true if the colon-separated HWCAP list HWCAPS contains the
> +   capability NAME (with length NAME_LENGTH).  If HWCAPS is NULL, the
> +   function returns true.  */
> +_Bool _dl_hwcaps_contains (const char *hwcaps, const char *name,
> +                           size_t name_length) attribute_hidden;
> +
> +/* Colon-separated string of glibc-hwcaps subdirectories, without the
> +   "glibc-hwcaps/" prefix.  The most preferred subdirectory needs to
> +   be listed first.  */
> +extern const char _dl_hwcaps_subdirs[] attribute_hidden;
> +
> +/* Returns a bitmap of active subdirectories in _dl_hwcaps_subdirs.
> +   Bit 0 (the LSB) corresponds to the first substring in
> +   _dl_hwcaps_subdirs, bit 1 to the second substring, and so on.
> +   There is no direct correspondence between HWCAP bitmasks and this
> +   bitmask.  */
> +int32_t _dl_hwcaps_subdirs_active (void) attribute_hidden;
> +
> +#endif /* _DL_HWCAPS_H */

PC
  
Florian Weimer Oct. 9, 2020, 8:56 a.m. UTC | #3
* Paul A. Clarke:

>> +  /* Sum of the individual substring lengths (without separates or
>
> separators?

Fixed.

>> +  /* glibc-hwcaps subdirectories.  These are exempted from the power
>> +     set construction below below.  */
>
> remove one "below" :-)

Done.

>> +/* Prepare *S for iteration with _dl_hwcaps_split_masked.  Only HWCAP
>> +   names in SUBJECT whose bit is set in BITMASK and whose ane is in
>
> s/ane/name/ ?

Right, fixed.

I'll try to address the other comments and repost.  Thanks.

Florian
  
Florian Weimer Oct. 9, 2020, 9:08 a.m. UTC | #4
* Szabolcs Nagy via Libc-alpha:

> The 10/01/2020 18:33, Florian Weimer via Libc-alpha wrote:
>> This hacks non-power-set processing into _dl_important_hwcaps.
>> Once the legacy hwcaps handling goes away, the subdirectory
>> handling needs to be reworked, but it is premature to do this
>> while both approaches are still supported.
> ...
>> +/* Returns a bitmap of active subdirectories in _dl_hwcaps_subdirs.
>> +   Bit 0 (the LSB) corresponds to the first substring in
>> +   _dl_hwcaps_subdirs, bit 1 to the second substring, and so on.
>> +   There is no direct correspondence between HWCAP bitmasks and this
>> +   bitmask.  */
>> +int32_t _dl_hwcaps_subdirs_active (void) attribute_hidden;
>
> if this returns a bitmask i would use uint32_t
>
> to avoid overflowing shifts and have 32bits available.

I did this for alignment with _dl_hwcaps_split_masked_init, where it
would convenient to shift in one bits (so that -1 always means “all
ones”).  But I guess I can turn this into a typedef and use an unsigned
type.

Thanks,
Florian
  
Szabolcs Nagy Oct. 9, 2020, 10:50 a.m. UTC | #5
The 10/09/2020 11:08, Florian Weimer wrote:
> * Szabolcs Nagy via Libc-alpha:
> > The 10/01/2020 18:33, Florian Weimer via Libc-alpha wrote:
> >> This hacks non-power-set processing into _dl_important_hwcaps.
> >> Once the legacy hwcaps handling goes away, the subdirectory
> >> handling needs to be reworked, but it is premature to do this
> >> while both approaches are still supported.
> > ...
> >> +/* Returns a bitmap of active subdirectories in _dl_hwcaps_subdirs.
> >> +   Bit 0 (the LSB) corresponds to the first substring in
> >> +   _dl_hwcaps_subdirs, bit 1 to the second substring, and so on.
> >> +   There is no direct correspondence between HWCAP bitmasks and this
> >> +   bitmask.  */
> >> +int32_t _dl_hwcaps_subdirs_active (void) attribute_hidden;
> >
> > if this returns a bitmask i would use uint32_t
> >
> > to avoid overflowing shifts and have 32bits available.
> 
> I did this for alignment with _dl_hwcaps_split_masked_init, where it
> would convenient to shift in one bits (so that -1 always means “all
> ones”).  But I guess I can turn this into a typedef and use an unsigned
> type.

-1 converted to uint32_t is "all ones" so i don't see a problem there.

i don't think this should be a typedef (that hides
important details about the type where it is used)
  
Florian Weimer Oct. 9, 2020, 10:55 a.m. UTC | #6
* Szabolcs Nagy:

> The 10/09/2020 11:08, Florian Weimer wrote:
>> * Szabolcs Nagy via Libc-alpha:
>> > The 10/01/2020 18:33, Florian Weimer via Libc-alpha wrote:
>> >> This hacks non-power-set processing into _dl_important_hwcaps.
>> >> Once the legacy hwcaps handling goes away, the subdirectory
>> >> handling needs to be reworked, but it is premature to do this
>> >> while both approaches are still supported.
>> > ...
>> >> +/* Returns a bitmap of active subdirectories in _dl_hwcaps_subdirs.
>> >> +   Bit 0 (the LSB) corresponds to the first substring in
>> >> +   _dl_hwcaps_subdirs, bit 1 to the second substring, and so on.
>> >> +   There is no direct correspondence between HWCAP bitmasks and this
>> >> +   bitmask.  */
>> >> +int32_t _dl_hwcaps_subdirs_active (void) attribute_hidden;
>> >
>> > if this returns a bitmask i would use uint32_t
>> >
>> > to avoid overflowing shifts and have 32bits available.
>> 
>> I did this for alignment with _dl_hwcaps_split_masked_init, where it
>> would convenient to shift in one bits (so that -1 always means “all
>> ones”).  But I guess I can turn this into a typedef and use an unsigned
>> type.
>
> -1 converted to uint32_t is "all ones" so i don't see a problem there.
>
> i don't think this should be a typedef (that hides
> important details about the type where it is used)

I don't have a strong opinion on this, but could you please elaborate?
Are you concerned that it would hide the number of available bits?

Thanks,
Florian
  
Szabolcs Nagy Oct. 9, 2020, 11:03 a.m. UTC | #7
The 10/09/2020 12:55, Florian Weimer wrote:
> * Szabolcs Nagy:
> > The 10/09/2020 11:08, Florian Weimer wrote:
> >> * Szabolcs Nagy via Libc-alpha:
> >> > The 10/01/2020 18:33, Florian Weimer via Libc-alpha wrote:
> >> >> This hacks non-power-set processing into _dl_important_hwcaps.
> >> >> Once the legacy hwcaps handling goes away, the subdirectory
> >> >> handling needs to be reworked, but it is premature to do this
> >> >> while both approaches are still supported.
> >> > ...
> >> >> +/* Returns a bitmap of active subdirectories in _dl_hwcaps_subdirs.
> >> >> +   Bit 0 (the LSB) corresponds to the first substring in
> >> >> +   _dl_hwcaps_subdirs, bit 1 to the second substring, and so on.
> >> >> +   There is no direct correspondence between HWCAP bitmasks and this
> >> >> +   bitmask.  */
> >> >> +int32_t _dl_hwcaps_subdirs_active (void) attribute_hidden;
> >> >
> >> > if this returns a bitmask i would use uint32_t
> >> >
> >> > to avoid overflowing shifts and have 32bits available.
> >> 
> >> I did this for alignment with _dl_hwcaps_split_masked_init, where it
> >> would convenient to shift in one bits (so that -1 always means “all
> >> ones”).  But I guess I can turn this into a typedef and use an unsigned
> >> type.
> >
> > -1 converted to uint32_t is "all ones" so i don't see a problem there.
> >
> > i don't think this should be a typedef (that hides
> > important details about the type where it is used)
> 
> I don't have a strong opinion on this, but could you please elaborate?
> Are you concerned that it would hide the number of available bits?

looking at code locally, it is easier
to see what operations are valid if
i know the signedness and bit width of
the integer types involved.

typedef does not help with typesafety,
it sometimes helps documenting the
intent, but in this case i think
uint32_t is appropriate for the internal
bit mask.
  
Adhemerval Zanella Netto Oct. 9, 2020, 1:19 p.m. UTC | #8
On 01/10/2020 13:33, Florian Weimer via Libc-alpha wrote:
> This hacks non-power-set processing into _dl_important_hwcaps.
> Once the legacy hwcaps handling goes away, the subdirectory
> handling needs to be reworked, but it is premature to do this
> while both approaches are still supported.

Looks good, some nits below with the main suggestion being the
bitmask type change as Szabolcs. 

> ---
>  elf/Makefile               |   5 +-
>  elf/dl-hwcaps-subdirs.c    |  29 ++++++++
>  elf/dl-hwcaps.c            | 138 +++++++++++++++++++++++++++++++-----
>  elf/dl-hwcaps.h            |  83 ++++++++++++++++++++++
>  elf/dl-hwcaps_split.c      |  77 ++++++++++++++++++++
>  elf/dl-load.c              |   7 +-
>  elf/dl-main.h              |  11 ++-
>  elf/dl-support.c           |   5 +-
>  elf/dl-usage.c             |  68 +++++++++++++++++-
>  elf/rtld.c                 |  18 +++++
>  elf/tst-dl-hwcaps_split.c  | 139 +++++++++++++++++++++++++++++++++++++
>  sysdeps/generic/ldsodefs.h |  20 ++++--
>  12 files changed, 570 insertions(+), 30 deletions(-)
>  create mode 100644 elf/dl-hwcaps-subdirs.c
>  create mode 100644 elf/dl-hwcaps_split.c
>  create mode 100644 elf/tst-dl-hwcaps_split.c
> 
> diff --git a/elf/Makefile b/elf/Makefile
> index 7b97e773a5..2c36b08c73 100644
> --- a/elf/Makefile
> +++ b/elf/Makefile
> @@ -59,7 +59,8 @@ elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \
>  # ld.so uses those routines, plus some special stuff for being the program
>  # interpreter and operating independent of libc.
>  rtld-routines	= rtld $(all-dl-routines) dl-sysdep dl-environ dl-minimal \
> -  dl-error-minimal dl-conflict dl-hwcaps dl-usage
> +  dl-error-minimal dl-conflict dl-hwcaps dl-hwcaps_split dl-hwcaps-subdirs \
> +  dl-usage
>  all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)
>  
>  CFLAGS-dl-runtime.c += -fexceptions -fasynchronous-unwind-tables

Ok.

> @@ -217,7 +218,7 @@ tests-internal += loadtest unload unload2 circleload1 \
>  	 neededtest neededtest2 neededtest3 neededtest4 \
>  	 tst-tls3 tst-tls6 tst-tls7 tst-tls8 tst-dlmopen2 \
>  	 tst-ptrguard1 tst-stackguard1 tst-libc_dlvsym \
> -	 tst-create_format1 tst-tls-surplus
> +	 tst-create_format1 tst-tls-surplus tst-dl-hwcaps_split
>  tests-container += tst-pldd tst-dlopen-tlsmodid-container \
>    tst-dlopen-self-container
>  test-srcs = tst-pathopt

Ok.

> diff --git a/elf/dl-hwcaps-subdirs.c b/elf/dl-hwcaps-subdirs.c
> new file mode 100644
> index 0000000000..b142a3b826
> --- /dev/null
> +++ b/elf/dl-hwcaps-subdirs.c
> @@ -0,0 +1,29 @@
> +/* Architecture-specific glibc-hwcaps subdirectories.  Generic version.
> +   Copyright (C) 2020 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <dl-hwcaps.h>
> +
> +/* In the generic version, there are no subdirectories defined.  */
> +
> +const char _dl_hwcaps_subdirs[] = "";
> +
> +int32_t
> +_dl_hwcaps_subdirs_active (void)
> +{
> +  return 0;
> +}

Although using a signed does not really afects how you use the bitmask 
on _dl_hwcaps_split_masked (by shift and checking the LSB bit), I tend
to agree with Szabolzs that an unsigned type here would be better.

> diff --git a/elf/dl-hwcaps.c b/elf/dl-hwcaps.c
> index 44dbac099f..4de94759a2 100644
> --- a/elf/dl-hwcaps.c
> +++ b/elf/dl-hwcaps.c
> @@ -26,20 +26,97 @@
>  #include <dl-procinfo.h>
>  #include <dl-hwcaps.h>
>  
> +/* This is the result of counting the substrings in a colon-separated
> +   hwcaps string.  */
> +struct count_hwcaps
> +{
> +  /* Number of substrings.  */
> +  size_t count;
> +
> +  /* Sum of the individual substring lengths (without separates or
> +     null terminators).  */
> +  size_t total_length;
> +
> +  /* Maximum length of an individual substring.  */
> +  size_t maximum_length;
> +};
> +
> +/* Update *COUNTS according to the contents of HWCAPS.  Skip over
> +   entries whose bit is not set in MASK.  */
> +static void
> +count_hwcaps (struct count_hwcaps *counts, const char *hwcaps,
> +	      int32_t bitmask, const char *mask)
> +{
> +  struct dl_hwcaps_split_masked sp;
> +  _dl_hwcaps_split_masked_init (&sp, hwcaps, bitmask, mask);
> +  while (_dl_hwcaps_split_masked (&sp))
> +    {
> +      ++counts->count;
> +      counts->total_length += sp.split.length;
> +      if (sp.split.length > counts->maximum_length)
> +	counts->maximum_length = sp.split.length;
> +    }
> +}
> +

Ok. Maybe define that this updates the input by rename to
'update_count_hwcaps'?

> +/* State for copy_hwcaps.  Must be initialized to point to
> +   the storage areas for the array and the strings themselves.  */
> +struct copy_hwcaps
> +{
> +  struct r_strlenpair *next_pair;
> +  char *next_string;
> +};
> +
> +/* Copy HWCAPS into the string pairs and strings, advancing *TARGET.
> +   Skip over entries whose bit is not set in MASK.  */
> +static void
> +copy_hwcaps (struct copy_hwcaps *target, const char *hwcaps,
> +	     int32_t bitmask, const char *mask)
> +{
> +  struct dl_hwcaps_split_masked sp;
> +  _dl_hwcaps_split_masked_init (&sp, hwcaps, bitmask, mask);
> +  while (_dl_hwcaps_split_masked (&sp))
> +    {
> +      target->next_pair->str = target->next_string;
> +      char *slash = __mempcpy (__mempcpy (target->next_string,
> +					  GLIBC_HWCAPS_PREFIX,
> +					  strlen (GLIBC_HWCAPS_PREFIX)),
> +			       sp.split.segment, sp.split.length);
> +      *slash = '/';
> +      target->next_pair->len
> +	= strlen (GLIBC_HWCAPS_PREFIX) + sp.split.length + 1;
> +      ++target->next_pair;
> +      target->next_string = slash + 1;
> +    }
> +}
> +

Ok.

>  /* Return an array of useful/necessary hardware capability names.  */
>  const struct r_strlenpair *
> -_dl_important_hwcaps (size_t *sz, size_t *max_capstrlen)
> +_dl_important_hwcaps (const char *glibc_hwcaps_prepend,
> +		      const char *glibc_hwcaps_mask,
> +		      size_t *sz, size_t *max_capstrlen)
>  {
>    uint64_t hwcap_mask = GET_HWCAP_MASK();
>    /* Determine how many important bits are set.  */
>    uint64_t masked = GLRO(dl_hwcap) & hwcap_mask;
>    size_t cnt = GLRO (dl_platform) != NULL;
>    size_t n, m;
> -  size_t total;
>    struct r_strlenpair *result;
>    struct r_strlenpair *rp;
>    char *cp;
>  
> +  /* glibc-hwcaps subdirectories.  These are exempted from the power
> +     set construction below below.  */
> +  int32_t hwcaps_subdirs_active = _dl_hwcaps_subdirs_active ();
> +  struct count_hwcaps hwcaps_counts =  { 0, };

Are you trying to outline the first element must be 0 initilized here
with the comma? 

> +  count_hwcaps (&hwcaps_counts, glibc_hwcaps_prepend, -1, NULL);
> +  count_hwcaps (&hwcaps_counts, _dl_hwcaps_subdirs, hwcaps_subdirs_active,
> +		glibc_hwcaps_mask);
> +
> +  /* Each hwcaps subdirectory has a GLIBC_HWCAPS_PREFIX string prefix
> +     and a "/" suffix once stored in the result.  */
> +  size_t total = (hwcaps_counts.count * (strlen (GLIBC_HWCAPS_PREFIX) + 1)
> +		  + hwcaps_counts.total_length);
> +
>    /* Count the number of bits set in the masked value.  */
>    for (n = 0; (~((1ULL << n) - 1) & masked) != 0; ++n)
>      if ((masked & (1ULL << n)) != 0)

Ok.

> @@ -74,10 +151,10 @@ _dl_important_hwcaps (size_t *sz, size_t *max_capstrlen)
>  
>    /* Determine the total size of all strings together.  */
>    if (cnt == 1)
> -    total = temp[0].len + 1;
> +    total += temp[0].len + 1;
>    else
>      {
> -      total = temp[0].len + temp[cnt - 1].len + 2;
> +      total += temp[0].len + temp[cnt - 1].len + 2;
>        if (cnt > 2)
>  	{
>  	  total <<= 1;
> @@ -94,26 +171,48 @@ _dl_important_hwcaps (size_t *sz, size_t *max_capstrlen)
>  	}
>      }
>  
> -  /* The result structure: we use a very compressed way to store the
> -     various combinations of capability names.  */
> -  *sz = 1 << cnt;
> -  result = (struct r_strlenpair *) malloc (*sz * sizeof (*result) + total);
> -  if (result == NULL)
> +  *sz = hwcaps_counts.count + (1 << cnt);
> +
> +  /* This is the overall result, including both glibc-hwcaps
> +     subdirectories and the legacy hwcaps subdirectories using the
> +     power set construction.  */
> +  struct r_strlenpair *overall_result
> +    = malloc (*sz * sizeof (*result) + total);
> +  if (overall_result == NULL)
>      _dl_signal_error (ENOMEM, NULL, NULL,
>  		      N_("cannot create capability list"));
>  
> +  /* Fill in the glibc-hwcaps subdirectories.  */
> +  {
> +    struct copy_hwcaps target;
> +    target.next_pair = overall_result;
> +    target.next_string = (char *) (overall_result + *sz);
> +    copy_hwcaps (&target, glibc_hwcaps_prepend, -1, NULL);
> +    copy_hwcaps (&target, _dl_hwcaps_subdirs,
> +		 hwcaps_subdirs_active, glibc_hwcaps_mask);
> +    /* Set up the write target for the power set construction.  */
> +    result = target.next_pair;
> +    cp = target.next_string;
> +  }
> +
> +

Ok.

> +  /* Power set construction begins here.  We use a very compressed way
> +     to store the various combinations of capability names.  */
> +
>    if (cnt == 1)
>      {
> -      result[0].str = (char *) (result + *sz);
> +      result[0].str = cp;
>        result[0].len = temp[0].len + 1;
> -      result[1].str = (char *) (result + *sz);
> +      result[1].str = cp;
>        result[1].len = 0;
> -      cp = __mempcpy ((char *) (result + *sz), temp[0].str, temp[0].len);
> +      cp = __mempcpy (cp, temp[0].str, temp[0].len);
>        *cp = '/';
> -      *sz = 2;
> -      *max_capstrlen = result[0].len;
> +      if (result[0].len > hwcaps_counts.maximum_length)
> +	*max_capstrlen = result[0].len;
> +      else
> +	*max_capstrlen = hwcaps_counts.maximum_length;
>  
> -      return result;
> +      return overall_result;
>      }
>  
>    /* Fill in the information.  This follows the following scheme

Ok.

> @@ -124,7 +223,7 @@ _dl_important_hwcaps (size_t *sz, size_t *max_capstrlen)
>  	      #3: 0, 3			1001
>       This allows the representation of all possible combinations of
>       capability names in the string.  First generate the strings.  */
> -  result[1].str = result[0].str = cp = (char *) (result + *sz);
> +  result[1].str = result[0].str = cp;
>  #define add(idx) \
>        cp = __mempcpy (__mempcpy (cp, temp[idx].str, temp[idx].len), "/", 1);
>    if (cnt == 2)
> @@ -191,7 +290,10 @@ _dl_important_hwcaps (size_t *sz, size_t *max_capstrlen)
>    while (--n != 0);
>  
>    /* The maximum string length.  */
> -  *max_capstrlen = result[0].len;
> +  if (result[0].len > hwcaps_counts.maximum_length)
> +    *max_capstrlen = result[0].len;
> +  else
> +    *max_capstrlen = hwcaps_counts.maximum_length;
>  
> -  return result;
> +  return overall_result;
>  }

Ok.

> diff --git a/elf/dl-hwcaps.h b/elf/dl-hwcaps.h
> index b66da59b89..a6453f15f3 100644
> --- a/elf/dl-hwcaps.h
> +++ b/elf/dl-hwcaps.h
> @@ -16,6 +16,11 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>  
> +#ifndef _DL_HWCAPS_H
> +#define _DL_HWCAPS_H
> +
> +#include <stdint.h>
> +
>  #include <elf/dl-tunables.h>
>  
>  #if HAVE_TUNABLES
> @@ -28,3 +33,81 @@
>  #  define GET_HWCAP_MASK() (0)
>  # endif
>  #endif
> +
> +#define GLIBC_HWCAPS_SUBDIRECTORY "glibc-hwcaps"
> +#define GLIBC_HWCAPS_PREFIX GLIBC_HWCAPS_SUBDIRECTORY "/"
> +
> +/* Used by _dl_hwcaps_split below, to split strings at ':'
> +   separators.  */
> +struct dl_hwcaps_split
> +{
> +  const char *segment;          /* Start of the current segment.  */
> +  size_t length;                /* Number of bytes until ':' or NUL.  */
> +};
> +
> +/* Prepare *S to parse SUBJECT, for future _dl_hwcaps_split calls.  If
> +   SUBJECT is NULL, it is treated as the empty string.  */
> +static inline void
> +_dl_hwcaps_split_init (struct dl_hwcaps_split *s, const char *subject)
> +{
> +  s->segment = subject;
> +  /* The initial call to _dl_hwcaps_split will not skip anything.  */
> +  s->length = 0;
> +}
> +

Ok.

> +/* Extract the next non-empty string segment, up to ':' or the null
> +   terminator.  Return true if one more segment was found, or false if
> +   the end of the string was reached.  On success, S->segment is the
> +   start of the segment found, and S->length is its length.
> +   (Typically, S->segment[S->length] is not null.)  */
> +_Bool _dl_hwcaps_split (struct dl_hwcaps_split *s) attribute_hidden;
> +
> +/* Similar to dl_hwcaps_split, but with bit-based and name-based
> +   masking.  */
> +struct dl_hwcaps_split_masked
> +{
> +  struct dl_hwcaps_split split;
> +
> +  /* For used by the iterator implementation.  */
> +  const char *mask;
> +  int32_t bitmask;
> +};
> +
> +/* Prepare *S for iteration with _dl_hwcaps_split_masked.  Only HWCAP
> +   names in SUBJECT whose bit is set in BITMASK and whose ane is in

s/ane/one

> +   MASK will be returned.  SUBJECT must not contain empty HWCAP names.
> +   If MASK is NULL, no name-based masking is applied.  Likewise for
> +   BITMASK if BITMASK is -1 (infinite number of bits).  */
> +static inline void
> +_dl_hwcaps_split_masked_init (struct dl_hwcaps_split_masked *s,
> +                              const char *subject,
> +                              int32_t bitmask, const char *mask)
> +{
> +  _dl_hwcaps_split_init (&s->split, subject);
> +  s->bitmask = bitmask;
> +  s->mask = mask;
> +}
> +
> +/* Like _dl_hwcaps_split, but apply masking.  */
> +_Bool _dl_hwcaps_split_masked (struct dl_hwcaps_split_masked *s)
> +  attribute_hidden;
> +
> +/* Returns true if the colon-separated HWCAP list HWCAPS contains the
> +   capability NAME (with length NAME_LENGTH).  If HWCAPS is NULL, the
> +   function returns true.  */
> +_Bool _dl_hwcaps_contains (const char *hwcaps, const char *name,
> +                           size_t name_length) attribute_hidden;
> +
> +/* Colon-separated string of glibc-hwcaps subdirectories, without the
> +   "glibc-hwcaps/" prefix.  The most preferred subdirectory needs to
> +   be listed first.  */
> +extern const char _dl_hwcaps_subdirs[] attribute_hidden;
> +
> +/* Returns a bitmap of active subdirectories in _dl_hwcaps_subdirs.
> +   Bit 0 (the LSB) corresponds to the first substring in
> +   _dl_hwcaps_subdirs, bit 1 to the second substring, and so on.
> +   There is no direct correspondence between HWCAP bitmasks and this
> +   bitmask.  */
> +int32_t _dl_hwcaps_subdirs_active (void) attribute_hidden;
> +
> +#endif /* _DL_HWCAPS_H */

Ok.

> diff --git a/elf/dl-hwcaps_split.c b/elf/dl-hwcaps_split.c
> new file mode 100644
> index 0000000000..95225e9f40
> --- /dev/null
> +++ b/elf/dl-hwcaps_split.c
> @@ -0,0 +1,77 @@
> +/* Hardware capability support for run-time dynamic loader.  String splitting.
> +   Copyright (C) 2020 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <dl-hwcaps.h>
> +#include <stdbool.h>
> +#include <string.h>
> +
> +_Bool
> +_dl_hwcaps_split (struct dl_hwcaps_split *s)
> +{
> +  if (s->segment == NULL)
> +    return false;
> +
> +  /* Skip over the previous segment.   */
> +  s->segment += s->length;
> +
> +  /* Consume delimiters.  This also avoids returning an empty
> +     segment.  */
> +  while (*s->segment == ':')
> +    ++s->segment;
> +  if (*s->segment == '\0')
> +    return false;
> +
> +  /* This could use strchrnul, but we would have to link the function
> +     into ld.so for that.  */
> +  const char *colon = strchr (s->segment, ':');
> +  if (colon == NULL)
> +    s->length = strlen (s->segment);
> +  else
> +    s->length = colon - s->segment;
> +  return true;
> +}

Ok.

> +
> +_Bool
> +_dl_hwcaps_split_masked (struct dl_hwcaps_split_masked *s)
> +{
> +  while (true)
> +    {
> +      if (!_dl_hwcaps_split (&s->split))
> +        return false;
> +      bool active = s->bitmask & 1;
> +      s->bitmask >>= 1;
> +      if (active && _dl_hwcaps_contains (s->mask,
> +                                         s->split.segment, s->split.length))
> +        return true;
> +    }
> +}
> +
> +_Bool
> +_dl_hwcaps_contains (const char *hwcaps, const char *name, size_t name_length)
> +{
> +  if (hwcaps == NULL)
> +    return true;
> +
> +  struct dl_hwcaps_split split;
> +  _dl_hwcaps_split_init (&split, hwcaps);
> +  while (_dl_hwcaps_split (&split))
> +    if (split.length == name_length
> +        && memcmp (split.segment, name, name_length) == 0)
> +      return true;
> +  return false;
> +}

Ok.

> diff --git a/elf/dl-load.c b/elf/dl-load.c
> index f3201e7c14..9020f1646f 100644
> --- a/elf/dl-load.c
> +++ b/elf/dl-load.c
> @@ -682,7 +682,9 @@ cache_rpath (struct link_map *l,
>  
>  
>  void
> -_dl_init_paths (const char *llp, const char *source)
> +_dl_init_paths (const char *llp, const char *source,
> +		const char *glibc_hwcaps_prepend,
> +		const char *glibc_hwcaps_mask)
>  {
>    size_t idx;
>    const char *strp;
> @@ -697,7 +699,8 @@ _dl_init_paths (const char *llp, const char *source)
>  
>  #ifdef SHARED
>    /* Get the capabilities.  */
> -  capstr = _dl_important_hwcaps (&ncapstr, &max_capstrlen);
> +  capstr = _dl_important_hwcaps (glibc_hwcaps_prepend, glibc_hwcaps_mask,
> +				 &ncapstr, &max_capstrlen);
>  #endif
>  
>    /* First set up the rest of the default search directory entries.  */

Ok.

> diff --git a/elf/dl-main.h b/elf/dl-main.h
> index 0df849d3cd..710d29685b 100644
> --- a/elf/dl-main.h
> +++ b/elf/dl-main.h
> @@ -80,6 +80,14 @@ struct dl_main_state
>    /* The preload list passed as a command argument.  */
>    const char *preloadarg;
>  
> +  /* Additional glibc-hwcaps subdirectories to search first.
> +     Colon-separated list.  */
> +  const char *glibc_hwcaps_prepend;
> +
> +  /* Mask for the internal glibc-hwcaps subdirectories.
> +     Colon-separated list.  */
> +  const char *glibc_hwcaps_mask;
> +
>    enum mode mode;
>  
>    /* True if any of the debugging options is enabled.  */

Ok.

> @@ -94,7 +102,8 @@ struct dl_main_state
>  static inline void
>  call_init_paths (const struct dl_main_state *state)
>  {
> -  _dl_init_paths (state->library_path, state->library_path_source);
> +  _dl_init_paths (state->library_path, state->library_path_source,
> +                  state->glibc_hwcaps_prepend, state->glibc_hwcaps_mask);
>  }
>  
>  /* Print ld.so usage information and exit.  */

Ok.

> diff --git a/elf/dl-support.c b/elf/dl-support.c
> index afbc94df54..3264262f4e 100644
> --- a/elf/dl-support.c
> +++ b/elf/dl-support.c
> @@ -323,7 +323,10 @@ _dl_non_dynamic_init (void)
>  
>    /* Initialize the data structures for the search paths for shared
>       objects.  */
> -  _dl_init_paths (getenv ("LD_LIBRARY_PATH"), "LD_LIBRARY_PATH");
> +  _dl_init_paths (getenv ("LD_LIBRARY_PATH"), "LD_LIBRARY_PATH",
> +		  /* No glibc-hwcaps selection support in statically
> +		     linked binaries.  */
> +		  NULL, NULL);
>  
>    /* Remember the last search directory added at startup.  */
>    _dl_init_all_dirs = GL(dl_all_dirs);

Ok.

> diff --git a/elf/dl-usage.c b/elf/dl-usage.c
> index 9765d1b5c1..9a3cbb8b91 100644
> --- a/elf/dl-usage.c
> +++ b/elf/dl-usage.c
> @@ -82,7 +82,7 @@ print_search_path_for_help (struct dl_main_state *state)
>  {
>    if (__rtld_search_dirs.dirs == NULL)
>      /* The run-time search paths have not yet been initialized.  */
> -    _dl_init_paths (state->library_path, state->library_path_source);
> +    call_init_paths (state);
>  
>    _dl_printf ("\nShared library search path:\n");
>  
> @@ -131,6 +131,67 @@ print_hwcap_1_finish (bool *first)
>      _dl_printf (")\n");
>  }
>  
> +/* Print the header for print_hwcaps_subdirectories.  */
> +static void
> +print_hwcaps_subdirectories_header (bool *nothing_printed)
> +{
> +  if (*nothing_printed)
> +    {
> +      _dl_printf ("\n\
> +Subdirectories of glibc-hwcaps directories, in priority order:\n");
> +      *nothing_printed = false;
> +    }
> +}
> +
> +/* Print the HWCAP name itself, indented.  */
> +static void
> +print_hwcaps_subdirectories_name (const struct dl_hwcaps_split *split)
> +{
> +  _dl_write (STDOUT_FILENO, "  ", 2);

Maybe 'strlen ("  ")" here? I usually see to use constants related to
string size erro-prone.

> +  _dl_write (STDOUT_FILENO, split->segment, split->length);
> +}
> +
> +/* Print the list of recognized glibc-hwcaps subdirectories.  */
> +static void
> +print_hwcaps_subdirectories (const struct dl_main_state *state)
> +{
> +  bool nothing_printed = true;
> +  struct dl_hwcaps_split split;
> +
> +  /* The prepended glibc-hwcaps subdirectories.  */
> +  _dl_hwcaps_split_init (&split, state->glibc_hwcaps_prepend);
> +  while (_dl_hwcaps_split (&split))
> +    {
> +      print_hwcaps_subdirectories_header (&nothing_printed);
> +      print_hwcaps_subdirectories_name (&split);
> +      bool first = true;
> +      print_hwcap_1 (&first, true, "searched");
> +      print_hwcap_1_finish (&first);
> +    }
> +
> +  /* The built-in glibc-hwcaps subdirectories.  Do the filtering
> +     manually, so that more precise diagnostics are possible.  */
> +  int32_t mask = _dl_hwcaps_subdirs_active ();
> +  _dl_hwcaps_split_init (&split, _dl_hwcaps_subdirs);
> +  while (_dl_hwcaps_split (&split))
> +    {
> +      print_hwcaps_subdirectories_header (&nothing_printed);
> +      print_hwcaps_subdirectories_name (&split);
> +      bool first = true;
> +      print_hwcap_1 (&first, mask & 1, "supported");
> +      bool listed = _dl_hwcaps_contains (state->glibc_hwcaps_mask,
> +                                         split.segment, split.length);
> +      print_hwcap_1 (&first, !listed, "masked");
> +      print_hwcap_1 (&first, (mask & 1) && listed, "searched");
> +      print_hwcap_1_finish (&first);
> +      mask >>= 1;

As before, I am not very found of this shift on a signed interger.

> +    }
> +
> +  if (nothing_printed)
> +    _dl_printf ("\n\
> +No subdirectories of glibc-hwcaps directories are searched.\n");
> +}
> +

Ok.

>  /* Write a list of hwcap subdirectories to standard output.  See
>   _dl_important_hwcaps in dl-hwcaps.c.  */
>  static void
> @@ -185,6 +246,10 @@ setting environment variables (which would be inherted by subprocesses).\n\
>    --inhibit-cache       Do not use " LD_SO_CACHE "\n\
>    --library-path PATH   use given PATH instead of content of the environment\n\
>                          variable LD_LIBRARY_PATH\n\
> +  --glibc-hwcaps-prepend LIST\n\
> +                        search glibc-hwcaps subdirectories in LIST\n\
> +  --glibc-hwcaps-mask LIST\n\
> +                        only search built-in subdirectories if in LIST\n\
>    --inhibit-rpath LIST  ignore RUNPATH and RPATH information in object names\n\
>                          in LIST\n\
>    --audit LIST          use objects named in LIST as auditors\n\
> @@ -197,6 +262,7 @@ This program interpreter self-identifies as: " RTLD "\n\
>  ",
>                argv0);
>    print_search_path_for_help (state);
> +  print_hwcaps_subdirectories (state);
>    print_legacy_hwcap_directories ();
>    _exit (0);
>  }

Ok.

> diff --git a/elf/rtld.c b/elf/rtld.c
> index e0e8e98c2f..c9d2330364 100644
> --- a/elf/rtld.c
> +++ b/elf/rtld.c
> @@ -289,6 +289,8 @@ dl_main_state_init (struct dl_main_state *state)
>    state->library_path_source = NULL;
>    state->preloadlist = NULL;
>    state->preloadarg = NULL;
> +  state->glibc_hwcaps_prepend = NULL;
> +  state->glibc_hwcaps_mask = NULL;
>    state->mode = normal;
>    state->any_debug = false;
>    state->version_info = false;

Ok.

> @@ -1244,6 +1246,22 @@ dl_main (const ElfW(Phdr) *phdr,
>  	  {
>  	    argv0 = _dl_argv[2];
>  
> +	    _dl_skip_args += 2;
> +	    _dl_argc -= 2;
> +	    _dl_argv += 2;
> +	  }
> +	else if (strcmp (_dl_argv[1], "--glibc-hwcaps-prepend") == 0
> +		 && _dl_argc > 2)
> +	  {
> +	    state.glibc_hwcaps_prepend = _dl_argv[2];
> +	    _dl_skip_args += 2;
> +	    _dl_argc -= 2;
> +	    _dl_argv += 2;
> +	  }
> +	else if (strcmp (_dl_argv[1], "--glibc-hwcaps-mask") == 0
> +		 && _dl_argc > 2)
> +	  {
> +	    state.glibc_hwcaps_mask = _dl_argv[2];
>  	    _dl_skip_args += 2;
>  	    _dl_argc -= 2;
>  	    _dl_argv += 2;

Ok.

> diff --git a/elf/tst-dl-hwcaps_split.c b/elf/tst-dl-hwcaps_split.c
> new file mode 100644
> index 0000000000..929c99a23b
> --- /dev/null
> +++ b/elf/tst-dl-hwcaps_split.c
> @@ -0,0 +1,139 @@
> +/* Unit tests for dl-hwcaps.c.
> +   Copyright (C) 2020 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <array_length.h>
> +#include <dl-hwcaps.h>
> +#include <string.h>
> +#include <support/check.h>
> +
> +static void
> +check_split_masked (const char *input, int32_t bitmask, const char *mask,
> +                    const char *expected[], size_t expected_length)
> +{
> +  struct dl_hwcaps_split_masked split;
> +  _dl_hwcaps_split_masked_init (&split, input, bitmask, mask);
> +  size_t index = 0;
> +  while (_dl_hwcaps_split_masked (&split))
> +    {
> +      TEST_VERIFY_EXIT (index < expected_length);
> +      TEST_COMPARE_BLOB (expected[index], strlen (expected[index]),
> +                         split.split.segment, split.split.length);
> +      ++index;
> +    }
> +  TEST_COMPARE (index, expected_length);
> +}
> +

Ok.

> +static void
> +check_split (const char *input,
> +             const char *expected[], size_t expected_length)
> +{
> +  struct dl_hwcaps_split split;
> +  _dl_hwcaps_split_init (&split, input);
> +  size_t index = 0;
> +  while (_dl_hwcaps_split (&split))
> +    {
> +      TEST_VERIFY_EXIT (index < expected_length);
> +      TEST_COMPARE_BLOB (expected[index], strlen (expected[index]),
> +                         split.segment, split.length);
> +      ++index;
> +    }
> +  TEST_COMPARE (index, expected_length);
> +
> +  /* Reuse the test cases with masking that does not actually remove
> +     anything.  */
> +  check_split_masked (input, -1, NULL, expected, expected_length);
> +  check_split_masked (input, -1, input, expected, expected_length);
> +}
> +

Ok.

> +static int
> +do_test (void)
> +{
> +  /* Splitting tests, without masking.  */
> +  check_split (NULL, NULL, 0);
> +  check_split ("", NULL, 0);
> +  check_split (":", NULL, 0);
> +  check_split ("::", NULL, 0);
> +
> +  {
> +    const char *expected[] = { "first" };
> +    check_split ("first", expected, array_length (expected));
> +    check_split (":first", expected, array_length (expected));
> +    check_split ("first:", expected, array_length (expected));
> +    check_split (":first:", expected, array_length (expected));
> +  }
> +
> +  {
> +    const char *expected[] = { "first", "second" };
> +    check_split ("first:second", expected, array_length (expected));
> +    check_split ("first::second", expected, array_length (expected));
> +    check_split (":first:second", expected, array_length (expected));
> +    check_split ("first:second:", expected, array_length (expected));
> +    check_split (":first:second:", expected, array_length (expected));
> +  }
> +
> +  /* Splitting tests with masking.  */
> +  {
> +    const char *expected[] = { "first" };
> +    check_split_masked ("first", 3, "first:second",
> +                        expected, array_length (expected));
> +    check_split_masked ("first:second", 3, "first:",
> +                        expected, array_length (expected));
> +    check_split_masked ("first:second", 1, NULL,
> +                        expected, array_length (expected));
> +  }
> +  {
> +    const char *expected[] = { "second" };
> +    check_split_masked ("first:second", 3, "second",
> +                        expected, array_length (expected));
> +    check_split_masked ("first:second:third", -1, "second:",
> +                        expected, array_length (expected));
> +    check_split_masked ("first:second", 2, NULL,
> +                        expected, array_length (expected));
> +    check_split_masked ("first:second:third", 2, "first:second",
> +                        expected, array_length (expected));
> +  }
> +
> +  /* Tests for _dl_hwcaps_contains.  */
> +  TEST_VERIFY (_dl_hwcaps_contains (NULL, "first", strlen ("first")));
> +  TEST_VERIFY (_dl_hwcaps_contains (NULL, "", 0));
> +  TEST_VERIFY (! _dl_hwcaps_contains ("", "first", strlen ("first")));
> +  TEST_VERIFY (! _dl_hwcaps_contains ("firs", "first", strlen ("first")));
> +  TEST_VERIFY (_dl_hwcaps_contains ("firs", "first", strlen ("first") - 1));
> +  for (int i = 0; i < strlen ("first"); ++i)
> +    TEST_VERIFY (! _dl_hwcaps_contains ("first", "first", i));
> +  TEST_VERIFY (_dl_hwcaps_contains ("first", "first", strlen ("first")));
> +  TEST_VERIFY (_dl_hwcaps_contains ("first:", "first", strlen ("first")));
> +  TEST_VERIFY (_dl_hwcaps_contains ("first:second",
> +                                    "first", strlen ("first")));
> +  TEST_VERIFY (_dl_hwcaps_contains (":first:second", "first",
> +                                    strlen ("first")));
> +  TEST_VERIFY (_dl_hwcaps_contains ("first:second", "second",
> +                                    strlen ("second")));
> +  TEST_VERIFY (_dl_hwcaps_contains ("first:second:", "second",
> +                                    strlen ("second")));
> +  for (int i = 0; i < strlen ("second"); ++i)
> +    TEST_VERIFY (!_dl_hwcaps_contains ("first:second:", "sec", i));
> +
> +  return 0;
> +}
> +
> +#include <support/test-driver.c>
> +
> +/* Rebuild the sources here because the object file is built for
> +   inclusion into the dynamic loader.  */
> +#include "dl-hwcaps_split.c"

Ok.

> diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
> index 382eeb9be0..0b2babc70c 100644
> --- a/sysdeps/generic/ldsodefs.h
> +++ b/sysdeps/generic/ldsodefs.h
> @@ -1047,8 +1047,13 @@ extern struct r_debug *_dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns)
>       attribute_hidden;
>  
>  /* Initialize the basic data structure for the search paths.  SOURCE
> -   is either "LD_LIBRARY_PATH" or "--library-path".  */
> -extern void _dl_init_paths (const char *library_path, const char *source)
> +   is either "LD_LIBRARY_PATH" or "--library-path".
> +   GLIBC_HWCAPS_PREPEND adds additional glibc-hwcaps subdirectories to
> +   search.  GLIBC_HWCAPS_MASK is used to filter the built-in
> +   subdirectories if not NULL.  */
> +extern void _dl_init_paths (const char *library_path, const char *source,
> +			    const char *glibc_hwcaps_prepend,
> +			    const char *glibc_hwcaps_mask)
>    attribute_hidden;
>  

Ok.

>  /* Gather the information needed to install the profiling tables and start
> @@ -1072,9 +1077,14 @@ extern void _dl_show_auxv (void) attribute_hidden;
>  extern char *_dl_next_ld_env_entry (char ***position) attribute_hidden;
>  
>  /* Return an array with the names of the important hardware
> -   capabilities.  The length of the array is written to *SZ, and the
> -   maximum of all strings length is written to *MAX_CAPSTRLEN.  */
> -const struct r_strlenpair *_dl_important_hwcaps (size_t *sz,
> +   capabilities.  PREPEND is a colon-separated list of glibc-hwcaps
> +   directories to search first.  MASK is a colon-separated list used
> +   to filter the built-in glibc-hwcaps subdirectories.  The length of
> +   the array is written to *SZ, and the maximum of all strings length
> +   is written to *MAX_CAPSTRLEN.  */
> +const struct r_strlenpair *_dl_important_hwcaps (const char *prepend,
> +						 const char *mask,
> +						 size_t *sz,
>  						 size_t *max_capstrlen)
>    attribute_hidden;
>  
> 

Ok.
  
Florian Weimer Oct. 12, 2020, 11:54 a.m. UTC | #9
* Adhemerval Zanella via Libc-alpha:

>> +  /* glibc-hwcaps subdirectories.  These are exempted from the power
>> +     set construction below below.  */
>> +  int32_t hwcaps_subdirs_active = _dl_hwcaps_subdirs_active ();
>> +  struct count_hwcaps hwcaps_counts =  { 0, };
>
> Are you trying to outline the first element must be 0 initilized here
> with the comma?

Empty initializers are non-standard.  The , suggests that there is more
than one initializer.  I can drop the 0, if you prefer.
>> +/* Print the HWCAP name itself, indented.  */
>> +static void
>> +print_hwcaps_subdirectories_name (const struct dl_hwcaps_split *split)
>> +{
>> +  _dl_write (STDOUT_FILENO, "  ", 2);
>
> Maybe 'strlen ("  ")" here? I usually see to use constants related to
> string size erro-prone.

I prefer it this way here because it documents that there are exactly
two spaces (not one, not three).

I'm going to send a new patch later today, hopefully addressing Paul
Clarke's comment about magic numbers.  There should also be a new test.

Thanks,
Florian
  

Patch

diff --git a/elf/Makefile b/elf/Makefile
index 7b97e773a5..2c36b08c73 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -59,7 +59,8 @@  elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \
 # ld.so uses those routines, plus some special stuff for being the program
 # interpreter and operating independent of libc.
 rtld-routines	= rtld $(all-dl-routines) dl-sysdep dl-environ dl-minimal \
-  dl-error-minimal dl-conflict dl-hwcaps dl-usage
+  dl-error-minimal dl-conflict dl-hwcaps dl-hwcaps_split dl-hwcaps-subdirs \
+  dl-usage
 all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)
 
 CFLAGS-dl-runtime.c += -fexceptions -fasynchronous-unwind-tables
@@ -217,7 +218,7 @@  tests-internal += loadtest unload unload2 circleload1 \
 	 neededtest neededtest2 neededtest3 neededtest4 \
 	 tst-tls3 tst-tls6 tst-tls7 tst-tls8 tst-dlmopen2 \
 	 tst-ptrguard1 tst-stackguard1 tst-libc_dlvsym \
-	 tst-create_format1 tst-tls-surplus
+	 tst-create_format1 tst-tls-surplus tst-dl-hwcaps_split
 tests-container += tst-pldd tst-dlopen-tlsmodid-container \
   tst-dlopen-self-container
 test-srcs = tst-pathopt
diff --git a/elf/dl-hwcaps-subdirs.c b/elf/dl-hwcaps-subdirs.c
new file mode 100644
index 0000000000..b142a3b826
--- /dev/null
+++ b/elf/dl-hwcaps-subdirs.c
@@ -0,0 +1,29 @@ 
+/* Architecture-specific glibc-hwcaps subdirectories.  Generic version.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <dl-hwcaps.h>
+
+/* In the generic version, there are no subdirectories defined.  */
+
+const char _dl_hwcaps_subdirs[] = "";
+
+int32_t
+_dl_hwcaps_subdirs_active (void)
+{
+  return 0;
+}
diff --git a/elf/dl-hwcaps.c b/elf/dl-hwcaps.c
index 44dbac099f..4de94759a2 100644
--- a/elf/dl-hwcaps.c
+++ b/elf/dl-hwcaps.c
@@ -26,20 +26,97 @@ 
 #include <dl-procinfo.h>
 #include <dl-hwcaps.h>
 
+/* This is the result of counting the substrings in a colon-separated
+   hwcaps string.  */
+struct count_hwcaps
+{
+  /* Number of substrings.  */
+  size_t count;
+
+  /* Sum of the individual substring lengths (without separates or
+     null terminators).  */
+  size_t total_length;
+
+  /* Maximum length of an individual substring.  */
+  size_t maximum_length;
+};
+
+/* Update *COUNTS according to the contents of HWCAPS.  Skip over
+   entries whose bit is not set in MASK.  */
+static void
+count_hwcaps (struct count_hwcaps *counts, const char *hwcaps,
+	      int32_t bitmask, const char *mask)
+{
+  struct dl_hwcaps_split_masked sp;
+  _dl_hwcaps_split_masked_init (&sp, hwcaps, bitmask, mask);
+  while (_dl_hwcaps_split_masked (&sp))
+    {
+      ++counts->count;
+      counts->total_length += sp.split.length;
+      if (sp.split.length > counts->maximum_length)
+	counts->maximum_length = sp.split.length;
+    }
+}
+
+/* State for copy_hwcaps.  Must be initialized to point to
+   the storage areas for the array and the strings themselves.  */
+struct copy_hwcaps
+{
+  struct r_strlenpair *next_pair;
+  char *next_string;
+};
+
+/* Copy HWCAPS into the string pairs and strings, advancing *TARGET.
+   Skip over entries whose bit is not set in MASK.  */
+static void
+copy_hwcaps (struct copy_hwcaps *target, const char *hwcaps,
+	     int32_t bitmask, const char *mask)
+{
+  struct dl_hwcaps_split_masked sp;
+  _dl_hwcaps_split_masked_init (&sp, hwcaps, bitmask, mask);
+  while (_dl_hwcaps_split_masked (&sp))
+    {
+      target->next_pair->str = target->next_string;
+      char *slash = __mempcpy (__mempcpy (target->next_string,
+					  GLIBC_HWCAPS_PREFIX,
+					  strlen (GLIBC_HWCAPS_PREFIX)),
+			       sp.split.segment, sp.split.length);
+      *slash = '/';
+      target->next_pair->len
+	= strlen (GLIBC_HWCAPS_PREFIX) + sp.split.length + 1;
+      ++target->next_pair;
+      target->next_string = slash + 1;
+    }
+}
+
 /* Return an array of useful/necessary hardware capability names.  */
 const struct r_strlenpair *
-_dl_important_hwcaps (size_t *sz, size_t *max_capstrlen)
+_dl_important_hwcaps (const char *glibc_hwcaps_prepend,
+		      const char *glibc_hwcaps_mask,
+		      size_t *sz, size_t *max_capstrlen)
 {
   uint64_t hwcap_mask = GET_HWCAP_MASK();
   /* Determine how many important bits are set.  */
   uint64_t masked = GLRO(dl_hwcap) & hwcap_mask;
   size_t cnt = GLRO (dl_platform) != NULL;
   size_t n, m;
-  size_t total;
   struct r_strlenpair *result;
   struct r_strlenpair *rp;
   char *cp;
 
+  /* glibc-hwcaps subdirectories.  These are exempted from the power
+     set construction below below.  */
+  int32_t hwcaps_subdirs_active = _dl_hwcaps_subdirs_active ();
+  struct count_hwcaps hwcaps_counts =  { 0, };
+  count_hwcaps (&hwcaps_counts, glibc_hwcaps_prepend, -1, NULL);
+  count_hwcaps (&hwcaps_counts, _dl_hwcaps_subdirs, hwcaps_subdirs_active,
+		glibc_hwcaps_mask);
+
+  /* Each hwcaps subdirectory has a GLIBC_HWCAPS_PREFIX string prefix
+     and a "/" suffix once stored in the result.  */
+  size_t total = (hwcaps_counts.count * (strlen (GLIBC_HWCAPS_PREFIX) + 1)
+		  + hwcaps_counts.total_length);
+
   /* Count the number of bits set in the masked value.  */
   for (n = 0; (~((1ULL << n) - 1) & masked) != 0; ++n)
     if ((masked & (1ULL << n)) != 0)
@@ -74,10 +151,10 @@  _dl_important_hwcaps (size_t *sz, size_t *max_capstrlen)
 
   /* Determine the total size of all strings together.  */
   if (cnt == 1)
-    total = temp[0].len + 1;
+    total += temp[0].len + 1;
   else
     {
-      total = temp[0].len + temp[cnt - 1].len + 2;
+      total += temp[0].len + temp[cnt - 1].len + 2;
       if (cnt > 2)
 	{
 	  total <<= 1;
@@ -94,26 +171,48 @@  _dl_important_hwcaps (size_t *sz, size_t *max_capstrlen)
 	}
     }
 
-  /* The result structure: we use a very compressed way to store the
-     various combinations of capability names.  */
-  *sz = 1 << cnt;
-  result = (struct r_strlenpair *) malloc (*sz * sizeof (*result) + total);
-  if (result == NULL)
+  *sz = hwcaps_counts.count + (1 << cnt);
+
+  /* This is the overall result, including both glibc-hwcaps
+     subdirectories and the legacy hwcaps subdirectories using the
+     power set construction.  */
+  struct r_strlenpair *overall_result
+    = malloc (*sz * sizeof (*result) + total);
+  if (overall_result == NULL)
     _dl_signal_error (ENOMEM, NULL, NULL,
 		      N_("cannot create capability list"));
 
+  /* Fill in the glibc-hwcaps subdirectories.  */
+  {
+    struct copy_hwcaps target;
+    target.next_pair = overall_result;
+    target.next_string = (char *) (overall_result + *sz);
+    copy_hwcaps (&target, glibc_hwcaps_prepend, -1, NULL);
+    copy_hwcaps (&target, _dl_hwcaps_subdirs,
+		 hwcaps_subdirs_active, glibc_hwcaps_mask);
+    /* Set up the write target for the power set construction.  */
+    result = target.next_pair;
+    cp = target.next_string;
+  }
+
+
+  /* Power set construction begins here.  We use a very compressed way
+     to store the various combinations of capability names.  */
+
   if (cnt == 1)
     {
-      result[0].str = (char *) (result + *sz);
+      result[0].str = cp;
       result[0].len = temp[0].len + 1;
-      result[1].str = (char *) (result + *sz);
+      result[1].str = cp;
       result[1].len = 0;
-      cp = __mempcpy ((char *) (result + *sz), temp[0].str, temp[0].len);
+      cp = __mempcpy (cp, temp[0].str, temp[0].len);
       *cp = '/';
-      *sz = 2;
-      *max_capstrlen = result[0].len;
+      if (result[0].len > hwcaps_counts.maximum_length)
+	*max_capstrlen = result[0].len;
+      else
+	*max_capstrlen = hwcaps_counts.maximum_length;
 
-      return result;
+      return overall_result;
     }
 
   /* Fill in the information.  This follows the following scheme
@@ -124,7 +223,7 @@  _dl_important_hwcaps (size_t *sz, size_t *max_capstrlen)
 	      #3: 0, 3			1001
      This allows the representation of all possible combinations of
      capability names in the string.  First generate the strings.  */
-  result[1].str = result[0].str = cp = (char *) (result + *sz);
+  result[1].str = result[0].str = cp;
 #define add(idx) \
       cp = __mempcpy (__mempcpy (cp, temp[idx].str, temp[idx].len), "/", 1);
   if (cnt == 2)
@@ -191,7 +290,10 @@  _dl_important_hwcaps (size_t *sz, size_t *max_capstrlen)
   while (--n != 0);
 
   /* The maximum string length.  */
-  *max_capstrlen = result[0].len;
+  if (result[0].len > hwcaps_counts.maximum_length)
+    *max_capstrlen = result[0].len;
+  else
+    *max_capstrlen = hwcaps_counts.maximum_length;
 
-  return result;
+  return overall_result;
 }
diff --git a/elf/dl-hwcaps.h b/elf/dl-hwcaps.h
index b66da59b89..a6453f15f3 100644
--- a/elf/dl-hwcaps.h
+++ b/elf/dl-hwcaps.h
@@ -16,6 +16,11 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#ifndef _DL_HWCAPS_H
+#define _DL_HWCAPS_H
+
+#include <stdint.h>
+
 #include <elf/dl-tunables.h>
 
 #if HAVE_TUNABLES
@@ -28,3 +33,81 @@ 
 #  define GET_HWCAP_MASK() (0)
 # endif
 #endif
+
+#define GLIBC_HWCAPS_SUBDIRECTORY "glibc-hwcaps"
+#define GLIBC_HWCAPS_PREFIX GLIBC_HWCAPS_SUBDIRECTORY "/"
+
+/* Used by _dl_hwcaps_split below, to split strings at ':'
+   separators.  */
+struct dl_hwcaps_split
+{
+  const char *segment;          /* Start of the current segment.  */
+  size_t length;                /* Number of bytes until ':' or NUL.  */
+};
+
+/* Prepare *S to parse SUBJECT, for future _dl_hwcaps_split calls.  If
+   SUBJECT is NULL, it is treated as the empty string.  */
+static inline void
+_dl_hwcaps_split_init (struct dl_hwcaps_split *s, const char *subject)
+{
+  s->segment = subject;
+  /* The initial call to _dl_hwcaps_split will not skip anything.  */
+  s->length = 0;
+}
+
+/* Extract the next non-empty string segment, up to ':' or the null
+   terminator.  Return true if one more segment was found, or false if
+   the end of the string was reached.  On success, S->segment is the
+   start of the segment found, and S->length is its length.
+   (Typically, S->segment[S->length] is not null.)  */
+_Bool _dl_hwcaps_split (struct dl_hwcaps_split *s) attribute_hidden;
+
+/* Similar to dl_hwcaps_split, but with bit-based and name-based
+   masking.  */
+struct dl_hwcaps_split_masked
+{
+  struct dl_hwcaps_split split;
+
+  /* For used by the iterator implementation.  */
+  const char *mask;
+  int32_t bitmask;
+};
+
+/* Prepare *S for iteration with _dl_hwcaps_split_masked.  Only HWCAP
+   names in SUBJECT whose bit is set in BITMASK and whose ane is in
+   MASK will be returned.  SUBJECT must not contain empty HWCAP names.
+   If MASK is NULL, no name-based masking is applied.  Likewise for
+   BITMASK if BITMASK is -1 (infinite number of bits).  */
+static inline void
+_dl_hwcaps_split_masked_init (struct dl_hwcaps_split_masked *s,
+                              const char *subject,
+                              int32_t bitmask, const char *mask)
+{
+  _dl_hwcaps_split_init (&s->split, subject);
+  s->bitmask = bitmask;
+  s->mask = mask;
+}
+
+/* Like _dl_hwcaps_split, but apply masking.  */
+_Bool _dl_hwcaps_split_masked (struct dl_hwcaps_split_masked *s)
+  attribute_hidden;
+
+/* Returns true if the colon-separated HWCAP list HWCAPS contains the
+   capability NAME (with length NAME_LENGTH).  If HWCAPS is NULL, the
+   function returns true.  */
+_Bool _dl_hwcaps_contains (const char *hwcaps, const char *name,
+                           size_t name_length) attribute_hidden;
+
+/* Colon-separated string of glibc-hwcaps subdirectories, without the
+   "glibc-hwcaps/" prefix.  The most preferred subdirectory needs to
+   be listed first.  */
+extern const char _dl_hwcaps_subdirs[] attribute_hidden;
+
+/* Returns a bitmap of active subdirectories in _dl_hwcaps_subdirs.
+   Bit 0 (the LSB) corresponds to the first substring in
+   _dl_hwcaps_subdirs, bit 1 to the second substring, and so on.
+   There is no direct correspondence between HWCAP bitmasks and this
+   bitmask.  */
+int32_t _dl_hwcaps_subdirs_active (void) attribute_hidden;
+
+#endif /* _DL_HWCAPS_H */
diff --git a/elf/dl-hwcaps_split.c b/elf/dl-hwcaps_split.c
new file mode 100644
index 0000000000..95225e9f40
--- /dev/null
+++ b/elf/dl-hwcaps_split.c
@@ -0,0 +1,77 @@ 
+/* Hardware capability support for run-time dynamic loader.  String splitting.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <dl-hwcaps.h>
+#include <stdbool.h>
+#include <string.h>
+
+_Bool
+_dl_hwcaps_split (struct dl_hwcaps_split *s)
+{
+  if (s->segment == NULL)
+    return false;
+
+  /* Skip over the previous segment.   */
+  s->segment += s->length;
+
+  /* Consume delimiters.  This also avoids returning an empty
+     segment.  */
+  while (*s->segment == ':')
+    ++s->segment;
+  if (*s->segment == '\0')
+    return false;
+
+  /* This could use strchrnul, but we would have to link the function
+     into ld.so for that.  */
+  const char *colon = strchr (s->segment, ':');
+  if (colon == NULL)
+    s->length = strlen (s->segment);
+  else
+    s->length = colon - s->segment;
+  return true;
+}
+
+_Bool
+_dl_hwcaps_split_masked (struct dl_hwcaps_split_masked *s)
+{
+  while (true)
+    {
+      if (!_dl_hwcaps_split (&s->split))
+        return false;
+      bool active = s->bitmask & 1;
+      s->bitmask >>= 1;
+      if (active && _dl_hwcaps_contains (s->mask,
+                                         s->split.segment, s->split.length))
+        return true;
+    }
+}
+
+_Bool
+_dl_hwcaps_contains (const char *hwcaps, const char *name, size_t name_length)
+{
+  if (hwcaps == NULL)
+    return true;
+
+  struct dl_hwcaps_split split;
+  _dl_hwcaps_split_init (&split, hwcaps);
+  while (_dl_hwcaps_split (&split))
+    if (split.length == name_length
+        && memcmp (split.segment, name, name_length) == 0)
+      return true;
+  return false;
+}
diff --git a/elf/dl-load.c b/elf/dl-load.c
index f3201e7c14..9020f1646f 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -682,7 +682,9 @@  cache_rpath (struct link_map *l,
 
 
 void
-_dl_init_paths (const char *llp, const char *source)
+_dl_init_paths (const char *llp, const char *source,
+		const char *glibc_hwcaps_prepend,
+		const char *glibc_hwcaps_mask)
 {
   size_t idx;
   const char *strp;
@@ -697,7 +699,8 @@  _dl_init_paths (const char *llp, const char *source)
 
 #ifdef SHARED
   /* Get the capabilities.  */
-  capstr = _dl_important_hwcaps (&ncapstr, &max_capstrlen);
+  capstr = _dl_important_hwcaps (glibc_hwcaps_prepend, glibc_hwcaps_mask,
+				 &ncapstr, &max_capstrlen);
 #endif
 
   /* First set up the rest of the default search directory entries.  */
diff --git a/elf/dl-main.h b/elf/dl-main.h
index 0df849d3cd..710d29685b 100644
--- a/elf/dl-main.h
+++ b/elf/dl-main.h
@@ -80,6 +80,14 @@  struct dl_main_state
   /* The preload list passed as a command argument.  */
   const char *preloadarg;
 
+  /* Additional glibc-hwcaps subdirectories to search first.
+     Colon-separated list.  */
+  const char *glibc_hwcaps_prepend;
+
+  /* Mask for the internal glibc-hwcaps subdirectories.
+     Colon-separated list.  */
+  const char *glibc_hwcaps_mask;
+
   enum mode mode;
 
   /* True if any of the debugging options is enabled.  */
@@ -94,7 +102,8 @@  struct dl_main_state
 static inline void
 call_init_paths (const struct dl_main_state *state)
 {
-  _dl_init_paths (state->library_path, state->library_path_source);
+  _dl_init_paths (state->library_path, state->library_path_source,
+                  state->glibc_hwcaps_prepend, state->glibc_hwcaps_mask);
 }
 
 /* Print ld.so usage information and exit.  */
diff --git a/elf/dl-support.c b/elf/dl-support.c
index afbc94df54..3264262f4e 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -323,7 +323,10 @@  _dl_non_dynamic_init (void)
 
   /* Initialize the data structures for the search paths for shared
      objects.  */
-  _dl_init_paths (getenv ("LD_LIBRARY_PATH"), "LD_LIBRARY_PATH");
+  _dl_init_paths (getenv ("LD_LIBRARY_PATH"), "LD_LIBRARY_PATH",
+		  /* No glibc-hwcaps selection support in statically
+		     linked binaries.  */
+		  NULL, NULL);
 
   /* Remember the last search directory added at startup.  */
   _dl_init_all_dirs = GL(dl_all_dirs);
diff --git a/elf/dl-usage.c b/elf/dl-usage.c
index 9765d1b5c1..9a3cbb8b91 100644
--- a/elf/dl-usage.c
+++ b/elf/dl-usage.c
@@ -82,7 +82,7 @@  print_search_path_for_help (struct dl_main_state *state)
 {
   if (__rtld_search_dirs.dirs == NULL)
     /* The run-time search paths have not yet been initialized.  */
-    _dl_init_paths (state->library_path, state->library_path_source);
+    call_init_paths (state);
 
   _dl_printf ("\nShared library search path:\n");
 
@@ -131,6 +131,67 @@  print_hwcap_1_finish (bool *first)
     _dl_printf (")\n");
 }
 
+/* Print the header for print_hwcaps_subdirectories.  */
+static void
+print_hwcaps_subdirectories_header (bool *nothing_printed)
+{
+  if (*nothing_printed)
+    {
+      _dl_printf ("\n\
+Subdirectories of glibc-hwcaps directories, in priority order:\n");
+      *nothing_printed = false;
+    }
+}
+
+/* Print the HWCAP name itself, indented.  */
+static void
+print_hwcaps_subdirectories_name (const struct dl_hwcaps_split *split)
+{
+  _dl_write (STDOUT_FILENO, "  ", 2);
+  _dl_write (STDOUT_FILENO, split->segment, split->length);
+}
+
+/* Print the list of recognized glibc-hwcaps subdirectories.  */
+static void
+print_hwcaps_subdirectories (const struct dl_main_state *state)
+{
+  bool nothing_printed = true;
+  struct dl_hwcaps_split split;
+
+  /* The prepended glibc-hwcaps subdirectories.  */
+  _dl_hwcaps_split_init (&split, state->glibc_hwcaps_prepend);
+  while (_dl_hwcaps_split (&split))
+    {
+      print_hwcaps_subdirectories_header (&nothing_printed);
+      print_hwcaps_subdirectories_name (&split);
+      bool first = true;
+      print_hwcap_1 (&first, true, "searched");
+      print_hwcap_1_finish (&first);
+    }
+
+  /* The built-in glibc-hwcaps subdirectories.  Do the filtering
+     manually, so that more precise diagnostics are possible.  */
+  int32_t mask = _dl_hwcaps_subdirs_active ();
+  _dl_hwcaps_split_init (&split, _dl_hwcaps_subdirs);
+  while (_dl_hwcaps_split (&split))
+    {
+      print_hwcaps_subdirectories_header (&nothing_printed);
+      print_hwcaps_subdirectories_name (&split);
+      bool first = true;
+      print_hwcap_1 (&first, mask & 1, "supported");
+      bool listed = _dl_hwcaps_contains (state->glibc_hwcaps_mask,
+                                         split.segment, split.length);
+      print_hwcap_1 (&first, !listed, "masked");
+      print_hwcap_1 (&first, (mask & 1) && listed, "searched");
+      print_hwcap_1_finish (&first);
+      mask >>= 1;
+    }
+
+  if (nothing_printed)
+    _dl_printf ("\n\
+No subdirectories of glibc-hwcaps directories are searched.\n");
+}
+
 /* Write a list of hwcap subdirectories to standard output.  See
  _dl_important_hwcaps in dl-hwcaps.c.  */
 static void
@@ -185,6 +246,10 @@  setting environment variables (which would be inherted by subprocesses).\n\
   --inhibit-cache       Do not use " LD_SO_CACHE "\n\
   --library-path PATH   use given PATH instead of content of the environment\n\
                         variable LD_LIBRARY_PATH\n\
+  --glibc-hwcaps-prepend LIST\n\
+                        search glibc-hwcaps subdirectories in LIST\n\
+  --glibc-hwcaps-mask LIST\n\
+                        only search built-in subdirectories if in LIST\n\
   --inhibit-rpath LIST  ignore RUNPATH and RPATH information in object names\n\
                         in LIST\n\
   --audit LIST          use objects named in LIST as auditors\n\
@@ -197,6 +262,7 @@  This program interpreter self-identifies as: " RTLD "\n\
 ",
               argv0);
   print_search_path_for_help (state);
+  print_hwcaps_subdirectories (state);
   print_legacy_hwcap_directories ();
   _exit (0);
 }
diff --git a/elf/rtld.c b/elf/rtld.c
index e0e8e98c2f..c9d2330364 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -289,6 +289,8 @@  dl_main_state_init (struct dl_main_state *state)
   state->library_path_source = NULL;
   state->preloadlist = NULL;
   state->preloadarg = NULL;
+  state->glibc_hwcaps_prepend = NULL;
+  state->glibc_hwcaps_mask = NULL;
   state->mode = normal;
   state->any_debug = false;
   state->version_info = false;
@@ -1244,6 +1246,22 @@  dl_main (const ElfW(Phdr) *phdr,
 	  {
 	    argv0 = _dl_argv[2];
 
+	    _dl_skip_args += 2;
+	    _dl_argc -= 2;
+	    _dl_argv += 2;
+	  }
+	else if (strcmp (_dl_argv[1], "--glibc-hwcaps-prepend") == 0
+		 && _dl_argc > 2)
+	  {
+	    state.glibc_hwcaps_prepend = _dl_argv[2];
+	    _dl_skip_args += 2;
+	    _dl_argc -= 2;
+	    _dl_argv += 2;
+	  }
+	else if (strcmp (_dl_argv[1], "--glibc-hwcaps-mask") == 0
+		 && _dl_argc > 2)
+	  {
+	    state.glibc_hwcaps_mask = _dl_argv[2];
 	    _dl_skip_args += 2;
 	    _dl_argc -= 2;
 	    _dl_argv += 2;
diff --git a/elf/tst-dl-hwcaps_split.c b/elf/tst-dl-hwcaps_split.c
new file mode 100644
index 0000000000..929c99a23b
--- /dev/null
+++ b/elf/tst-dl-hwcaps_split.c
@@ -0,0 +1,139 @@ 
+/* Unit tests for dl-hwcaps.c.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <array_length.h>
+#include <dl-hwcaps.h>
+#include <string.h>
+#include <support/check.h>
+
+static void
+check_split_masked (const char *input, int32_t bitmask, const char *mask,
+                    const char *expected[], size_t expected_length)
+{
+  struct dl_hwcaps_split_masked split;
+  _dl_hwcaps_split_masked_init (&split, input, bitmask, mask);
+  size_t index = 0;
+  while (_dl_hwcaps_split_masked (&split))
+    {
+      TEST_VERIFY_EXIT (index < expected_length);
+      TEST_COMPARE_BLOB (expected[index], strlen (expected[index]),
+                         split.split.segment, split.split.length);
+      ++index;
+    }
+  TEST_COMPARE (index, expected_length);
+}
+
+static void
+check_split (const char *input,
+             const char *expected[], size_t expected_length)
+{
+  struct dl_hwcaps_split split;
+  _dl_hwcaps_split_init (&split, input);
+  size_t index = 0;
+  while (_dl_hwcaps_split (&split))
+    {
+      TEST_VERIFY_EXIT (index < expected_length);
+      TEST_COMPARE_BLOB (expected[index], strlen (expected[index]),
+                         split.segment, split.length);
+      ++index;
+    }
+  TEST_COMPARE (index, expected_length);
+
+  /* Reuse the test cases with masking that does not actually remove
+     anything.  */
+  check_split_masked (input, -1, NULL, expected, expected_length);
+  check_split_masked (input, -1, input, expected, expected_length);
+}
+
+static int
+do_test (void)
+{
+  /* Splitting tests, without masking.  */
+  check_split (NULL, NULL, 0);
+  check_split ("", NULL, 0);
+  check_split (":", NULL, 0);
+  check_split ("::", NULL, 0);
+
+  {
+    const char *expected[] = { "first" };
+    check_split ("first", expected, array_length (expected));
+    check_split (":first", expected, array_length (expected));
+    check_split ("first:", expected, array_length (expected));
+    check_split (":first:", expected, array_length (expected));
+  }
+
+  {
+    const char *expected[] = { "first", "second" };
+    check_split ("first:second", expected, array_length (expected));
+    check_split ("first::second", expected, array_length (expected));
+    check_split (":first:second", expected, array_length (expected));
+    check_split ("first:second:", expected, array_length (expected));
+    check_split (":first:second:", expected, array_length (expected));
+  }
+
+  /* Splitting tests with masking.  */
+  {
+    const char *expected[] = { "first" };
+    check_split_masked ("first", 3, "first:second",
+                        expected, array_length (expected));
+    check_split_masked ("first:second", 3, "first:",
+                        expected, array_length (expected));
+    check_split_masked ("first:second", 1, NULL,
+                        expected, array_length (expected));
+  }
+  {
+    const char *expected[] = { "second" };
+    check_split_masked ("first:second", 3, "second",
+                        expected, array_length (expected));
+    check_split_masked ("first:second:third", -1, "second:",
+                        expected, array_length (expected));
+    check_split_masked ("first:second", 2, NULL,
+                        expected, array_length (expected));
+    check_split_masked ("first:second:third", 2, "first:second",
+                        expected, array_length (expected));
+  }
+
+  /* Tests for _dl_hwcaps_contains.  */
+  TEST_VERIFY (_dl_hwcaps_contains (NULL, "first", strlen ("first")));
+  TEST_VERIFY (_dl_hwcaps_contains (NULL, "", 0));
+  TEST_VERIFY (! _dl_hwcaps_contains ("", "first", strlen ("first")));
+  TEST_VERIFY (! _dl_hwcaps_contains ("firs", "first", strlen ("first")));
+  TEST_VERIFY (_dl_hwcaps_contains ("firs", "first", strlen ("first") - 1));
+  for (int i = 0; i < strlen ("first"); ++i)
+    TEST_VERIFY (! _dl_hwcaps_contains ("first", "first", i));
+  TEST_VERIFY (_dl_hwcaps_contains ("first", "first", strlen ("first")));
+  TEST_VERIFY (_dl_hwcaps_contains ("first:", "first", strlen ("first")));
+  TEST_VERIFY (_dl_hwcaps_contains ("first:second",
+                                    "first", strlen ("first")));
+  TEST_VERIFY (_dl_hwcaps_contains (":first:second", "first",
+                                    strlen ("first")));
+  TEST_VERIFY (_dl_hwcaps_contains ("first:second", "second",
+                                    strlen ("second")));
+  TEST_VERIFY (_dl_hwcaps_contains ("first:second:", "second",
+                                    strlen ("second")));
+  for (int i = 0; i < strlen ("second"); ++i)
+    TEST_VERIFY (!_dl_hwcaps_contains ("first:second:", "sec", i));
+
+  return 0;
+}
+
+#include <support/test-driver.c>
+
+/* Rebuild the sources here because the object file is built for
+   inclusion into the dynamic loader.  */
+#include "dl-hwcaps_split.c"
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 382eeb9be0..0b2babc70c 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -1047,8 +1047,13 @@  extern struct r_debug *_dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns)
      attribute_hidden;
 
 /* Initialize the basic data structure for the search paths.  SOURCE
-   is either "LD_LIBRARY_PATH" or "--library-path".  */
-extern void _dl_init_paths (const char *library_path, const char *source)
+   is either "LD_LIBRARY_PATH" or "--library-path".
+   GLIBC_HWCAPS_PREPEND adds additional glibc-hwcaps subdirectories to
+   search.  GLIBC_HWCAPS_MASK is used to filter the built-in
+   subdirectories if not NULL.  */
+extern void _dl_init_paths (const char *library_path, const char *source,
+			    const char *glibc_hwcaps_prepend,
+			    const char *glibc_hwcaps_mask)
   attribute_hidden;
 
 /* Gather the information needed to install the profiling tables and start
@@ -1072,9 +1077,14 @@  extern void _dl_show_auxv (void) attribute_hidden;
 extern char *_dl_next_ld_env_entry (char ***position) attribute_hidden;
 
 /* Return an array with the names of the important hardware
-   capabilities.  The length of the array is written to *SZ, and the
-   maximum of all strings length is written to *MAX_CAPSTRLEN.  */
-const struct r_strlenpair *_dl_important_hwcaps (size_t *sz,
+   capabilities.  PREPEND is a colon-separated list of glibc-hwcaps
+   directories to search first.  MASK is a colon-separated list used
+   to filter the built-in glibc-hwcaps subdirectories.  The length of
+   the array is written to *SZ, and the maximum of all strings length
+   is written to *MAX_CAPSTRLEN.  */
+const struct r_strlenpair *_dl_important_hwcaps (const char *prepend,
+						 const char *mask,
+						 size_t *sz,
 						 size_t *max_capstrlen)
   attribute_hidden;