[v6,19/20] elf: Fix runtime linker auditing on aarch64 (BZ #26643)

Message ID 20211115183734.531155-20-adhemerval.zanella@linaro.org
State Superseded
Headers
Series Multiple rtld-audit fixes |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Adhemerval Zanella Nov. 15, 2021, 6:37 p.m. UTC
  From: Ben Woodard <woodard@redhat.com>

The dynamic linker's auditing has some issues on aarch64, with two distinct
problems:

  1. _dl_runtime_resolve was not preserving x8, the indirect result
     location register.

  2. The NEON Q registers pushed onto the stack by _dl_runtime_resolve()
     were twice the size of D registers extracted from the stack frame by
     _dl_runtime_profile().

To fix it, the rtld-audit interface is changed for aarch64:

  * LAV_CURRENT is bumped to v2 and loader will fail to load audit
    modules that return a version different than the one supported.

  * The La_aarch64_regs structure was expanded to include x8 and the
    full sized NEON V registers that are required to be preserved by the
    ABI.

  * _dl_runtime_profile needed to extract registers saved by
    _dl_runtime_resolve and put them into the new correctly sized
    La_aarch64_regs structure.

  * The return value structure La_aarch64_retval also did not have the
    correctly sized NEON V registers.

Similar to x86, a new La_aarch64_vector type to represent the NEON
register is added to La_aarch64_regs (so each type can be accessed
more easily).

Checked on aarch64-linux-gnu.

Co-authored-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
---
 elf/rtld.c                             |   4 +-
 sysdeps/aarch64/Makefile               |  20 +++
 sysdeps/aarch64/bits/link.h            |  24 ++--
 sysdeps/aarch64/bits/link_lavcurrent.h |  25 ++++
 sysdeps/aarch64/dl-link.sym            |   4 +-
 sysdeps/aarch64/dl-trampoline.S        |  93 ++++++++-----
 sysdeps/aarch64/tst-audit26.c          |  37 ++++++
 sysdeps/aarch64/tst-audit26mod.c       |  33 +++++
 sysdeps/aarch64/tst-audit26mod.h       |  50 +++++++
 sysdeps/aarch64/tst-audit27.c          |  64 +++++++++
 sysdeps/aarch64/tst-audit27mod.c       |  95 ++++++++++++++
 sysdeps/aarch64/tst-audit27mod.h       |  67 ++++++++++
 sysdeps/aarch64/tst-auditmod26.c       |  98 ++++++++++++++
 sysdeps/aarch64/tst-auditmod27.c       | 175 +++++++++++++++++++++++++
 14 files changed, 741 insertions(+), 48 deletions(-)
 create mode 100644 sysdeps/aarch64/bits/link_lavcurrent.h
 create mode 100644 sysdeps/aarch64/tst-audit26.c
 create mode 100644 sysdeps/aarch64/tst-audit26mod.c
 create mode 100644 sysdeps/aarch64/tst-audit26mod.h
 create mode 100644 sysdeps/aarch64/tst-audit27.c
 create mode 100644 sysdeps/aarch64/tst-audit27mod.c
 create mode 100644 sysdeps/aarch64/tst-audit27mod.h
 create mode 100644 sysdeps/aarch64/tst-auditmod26.c
 create mode 100644 sysdeps/aarch64/tst-auditmod27.c
  

Comments

Florian Weimer Dec. 20, 2021, 9:32 p.m. UTC | #1
* Adhemerval Zanella:

> diff --git a/elf/rtld.c b/elf/rtld.c
> index 29a37f51d3..1a13ae6314 100644
> --- a/elf/rtld.c
> +++ b/elf/rtld.c
> @@ -990,10 +990,10 @@ file=%s [%lu]; audit interface function la_version returned zero; ignored.\n",
>        return;
>      }
>  
> -  if (lav > LAV_CURRENT)
> +  if (lav != LAV_CURRENT)
>      {
>        _dl_debug_printf ("\
> -ERROR: audit interface '%s' requires version %d (maximum supported version %d); ignored.\n",
> +ERROR: audit interface '%s' requires version %d (current supported version %d); ignored.\n",
>  			name, lav, LAV_CURRENT);
>        unload_audit_module (dlmargs.map, original_tls_idx);
>        return;

Sorry, this is not what I had in mind.  I'd like to increase LAV_CURRENT
and pretend that the old and new versions have the same behavior, at
least as far as aarch64 is concerned.  We need not write the
compatibility code for the old <link.h> at this time, only if there
are actual users out there that demand it.

(However, for the symbind change, I think we should look at the audit
module version.  It is also much easier to implement both behaviors
there, I hope.)

Thanks,
Florian
  
Florian Weimer Dec. 20, 2021, 9:34 p.m. UTC | #2
* Adhemerval Zanella:

> diff --git a/sysdeps/aarch64/bits/link.h b/sysdeps/aarch64/bits/link.h
> index 774bbe5f45..2af90ca6be 100644
> --- a/sysdeps/aarch64/bits/link.h
> +++ b/sysdeps/aarch64/bits/link.h
> @@ -20,23 +20,29 @@
>  # error "Never include <bits/link.h> directly; use <link.h> instead."
>  #endif
>  
> +typedef union
> +{
> +  float s;
> +  double d;
> +  long double q;
> +} La_aarch64_vector;
> +
>  /* Registers for entry into PLT on AArch64.  */
>  typedef struct La_aarch64_regs
>  {
> -  uint64_t lr_xreg[8];
> -  uint64_t lr_dreg[8];
> -  uint64_t lr_sp;
> -  uint64_t lr_lr;
> +  uint64_t          lr_xreg[9];
> +  La_aarch64_vector lr_vreg[8];
> +  uint64_t          lr_sp;
> +  uint64_t          lr_lr;
>  } La_aarch64_regs;

Ultimately this is up to the aarch64 maintainers to review, but I wonder
if it would make sense to extend support to non-standard calling
conventions by preserving more GPRs and vector registers.

(There is also talk of a C++-specific calling convention that uses the
carry flag as part of the function return …)

Thanks,
Florian
  
Adhemerval Zanella Dec. 21, 2021, 2:33 p.m. UTC | #3
On 20/12/2021 18:32, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> diff --git a/elf/rtld.c b/elf/rtld.c
>> index 29a37f51d3..1a13ae6314 100644
>> --- a/elf/rtld.c
>> +++ b/elf/rtld.c
>> @@ -990,10 +990,10 @@ file=%s [%lu]; audit interface function la_version returned zero; ignored.\n",
>>        return;
>>      }
>>  
>> -  if (lav > LAV_CURRENT)
>> +  if (lav != LAV_CURRENT)
>>      {
>>        _dl_debug_printf ("\
>> -ERROR: audit interface '%s' requires version %d (maximum supported version %d); ignored.\n",
>> +ERROR: audit interface '%s' requires version %d (current supported version %d); ignored.\n",
>>  			name, lav, LAV_CURRENT);
>>        unload_audit_module (dlmargs.map, original_tls_idx);
>>        return;
> 
> Sorry, this is not what I had in mind.  I'd like to increase LAV_CURRENT
> and pretend that the old and new versions have the same behavior, at
> least as far as aarch64 is concerned.  We need not write the
> compatibility code for the old <link.h> at this time, only if there
> actual users out there that demand it.

Do you mean to only check if lav >= LAV_CURRENT and allow old audit modules
as well? 

At least for aarch64, old audit modules are error-prone and potentially add
more subtle issues, since they do not save/restore some return registers, so
I don't see any real gain in continuing to support them.

> 
> (However, for the symbind change, I think we should look at the audit
> module version.  It is also much easier to implement both behaviors
> there, I hope.)

For symbind it does make sense, although I am not very convinced that users
will really care about it.

> 
> Thanks,
> Florian
>
  
Florian Weimer Dec. 21, 2021, 2:44 p.m. UTC | #4
* Adhemerval Zanella:

> On 20/12/2021 18:32, Florian Weimer wrote:
>> * Adhemerval Zanella:
>> 
>>> diff --git a/elf/rtld.c b/elf/rtld.c
>>> index 29a37f51d3..1a13ae6314 100644
>>> --- a/elf/rtld.c
>>> +++ b/elf/rtld.c
>>> @@ -990,10 +990,10 @@ file=%s [%lu]; audit interface function la_version returned zero; ignored.\n",
>>>        return;
>>>      }
>>>  
>>> -  if (lav > LAV_CURRENT)
>>> +  if (lav != LAV_CURRENT)
>>>      {
>>>        _dl_debug_printf ("\
>>> -ERROR: audit interface '%s' requires version %d (maximum supported version %d); ignored.\n",
>>> +ERROR: audit interface '%s' requires version %d (current supported version %d); ignored.\n",
>>>  			name, lav, LAV_CURRENT);
>>>        unload_audit_module (dlmargs.map, original_tls_idx);
>>>        return;
>> 
>> Sorry, this is not what I had in mind.  I'd like to increase LAV_CURRENT
>> and pretend that the old and new versions have the same behavior, at
>> least as far as aarch64 is concerned.  We need not write the
>> compatibility code for the old <link.h> at this time, only if there
>> actual users out there that demand it.
>
> Do you mean to only check if lav >= LAV_CURRENT and allow old audit modules
> as well?

Yes.

> At least for aarch64 old audit modules are error-prone and potentially adds
> more subtle issues since they do not save/restore some return register that
> I don't see any real gain to keep supporting them.

I disagree.  la_objsearch alone is a significant use case, and I don't
see why it wouldn't work today.  It does not need any
architecture-specific code whatsoever.

Thanks,
Florian
  
Adhemerval Zanella Dec. 21, 2021, 2:48 p.m. UTC | #5
On 21/12/2021 11:44, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> On 20/12/2021 18:32, Florian Weimer wrote:
>>> * Adhemerval Zanella:
>>>
>>>> diff --git a/elf/rtld.c b/elf/rtld.c
>>>> index 29a37f51d3..1a13ae6314 100644
>>>> --- a/elf/rtld.c
>>>> +++ b/elf/rtld.c
>>>> @@ -990,10 +990,10 @@ file=%s [%lu]; audit interface function la_version returned zero; ignored.\n",
>>>>        return;
>>>>      }
>>>>  
>>>> -  if (lav > LAV_CURRENT)
>>>> +  if (lav != LAV_CURRENT)
>>>>      {
>>>>        _dl_debug_printf ("\
>>>> -ERROR: audit interface '%s' requires version %d (maximum supported version %d); ignored.\n",
>>>> +ERROR: audit interface '%s' requires version %d (current supported version %d); ignored.\n",
>>>>  			name, lav, LAV_CURRENT);
>>>>        unload_audit_module (dlmargs.map, original_tls_idx);
>>>>        return;
>>>
>>> Sorry, this is not what I had in mind.  I'd like to increase LAV_CURRENT
>>> and pretend that the old and new versions have the same behavior, at
>>> least as far as aarch64 is concerned.  We need not write the
>>> compatibility code for the old <link.h> at this time, only if there
>>> actual users out there that demand it.
>>
>> Do you mean to only check if lav >= LAV_CURRENT and allow old audit modules
>> as well?
> 
> Yes.
> 
>> At least for aarch64 old audit modules are error-prone and potentially adds
>> more subtle issues since they do not save/restore some return register that
>> I don't see any real gain to keep supporting them.
> 
> I disagree.  la_objsearch alone is a significant use case, and I don't
> see why it wouldn't work today.  It does not need any
> architecture-specific code whatsoever.

My main problem is providing an API which is undocumented and is missing
support, where users who try to replicate what other architectures do will
shoot themselves in the foot.  I think this is just a broken API and we
should avoid it.
  
Florian Weimer Dec. 21, 2021, 2:54 p.m. UTC | #6
* Adhemerval Zanella:

>>> At least for aarch64 old audit modules are error-prone and potentially adds
>>> more subtle issues since they do not save/restore some return register that
>>> I don't see any real gain to keep supporting them.
>> 
>> I disagree.  la_objsearch alone is a significant use case, and I don't
>> see why it wouldn't work today.  It does not need any
>> architecture-specific code whatsoever.
>
> My main problem is provide a API which undocumented and missing support
> where if users tries to replicate what other architecture does it will
> shoot in the foot.  I think this is just a broken API and we should
> avoid it.

Sorry, which API?

Thanks,
Florian
  
Adhemerval Zanella Dec. 21, 2021, 5:03 p.m. UTC | #7
On 21/12/2021 11:54, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>>>> At least for aarch64 old audit modules are error-prone and potentially adds
>>>> more subtle issues since they do not save/restore some return register that
>>>> I don't see any real gain to keep supporting them.
>>>
>>> I disagree.  la_objsearch alone is a significant use case, and I don't
>>> see why it wouldn't work today.  It does not need any
>>> architecture-specific code whatsoever.
>>
>> My main problem is provide a API which undocumented and missing support
>> where if users tries to replicate what other architecture does it will
>> shoot in the foot.  I think this is just a broken API and we should
>> avoid it.
> 
> Sorry, which API?

The audit modules one; making la_objsearch work when the rest of the possible
callback functions might trigger undefined behavior (as per BZ#26643) does
not seem to me a good way forward.  It has not bitten us before because
the use case is quite limited.
  
Florian Weimer Dec. 21, 2021, 5:22 p.m. UTC | #8
* Adhemerval Zanella:

> On 21/12/2021 11:54, Florian Weimer wrote:
>> * Adhemerval Zanella:
>> 
>>>>> At least for aarch64 old audit modules are error-prone and potentially adds
>>>>> more subtle issues since they do not save/restore some return register that
>>>>> I don't see any real gain to keep supporting them.
>>>>
>>>> I disagree.  la_objsearch alone is a significant use case, and I don't
>>>> see why it wouldn't work today.  It does not need any
>>>> architecture-specific code whatsoever.
>>>
>>> My main problem is provide a API which undocumented and missing support
>>> where if users tries to replicate what other architecture does it will
>>> shoot in the foot.  I think this is just a broken API and we should
>>> avoid it.
>> 
>> Sorry, which API?
>
> THe audit modules one, making la_objsearch work when the rest of the possible
> callbacks functions might trigger undefined behavior (as per BZ#26643) does
> not seems to me as as good way forward.  It has not bitten us before because
> the user case is quite limited.

Red Hat has at least one customer that only uses la_objsearch and not
la_symbind (but they don't use aarch64, so they aren't impacted by this
bug either way).

However, you are changing generic code, so what you are proposing
rejects all old audit modules on all architectures.  This is really not
the way to do this.

Let me summarize my recommendation:

. Change LAV_CURRENT to 2.

. Treat la_version return values 1 and 2 the same for now (so > as
  before in the check, not !=).

. *If* a user shows up whose aarch64 audit modules were broken by the
  fix for bug 26643, support two ABIs for the PLT enter/exit hooks.

. Consider issuing more la_symbind callbacks for LAV_CURRENT == 2
  only (BIND_NOW functions and basically all symbols).

Thanks,
Florian
  
Adhemerval Zanella Dec. 21, 2021, 5:38 p.m. UTC | #9
On 21/12/2021 14:22, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> On 21/12/2021 11:54, Florian Weimer wrote:
>>> * Adhemerval Zanella:
>>>
>>>>>> At least for aarch64 old audit modules are error-prone and potentially adds
>>>>>> more subtle issues since they do not save/restore some return register that
>>>>>> I don't see any real gain to keep supporting them.
>>>>>
>>>>> I disagree.  la_objsearch alone is a significant use case, and I don't
>>>>> see why it wouldn't work today.  It does not need any
>>>>> architecture-specific code whatsoever.
>>>>
>>>> My main problem is provide a API which undocumented and missing support
>>>> where if users tries to replicate what other architecture does it will
>>>> shoot in the foot.  I think this is just a broken API and we should
>>>> avoid it.
>>>
>>> Sorry, which API?
>>
>> THe audit modules one, making la_objsearch work when the rest of the possible
>> callbacks functions might trigger undefined behavior (as per BZ#26643) does
>> not seems to me as as good way forward.  It has not bitten us before because
>> the user case is quite limited.
> 
> Red Hat has at least one customer that only uses la_objsearch and not
> la_symbind (but they don't use aarch64, so they aren't impacted by this
> bug either way).
> 
> However, you are changing generic code, so what you are proposing
> rejects all old audit modules on all architectures.  This is really not
> the way to do this.

Yes, I agree that changing the generic code was kind of unnecessary on most
architectures.

> 
> Let me summarize my recommendation:
> 
> . Change LAV_CURRENT to 2.
> 
> . Treat la_version return values 1 and 2 the same for now (so > as
>   before in the check, not !=).

And that is what I am doing now.

> 
> . *If* a user shows up whose aarch64 audit modules were broken by the
>   fix for bug 26643, support two ABIs for the PLT enter/exit hooks.

And that is what I really dislike and want to avoid; I see absolutely
no gain in supporting an interface for aarch64 (or any other port;
aarch64 is just the only one currently showing the issue) that has a subtly
broken API.

For aarch64 I still think it is better to just avoid loading old audit
modules.

> 
> . Consider issuing more la_symbind callbacks for LAV_CURRENT == 2
>   only (BIND_NOW functions and basically all symbols).
> 
What about a newer audit module version that requests PLT tracing? Currently
my plan is to stop execution with an error, instead of just ignoring it.
  
Szabolcs Nagy Dec. 21, 2021, 5:47 p.m. UTC | #10
The 11/15/2021 15:37, Adhemerval Zanella via Libc-alpha wrote:
> --- a/sysdeps/aarch64/bits/link.h
> +++ b/sysdeps/aarch64/bits/link.h
> @@ -20,23 +20,29 @@
>  # error "Never include <bits/link.h> directly; use <link.h> instead."
>  #endif
>  
> +typedef union
> +{
> +  float s;
> +  double d;
> +  long double q;
> +} La_aarch64_vector;
> +
>  /* Registers for entry into PLT on AArch64.  */
>  typedef struct La_aarch64_regs
>  {
> -  uint64_t lr_xreg[8];
> -  uint64_t lr_dreg[8];
> -  uint64_t lr_sp;
> -  uint64_t lr_lr;
> +  uint64_t          lr_xreg[9];
> +  La_aarch64_vector lr_vreg[8];
> +  uint64_t          lr_sp;
> +  uint64_t          lr_lr;
>  } La_aarch64_regs;
>  
>  /* Return values for calls from PLT on AArch64.  */
>  typedef struct La_aarch64_retval
>  {
> -  /* Up to two integer registers can be used for a return value.  */
> -  uint64_t lrv_xreg[2];
> -  /* Up to four D registers can be used for a return value.  */
> -  uint64_t lrv_dreg[4];
> -
> +  /* Up to eight integer registers can be used for a return value.  */
> +  uint64_t          lrv_xreg[8];
> +  /* Up to eight V registers can be used for a return value.  */
> +  La_aarch64_vector lrv_vreg[8];
>  } La_aarch64_retval;
>  __BEGIN_DECLS


i think for variant_pcs we will only need a

void *extension;

or similar member at the end of both structs.

(we can make it point to a dump of all registers
in some format for variant_pcs PLT hooks)

if this can avoid a second bump of LAV_CURRENT
when we add variant_pcs support then it may be
useful to have. (if this turns out to be not
enough then we don't lose much, but it also
means users have to check extension==0 for now
and avoid interpreting or touching the registers
otherwise. not sure if this is better or bump,
depends on how many users need to keep updating
their auditor libs on our bumps.)
  
Adhemerval Zanella Dec. 21, 2021, 5:49 p.m. UTC | #11
On 21/12/2021 14:47, Szabolcs Nagy wrote:
> The 11/15/2021 15:37, Adhemerval Zanella via Libc-alpha wrote:
>> --- a/sysdeps/aarch64/bits/link.h
>> +++ b/sysdeps/aarch64/bits/link.h
>> @@ -20,23 +20,29 @@
>>  # error "Never include <bits/link.h> directly; use <link.h> instead."
>>  #endif
>>  
>> +typedef union
>> +{
>> +  float s;
>> +  double d;
>> +  long double q;
>> +} La_aarch64_vector;
>> +
>>  /* Registers for entry into PLT on AArch64.  */
>>  typedef struct La_aarch64_regs
>>  {
>> -  uint64_t lr_xreg[8];
>> -  uint64_t lr_dreg[8];
>> -  uint64_t lr_sp;
>> -  uint64_t lr_lr;
>> +  uint64_t          lr_xreg[9];
>> +  La_aarch64_vector lr_vreg[8];
>> +  uint64_t          lr_sp;
>> +  uint64_t          lr_lr;
>>  } La_aarch64_regs;
>>  
>>  /* Return values for calls from PLT on AArch64.  */
>>  typedef struct La_aarch64_retval
>>  {
>> -  /* Up to two integer registers can be used for a return value.  */
>> -  uint64_t lrv_xreg[2];
>> -  /* Up to four D registers can be used for a return value.  */
>> -  uint64_t lrv_dreg[4];
>> -
>> +  /* Up to eight integer registers can be used for a return value.  */
>> +  uint64_t          lrv_xreg[8];
>> +  /* Up to eight V registers can be used for a return value.  */
>> +  La_aarch64_vector lrv_vreg[8];
>>  } La_aarch64_retval;
>>  __BEGIN_DECLS
> 
> 
> i think for variant_pcs we will only need a
> 
> void *extension;
> 
> or similar member at the end of both structs.
> 
> (we can make it point to a dump of all registers
> in some format for variant_pcs PLT hooks)
> 
> if this can avoid a second bump of LAV_CURRENT
> when we add variant_pcs support then it may be
> useful to have. (if this turns out to be not
> enough then we don't lose much, but it also
> means users have to check extension==0 for now
> and avoid interpreting or touching the registers
> otherwise. not sure if this is better or bump,
> depends on how many users need to keep updating
> their auditor libs on our bumps.)

That's my idea as well, I really don't want to bump LAV_CURRENT for
aarch64 on 2.36.
  
Florian Weimer Dec. 21, 2021, 6:11 p.m. UTC | #12
* Adhemerval Zanella:

>> . *If* a user shows up whose aarch64 audit modules were broken by the
>>   fix for bug 26643, support two ABIs for the PLT enter/exit hooks.
>
> And that is what I really dislike and I want to avoid, I see absolute
> no gain about supporting an interface for aarch64 (or any other port,
> the issue is only aarch64 is currently showing) that has subtle and
> broken API.

I agree!  I just want to have an escape hatch in case we have to support
them.  That's why I recommend changing LAV_CURRENT.

> For aarch64 I still think it better to just avoid loading old audit
> modules.

In the end, it is your port, but I strongly advise against this.

>> . Consider issuing more la_symbind callbacks for LAV_CURRENT == 2
>>   only (BIND_NOW functions and basically all symbols).
>> 
> What about newer audit module version that request PLT trace? Currently
> my plan is to stop execution with an error, instead of just ignoring it.

You mean la_version returns 2, a jump slot is available, and the
la_symbind function requests callbacks?  I don't see why we'd have to
reject that.

Thanks,
Florian
  
Adhemerval Zanella Dec. 21, 2021, 6:19 p.m. UTC | #13
On 21/12/2021 15:11, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>>> . *If* a user shows up whose aarch64 audit modules were broken by the
>>>   fix for bug 26643, support two ABIs for the PLT enter/exit hooks.
>>
>> And that is what I really dislike and I want to avoid, I see absolute
>> no gain about supporting an interface for aarch64 (or any other port,
>> the issue is only aarch64 is currently showing) that has subtle and
>> broken API.
> 
> I agree!  I just want to have an escape hatch in case we have to support
> them.  That's why I recommend changing LAV_CURRENT.
> 
>> For aarch64 I still think it better to just avoid loading old audit
>> modules.
> 
> In the end, it is your port, but I strongly advise against this.

The problem with aarch64 currently is that it is subtly broken even when
PLT tracking is not enabled, since _dl_runtime_profile does not
save/restore x8.  The reproducer from BZ#26643 at least triggers a
SEGFAULT, but since this is essentially undefined behavior, it is hard to
justify continuing to support the older version.

Also, it would require more effort, since two different
_dl_runtime_profile implementations would have to be provided.

> 
>>> . Consider issuing more la_symbind callbacks for LAV_CURRENT == 2
>>>   only (BIND_NOW functions and basically all symbols).
>>>
>> What about newer audit module version that request PLT trace? Currently
>> my plan is to stop execution with an error, instead of just ignoring it.
> 
> You mean la_version returns 2, a jump slot is available, and the
> la_symbind function requests callbacks?  I don't see why we'd have to
> reject that.

Alright, I think I can live with it.
  

Patch

diff --git a/elf/rtld.c b/elf/rtld.c
index 29a37f51d3..1a13ae6314 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -990,10 +990,10 @@  file=%s [%lu]; audit interface function la_version returned zero; ignored.\n",
       return;
     }
 
-  if (lav > LAV_CURRENT)
+  if (lav != LAV_CURRENT)
     {
       _dl_debug_printf ("\
-ERROR: audit interface '%s' requires version %d (maximum supported version %d); ignored.\n",
+ERROR: audit interface '%s' requires version %d (current supported version %d); ignored.\n",
 			name, lav, LAV_CURRENT);
       unload_audit_module (dlmargs.map, original_tls_idx);
       return;
diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile
index 7c66fb97aa..7183895d04 100644
--- a/sysdeps/aarch64/Makefile
+++ b/sysdeps/aarch64/Makefile
@@ -10,6 +10,26 @@  endif
 
 ifeq ($(subdir),elf)
 sysdep-dl-routines += dl-bti
+
+tests += tst-audit26 \
+	 tst-audit27
+
+modules-names += \
+    tst-audit26mod \
+    tst-auditmod26 \
+    tst-audit27mod \
+    tst-auditmod27
+
+$(objpfx)tst-audit26: $(objpfx)tst-audit26mod.so \
+		      $(objpfx)tst-auditmod26.so
+LDFLAGS-tst-audit26 += -Wl,-z,lazy
+tst-audit26-ENV = LD_AUDIT=$(objpfx)tst-auditmod26.so
+
+$(objpfx)tst-audit27: $(objpfx)tst-audit27mod.so \
+		      $(objpfx)tst-auditmod27.so
+$(objpfx)tst-audit27mod.so: $(libsupport)
+LDFLAGS-tst-audit27 += -Wl,-z,lazy
+tst-audit27-ENV = LD_AUDIT=$(objpfx)tst-auditmod27.so
 endif
 
 ifeq ($(subdir),elf)
diff --git a/sysdeps/aarch64/bits/link.h b/sysdeps/aarch64/bits/link.h
index 774bbe5f45..2af90ca6be 100644
--- a/sysdeps/aarch64/bits/link.h
+++ b/sysdeps/aarch64/bits/link.h
@@ -20,23 +20,29 @@ 
 # error "Never include <bits/link.h> directly; use <link.h> instead."
 #endif
 
+typedef union
+{
+  float s;
+  double d;
+  long double q;
+} La_aarch64_vector;
+
 /* Registers for entry into PLT on AArch64.  */
 typedef struct La_aarch64_regs
 {
-  uint64_t lr_xreg[8];
-  uint64_t lr_dreg[8];
-  uint64_t lr_sp;
-  uint64_t lr_lr;
+  uint64_t          lr_xreg[9];
+  La_aarch64_vector lr_vreg[8];
+  uint64_t          lr_sp;
+  uint64_t          lr_lr;
 } La_aarch64_regs;
 
 /* Return values for calls from PLT on AArch64.  */
 typedef struct La_aarch64_retval
 {
-  /* Up to two integer registers can be used for a return value.  */
-  uint64_t lrv_xreg[2];
-  /* Up to four D registers can be used for a return value.  */
-  uint64_t lrv_dreg[4];
-
+  /* Up to eight integer registers can be used for a return value.  */
+  uint64_t          lrv_xreg[8];
+  /* Up to eight V registers can be used for a return value.  */
+  La_aarch64_vector lrv_vreg[8];
 } La_aarch64_retval;
 __BEGIN_DECLS
 
diff --git a/sysdeps/aarch64/bits/link_lavcurrent.h b/sysdeps/aarch64/bits/link_lavcurrent.h
new file mode 100644
index 0000000000..c48835d12b
--- /dev/null
+++ b/sysdeps/aarch64/bits/link_lavcurrent.h
@@ -0,0 +1,25 @@ 
+/* Data structure for communication from the run-time dynamic linker for
+   loaded ELF shared objects.  LAV_CURRENT definition.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _LINK_H
+# error "Never include <bits/link_lavcurrent.h> directly; use <link.h> instead."
+#endif
+
+/* Version numbers for la_version handshake interface.  */
+#define LAV_CURRENT	2
diff --git a/sysdeps/aarch64/dl-link.sym b/sysdeps/aarch64/dl-link.sym
index d67d28b40c..70d153a1d5 100644
--- a/sysdeps/aarch64/dl-link.sym
+++ b/sysdeps/aarch64/dl-link.sym
@@ -7,9 +7,9 @@  DL_SIZEOF_RG		sizeof(struct La_aarch64_regs)
 DL_SIZEOF_RV		sizeof(struct La_aarch64_retval)
 
 DL_OFFSET_RG_X0		offsetof(struct La_aarch64_regs, lr_xreg)
-DL_OFFSET_RG_D0		offsetof(struct La_aarch64_regs, lr_dreg)
+DL_OFFSET_RG_V0		offsetof(struct La_aarch64_regs, lr_vreg)
 DL_OFFSET_RG_SP		offsetof(struct La_aarch64_regs, lr_sp)
 DL_OFFSET_RG_LR		offsetof(struct La_aarch64_regs, lr_lr)
 
 DL_OFFSET_RV_X0		offsetof(struct La_aarch64_retval, lrv_xreg)
-DL_OFFSET_RV_D0		offsetof(struct La_aarch64_retval, lrv_dreg)
+DL_OFFSET_RV_V0		offsetof(struct La_aarch64_retval, lrv_vreg)
diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S
index 9b352b1d0f..0d540651d4 100644
--- a/sysdeps/aarch64/dl-trampoline.S
+++ b/sysdeps/aarch64/dl-trampoline.S
@@ -45,7 +45,8 @@  _dl_runtime_resolve:
 
 	cfi_rel_offset (lr, 8)
 
-	/* Save arguments.  */
+	/* Note: Saving x9 is not required by the ABI but the assembler requires
+	   the immediate values of operand 3 to be a multiple of 16 */
 	stp	x8, x9, [sp, #-(80+8*16)]!
 	cfi_adjust_cfa_offset (80+8*16)
 	cfi_rel_offset (x8, 0)
@@ -142,13 +143,17 @@  _dl_runtime_profile:
 	   Stack frame layout:
 	   [sp,   #...] lr
 	   [sp,   #...] &PLTGOT[n]
-	   [sp,    #96] La_aarch64_regs
-	   [sp,    #48] La_aarch64_retval
-	   [sp,    #40] frame size return from pltenter
-	   [sp,    #32] dl_profile_call saved x1
-	   [sp,    #24] dl_profile_call saved x0
-	   [sp,    #16] t1
-	   [sp,     #0] x29, lr   <- x29
+	   -----------------------
+	   [sp,   #384] La_aarch64_regs::lr_xreg (x0-x8)
+	   [sp,   #256] La_aarch64_regs::lr_vreg (q0-q7)
+	   [sp,   #240] La_aarch64_regs::sp and La_aarch64_regs::lr
+	   [sp,   #176] La_aarch64_retval::lrv_xreg (x0-x7)
+	   [sp,   # 48] La_aarch64_retval::lrv_vreg (q0-q7)
+	   [sp,   # 40] frame size return from pltenter
+	   [sp,   # 32] dl_profile_call saved x1
+	   [sp,   # 24] dl_profile_call saved x0
+	   [sp,   # 16] t1
+	   [sp,   #  0] x29, lr   <- x29
 	 */
 
 # define OFFSET_T1		16
@@ -183,19 +188,22 @@  _dl_runtime_profile:
 	stp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
 	cfi_rel_offset (x6, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 0)
 	cfi_rel_offset (x7, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 8)
-
-	stp	d0, d1, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
-	cfi_rel_offset (d0, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0)
-	cfi_rel_offset (d1, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0 + 8)
-	stp	d2, d3, [X29, #OFFSET_RG+ DL_OFFSET_RG_D0 + 16*1]
-	cfi_rel_offset (d2, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 0)
-	cfi_rel_offset (d3, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 8)
-	stp	d4, d5, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
-	cfi_rel_offset (d4, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 0)
-	cfi_rel_offset (d5, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 8)
-	stp	d6, d7, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
-	cfi_rel_offset (d6, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 0)
-	cfi_rel_offset (d7, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 8)
+	str	x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4 + 0]
+	cfi_rel_offset (x8, OFFSET_RG + DL_OFFSET_RG_X0 + 16*4 + 0)
+	/* Note 8 bytes of padding is in the stack frame for alignment */
+
+	stp	q0, q1, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0]
+	cfi_rel_offset (q0, OFFSET_RG + DL_OFFSET_RG_V0 + 32*0)
+	cfi_rel_offset (q1, OFFSET_RG + DL_OFFSET_RG_V0 + 32*0 + 16)
+	stp	q2, q3, [X29, #OFFSET_RG+ DL_OFFSET_RG_V0 + 32*1]
+	cfi_rel_offset (q2, OFFSET_RG + DL_OFFSET_RG_V0 + 32*1 + 0)
+	cfi_rel_offset (q3, OFFSET_RG + DL_OFFSET_RG_V0 + 32*1 + 16)
+	stp	q4, q5, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2]
+	cfi_rel_offset (q4, OFFSET_RG + DL_OFFSET_RG_V0 + 32*2 + 0)
+	cfi_rel_offset (q5, OFFSET_RG + DL_OFFSET_RG_V0 + 32*2 + 16)
+	stp	q6, q7, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3]
+	cfi_rel_offset (q6, OFFSET_RG + DL_OFFSET_RG_V0 + 32*3 + 0)
+	cfi_rel_offset (q7, OFFSET_RG + DL_OFFSET_RG_V0 + 32*3 + 16)
 
 	add     x0, x29, #SF_SIZE + 16
 	ldr	x1, [x29, #OFFSET_LR]
@@ -234,10 +242,11 @@  _dl_runtime_profile:
 	ldp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
 	ldp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
 	ldp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
-	ldp	d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
-	ldp	d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
-	ldp	d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
-	ldp	d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+	ldr	x8,     [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4]
+	ldp	q0, q1, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0]
+	ldp	q2, q3, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*1]
+	ldp	q4, q5, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2]
+	ldp	q6, q7, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3]
 
 	cfi_def_cfa_register (sp)
 	ldp	x29, x30, [x29, #0]
@@ -280,14 +289,21 @@  _dl_runtime_profile:
 	ldp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
 	ldp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
 	ldp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
-	ldp	d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
-	ldp	d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
-	ldp	d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
-	ldp	d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+	ldr	x8,     [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4]
+	ldp	q0, q1, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0]
+	ldp	q2, q3, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*1]
+	ldp	q4, q5, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2]
+	ldp	q6, q7, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3]
 	blr	ip0
-	stp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
-	stp	d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
-	stp	d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
+	stp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*0]
+	stp	x2, x3, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*1]
+	stp	x4, x5, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*2]
+	stp	x6, x7, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*3]
+	str	x8,     [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4]
+	stp	q0, q1, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*0]
+	stp	q2, q3, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*1]
+	stp	q4, q5, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*2]
+	stp	q6, q7, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*3]
 
 	/* Setup call to pltexit  */
 	ldp	x0, x1, [x29, #OFFSET_SAVED_CALL_X0]
@@ -295,9 +311,17 @@  _dl_runtime_profile:
 	add	x3, x29, #OFFSET_RV
 	bl	_dl_audit_pltexit
 
-	ldp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
-	ldp	d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
-	ldp	d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
+	ldp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*0]
+	ldp	x2, x3, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*1]
+	ldp	x4, x5, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*2]
+	ldp	x6, x7, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*3]
+	/* x8 was stored in the La_aarch64_regs area (OFFSET_RG) after the call.  */
+	ldr	x8,     [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4]
+	ldp	q0, q1, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*0]
+	ldp	q2, q3, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*1]
+	ldp	q4, q5, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*2]
+	ldp	q6, q7, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*3]
+
 	/* LR from within La_aarch64_reg */
 	ldr	lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR]
 	cfi_restore(lr)
diff --git a/sysdeps/aarch64/tst-audit26.c b/sysdeps/aarch64/tst-audit26.c
new file mode 100644
index 0000000000..44d2479e08
--- /dev/null
+++ b/sysdeps/aarch64/tst-audit26.c
@@ -0,0 +1,37 @@ 
+/* Check DT_AUDIT for aarch64 ABI specifics.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <array_length.h>
+#include <string.h>
+#include <support/check.h>
+#include "tst-audit26mod.h"
+
+int
+do_test (void)
+{
+  /* Returning a large struct uses 'x8' as indirect result location.  */
+  struct large_struct r = tst_audit26_func (ARG1, ARG2, ARG3);
+  struct large_struct e = set_large_struct (ARG1, ARG2, ARG3);
+  /* Compare every member so a partially written result is caught too.  */
+  TEST_COMPARE_BLOB (r.a, sizeof (r.a), e.a, sizeof (e.a));
+  TEST_COMPARE_BLOB (r.b, sizeof (r.b), e.b, sizeof (e.b));
+  TEST_COMPARE_BLOB (r.c, sizeof (r.c), e.c, sizeof (e.c));
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/aarch64/tst-audit26mod.c b/sysdeps/aarch64/tst-audit26mod.c
new file mode 100644
index 0000000000..f8d9270898
--- /dev/null
+++ b/sysdeps/aarch64/tst-audit26mod.c
@@ -0,0 +1,33 @@ 
+/* Check DT_AUDIT for aarch64 ABI specifics.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include "tst-audit26mod.h"
+
+/* Callee side of tst-audit26.  Calls abort unless A, B and C arrive as
+   ARG1, ARG2 and ARG3; otherwise returns the structure that
+   set_large_struct builds from the three arguments.  */
+struct large_struct
+tst_audit26_func (char a, short b, long int c)
+{
+  /* Checked in a, b, c order; the first mismatch aborts.  */
+  if (a != ARG1 || b != ARG2 || c != ARG3)
+    abort ();
+
+  return set_large_struct (a, b, c);
+}
diff --git a/sysdeps/aarch64/tst-audit26mod.h b/sysdeps/aarch64/tst-audit26mod.h
new file mode 100644
index 0000000000..dd9ddcdada
--- /dev/null
+++ b/sysdeps/aarch64/tst-audit26mod.h
@@ -0,0 +1,50 @@ 
+/* Check DT_AUDIT for aarch64 specific ABI.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _TST_AUDIT26MOD_H
+#define _TST_AUDIT26MOD_H 1
+
+#include <array_length.h>
+
+struct large_struct
+{
+  char a[16];     /* set_large_struct stores its A argument in each slot.  */
+  short b[8];     /* set_large_struct stores its B argument in each slot.  */
+  long int c[4];  /* set_large_struct stores its C argument in each slot.  */
+};
+
+static inline struct large_struct
+set_large_struct (char a, short b, long int c)
+{
+  struct large_struct out;  /* Each array is filled with one argument.  */
+  for (char *p = out.a; p < out.a + array_length (out.a); p++)
+    *p = a;
+  for (short *p = out.b; p < out.b + array_length (out.b); p++)
+    *p = b;
+  for (long int *p = out.c; p < out.c + array_length (out.c); p++)
+    *p = c;
+  return out;
+}
+
+#define ARG1 0x12
+#define ARG2 0x1234
+#define ARG3 0x12345678
+
+struct large_struct tst_audit26_func (char a, short b, long int c);
+
+#endif
diff --git a/sysdeps/aarch64/tst-audit27.c b/sysdeps/aarch64/tst-audit27.c
new file mode 100644
index 0000000000..e19b58bc3b
--- /dev/null
+++ b/sysdeps/aarch64/tst-audit27.c
@@ -0,0 +1,64 @@ 
+/* Check DT_AUDIT for aarch64 ABI specifics.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <array_length.h>
+#include <string.h>
+#include <support/check.h>
+#include "tst-audit27mod.h"
+
+int
+do_test (void)
+{
+  /* Call each tst-audit27mod function with its eight FUNC_*_ARGn values
+     and compare the result against the matching FUNC_*_RET constant.  */
+
+  /* float arguments and result.  */
+  float fres = tst_audit27_func_float (FUNC_FLOAT_ARG0, FUNC_FLOAT_ARG1,
+                                       FUNC_FLOAT_ARG2, FUNC_FLOAT_ARG3,
+                                       FUNC_FLOAT_ARG4, FUNC_FLOAT_ARG5,
+                                       FUNC_FLOAT_ARG6, FUNC_FLOAT_ARG7);
+  if (!(fres == FUNC_FLOAT_RET))
+    FAIL_EXIT1 ("tst_audit27_func_float() returned %a, expected %a",
+                fres, FUNC_FLOAT_RET);
+
+  /* double arguments and result.  */
+  double dres = tst_audit27_func_double (FUNC_DOUBLE_ARG0, FUNC_DOUBLE_ARG1,
+                                         FUNC_DOUBLE_ARG2, FUNC_DOUBLE_ARG3,
+                                         FUNC_DOUBLE_ARG4, FUNC_DOUBLE_ARG5,
+                                         FUNC_DOUBLE_ARG6, FUNC_DOUBLE_ARG7);
+  if (!(dres == FUNC_DOUBLE_RET))
+    FAIL_EXIT1 ("tst_audit27_func_double() returned %la, expected %la",
+                dres, FUNC_DOUBLE_RET);
+
+  /* long double arguments and result.  */
+  long double ldres
+    = tst_audit27_func_ldouble (FUNC_LDOUBLE_ARG0,
+                                FUNC_LDOUBLE_ARG1,
+                                FUNC_LDOUBLE_ARG2,
+                                FUNC_LDOUBLE_ARG3,
+                                FUNC_LDOUBLE_ARG4,
+                                FUNC_LDOUBLE_ARG5,
+                                FUNC_LDOUBLE_ARG6,
+                                FUNC_LDOUBLE_ARG7);
+  if (!(ldres == FUNC_LDOUBLE_RET))
+    FAIL_EXIT1 ("tst_audit27_func_ldouble() returned %La, expected %La",
+                ldres, FUNC_LDOUBLE_RET);
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/aarch64/tst-audit27mod.c b/sysdeps/aarch64/tst-audit27mod.c
new file mode 100644
index 0000000000..a8e8b28523
--- /dev/null
+++ b/sysdeps/aarch64/tst-audit27mod.c
@@ -0,0 +1,95 @@ 
+/* Check DT_AUDIT for aarch64 ABI specifics.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <array_length.h>
+#include <stdlib.h>
+#include <support/check.h>
+#include "tst-audit27mod.h"
+
+/* Callee for the float case.  Reports through FAIL_EXIT1 as soon as an
+   argument differs from its FUNC_FLOAT_ARGn constant, checking a0..a7
+   in order; otherwise returns FUNC_FLOAT_RET.  */
+float
+tst_audit27_func_float (float a0, float a1, float a2, float a3, float a4,
+			float a5, float a6, float a7)
+{
+  /* One comparison per argument; each format string stays a literal.  */
+#define CHECK_ARG(fmt, got, want) \
+  do { if ((got) != (want)) FAIL_EXIT1 (fmt, got, want); } while (0)
+
+  CHECK_ARG ("a0: %a != %a", a0, FUNC_FLOAT_ARG0);
+  CHECK_ARG ("a1: %a != %a", a1, FUNC_FLOAT_ARG1);
+  CHECK_ARG ("a2: %a != %a", a2, FUNC_FLOAT_ARG2);
+  CHECK_ARG ("a3: %a != %a", a3, FUNC_FLOAT_ARG3);
+  CHECK_ARG ("a4: %a != %a", a4, FUNC_FLOAT_ARG4);
+  CHECK_ARG ("a5: %a != %a", a5, FUNC_FLOAT_ARG5);
+  CHECK_ARG ("a6: %a != %a", a6, FUNC_FLOAT_ARG6);
+  CHECK_ARG ("a7: %a != %a", a7, FUNC_FLOAT_ARG7);
+#undef CHECK_ARG
+
+  return FUNC_FLOAT_RET;
+}
+
+/* Callee for the double case.  Reports through FAIL_EXIT1 as soon as an
+   argument differs from its FUNC_DOUBLE_ARGn constant, checking a0..a7
+   in order; otherwise returns FUNC_DOUBLE_RET.  */
+double
+tst_audit27_func_double (double a0, double a1, double a2, double a3, double a4,
+			 double a5, double a6, double a7)
+{
+  /* One comparison per argument; each format string stays a literal.  */
+#define CHECK_ARG(fmt, got, want) \
+  do { if ((got) != (want)) FAIL_EXIT1 (fmt, got, want); } while (0)
+
+  CHECK_ARG ("a0: %la != %la", a0, FUNC_DOUBLE_ARG0);
+  CHECK_ARG ("a1: %la != %la", a1, FUNC_DOUBLE_ARG1);
+  CHECK_ARG ("a2: %la != %la", a2, FUNC_DOUBLE_ARG2);
+  CHECK_ARG ("a3: %la != %la", a3, FUNC_DOUBLE_ARG3);
+  CHECK_ARG ("a4: %la != %la", a4, FUNC_DOUBLE_ARG4);
+  CHECK_ARG ("a5: %la != %la", a5, FUNC_DOUBLE_ARG5);
+  CHECK_ARG ("a6: %la != %la", a6, FUNC_DOUBLE_ARG6);
+  CHECK_ARG ("a7: %la != %la", a7, FUNC_DOUBLE_ARG7);
+#undef CHECK_ARG
+
+  return FUNC_DOUBLE_RET;
+}
+
+/* Callee for the long double case.  Reports through FAIL_EXIT1 as soon as
+   an argument differs from its FUNC_LDOUBLE_ARGn constant, checking
+   a0..a7 in order; otherwise returns FUNC_LDOUBLE_RET.  */
+long double
+tst_audit27_func_ldouble (long double a0, long double a1, long double a2,
+			  long double a3, long double a4, long double a5,
+			  long double a6, long double a7)
+{
+  /* One comparison per argument; each format string stays a literal.  */
+#define CHECK_ARG(fmt, got, want) \
+  do { if ((got) != (want)) FAIL_EXIT1 (fmt, got, want); } while (0)
+
+  CHECK_ARG ("a0: %La != %La", a0, FUNC_LDOUBLE_ARG0);
+  CHECK_ARG ("a1: %La != %La", a1, FUNC_LDOUBLE_ARG1);
+  CHECK_ARG ("a2: %La != %La", a2, FUNC_LDOUBLE_ARG2);
+  CHECK_ARG ("a3: %La != %La", a3, FUNC_LDOUBLE_ARG3);
+  CHECK_ARG ("a4: %La != %La", a4, FUNC_LDOUBLE_ARG4);
+  CHECK_ARG ("a5: %La != %La", a5, FUNC_LDOUBLE_ARG5);
+  CHECK_ARG ("a6: %La != %La", a6, FUNC_LDOUBLE_ARG6);
+  CHECK_ARG ("a7: %La != %La", a7, FUNC_LDOUBLE_ARG7);
+#undef CHECK_ARG
+
+  return FUNC_LDOUBLE_RET;
+}
diff --git a/sysdeps/aarch64/tst-audit27mod.h b/sysdeps/aarch64/tst-audit27mod.h
new file mode 100644
index 0000000000..cbd44c4bdf
--- /dev/null
+++ b/sysdeps/aarch64/tst-audit27mod.h
@@ -0,0 +1,67 @@ 
+/* Check DT_AUDIT for aarch64 specific ABI.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _TST_AUDIT27MOD_H
+#define _TST_AUDIT27MOD_H 1
+
+#include <float.h>
+
+#define FUNC_FLOAT_ARG0 FLT_MIN
+#define FUNC_FLOAT_ARG1 FLT_MAX
+#define FUNC_FLOAT_ARG2 FLT_EPSILON
+#define FUNC_FLOAT_ARG3 FLT_TRUE_MIN
+#define FUNC_FLOAT_ARG4 0.0f
+#define FUNC_FLOAT_ARG5 1.0f
+#define FUNC_FLOAT_ARG6 2.0f
+#define FUNC_FLOAT_ARG7 3.0f
+#define FUNC_FLOAT_RET  4.0f
+
+float
+tst_audit27_func_float (float a0, float a1, float a2, float a3, float a4,
+			float a5, float a6, float a7);
+
+#define FUNC_DOUBLE_ARG0 DBL_MIN
+#define FUNC_DOUBLE_ARG1 DBL_MAX
+#define FUNC_DOUBLE_ARG2 DBL_EPSILON
+#define FUNC_DOUBLE_ARG3 DBL_TRUE_MIN
+#define FUNC_DOUBLE_ARG4 0.0
+#define FUNC_DOUBLE_ARG5 1.0
+#define FUNC_DOUBLE_ARG6 2.0
+#define FUNC_DOUBLE_ARG7 3.0
+#define FUNC_DOUBLE_RET  0x1.fffffe0000001p+127
+
+double
+tst_audit27_func_double (double a0, double a1, double a2, double a3, double a4,
+			 double a5, double a6, double a7);
+
+#define FUNC_LDOUBLE_ARG0 (DBL_MAX * 2.0L)  /* Distinct, above DBL_MAX.  */
+#define FUNC_LDOUBLE_ARG1 (DBL_MAX * 3.0L)
+#define FUNC_LDOUBLE_ARG2 (DBL_MAX * 4.0L)
+#define FUNC_LDOUBLE_ARG3 (DBL_MAX * 5.0L)
+#define FUNC_LDOUBLE_ARG4 (DBL_MAX * 6.0L)
+#define FUNC_LDOUBLE_ARG5 (DBL_MAX * 7.0L)
+#define FUNC_LDOUBLE_ARG6 (DBL_MAX * 8.0L)
+#define FUNC_LDOUBLE_ARG7 (DBL_MAX * 9.0L)
+#define FUNC_LDOUBLE_RET  0x1.fffffffffffff000000000000001p+1023L
+
+long double
+tst_audit27_func_ldouble (long double a0, long double a1, long double a2,
+			  long double a3, long double a4, long double a5,
+			  long double a6, long double a7);
+
+#endif
diff --git a/sysdeps/aarch64/tst-auditmod26.c b/sysdeps/aarch64/tst-auditmod26.c
new file mode 100644
index 0000000000..028f638446
--- /dev/null
+++ b/sysdeps/aarch64/tst-auditmod26.c
@@ -0,0 +1,98 @@ 
+/* Check DT_AUDIT for aarch64 specific ABI.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <assert.h>
+#include <link.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "tst-audit26mod.h"
+
+#define TEST_NAME  "tst-audit26"
+
+#define AUDIT26_COOKIE 0
+
+unsigned int
+la_version (unsigned int v)
+{
+  return v;  /* Echo back the version number that was passed in.  */
+}
+
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{
+  /* Match on the basename so the directory part of l_name is ignored.  */
+  const char *slash = strrchr (map->l_name, '/');
+  const char *base = slash != NULL ? slash + 1 : map->l_name;
+  int is_test = strncmp (base, TEST_NAME, strlen (TEST_NAME)) == 0;
+  uintptr_t ck = is_test ? (uintptr_t) AUDIT26_COOKIE : (uintptr_t) -1;
+  *cookie = ck;
+  printf ("objopen: %ld, %s [cookie=%ld]\n", lmid, base, ck);
+  return ck != (uintptr_t) -1 ? LA_FLG_BINDFROM | LA_FLG_BINDTO : 0;
+}
+
+ElfW(Addr)
+la_aarch64_gnu_pltenter (ElfW(Sym) *sym __attribute__ ((unused)),
+                         unsigned int ndx __attribute__ ((unused)),
+                         uintptr_t *refcook, uintptr_t *defcook,
+                         La_aarch64_regs *regs, unsigned int *flags,
+                         const char *symname, long int *framesizep)
+{
+  printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+	  symname, (long int) sym->st_value, ndx, *flags);
+
+  if (strcmp (symname, "tst_audit26_func") == 0)
+    {
+      assert (regs->lr_xreg[0] == ARG1);
+      assert (regs->lr_xreg[1] == ARG2);
+      assert (regs->lr_xreg[2] == ARG3);
+    }
+  else
+    abort ();  /* Any other symbol name is treated as a test failure.  */
+
+  /* Clobber 'x8', the indirect result register the caller relies on.  */
+  asm volatile ("mov x8, -1" : : : "x8");
+
+  *framesizep = 1024;  /* NOTE(review): presumably enables pltexit; confirm.  */
+
+  return sym->st_value;  /* Hand back the symbol's own value.  */
+}
+
+unsigned int
+la_aarch64_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+                        uintptr_t *defcook,
+                        const struct La_aarch64_regs *inregs,
+                        struct La_aarch64_retval *outregs, const char *symname)
+{
+  printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u\n",
+	  symname, (long int) sym->st_value, ndx);
+
+  if (strcmp (symname, "tst_audit26_func") == 0)
+    {
+      assert (inregs->lr_xreg[0] == ARG1);
+      assert (inregs->lr_xreg[1] == ARG2);
+      assert (inregs->lr_xreg[2] == ARG3);
+    }
+  else
+    abort ();  /* Any other symbol name is treated as a test failure.  */
+
+  /* Clobber 'x8', the indirect result register the caller relies on.  */
+  asm volatile ("mov x8, -1" : : : "x8");
+
+  return 0;
+}
diff --git a/sysdeps/aarch64/tst-auditmod27.c b/sysdeps/aarch64/tst-auditmod27.c
new file mode 100644
index 0000000000..b1dbff8330
--- /dev/null
+++ b/sysdeps/aarch64/tst-auditmod27.c
@@ -0,0 +1,175 @@ 
+/* Check DT_AUDIT for aarch64 specific ABI.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <assert.h>
+#include <link.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "tst-audit27mod.h"
+
+#define TEST_NAME  "tst-audit27"
+
+#define AUDIT27_COOKIE 0
+
+unsigned int
+la_version (unsigned int v)
+{
+  return v;  /* Echo back the version number that was passed in.  */
+}
+
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{
+  /* Match on the basename so the directory part of l_name is ignored.  */
+  const char *slash = strrchr (map->l_name, '/');
+  const char *base = slash != NULL ? slash + 1 : map->l_name;
+  int is_test = strncmp (base, TEST_NAME, strlen (TEST_NAME)) == 0;
+  uintptr_t ck = is_test ? (uintptr_t) AUDIT27_COOKIE : (uintptr_t) -1;
+  *cookie = ck;
+  printf ("objopen: %ld, %s [%ld]\n", lmid, base, ck);
+  return ck != (uintptr_t) -1 ? LA_FLG_BINDFROM | LA_FLG_BINDTO : 0;
+}
+
+ElfW(Addr)
+la_aarch64_gnu_pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+			 uintptr_t *defcook, La_aarch64_regs *regs,
+			 unsigned int *flags, const char *symname,
+			 long int *framesizep)
+{
+  printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+	  symname, (long int) sym->st_value, ndx, *flags);
+
+  if (strcmp (symname, "tst_audit27_func_float") == 0)
+    {
+      assert (regs->lr_vreg[0].s == FUNC_FLOAT_ARG0);
+      assert (regs->lr_vreg[1].s == FUNC_FLOAT_ARG1);
+      assert (regs->lr_vreg[2].s == FUNC_FLOAT_ARG2);
+      assert (regs->lr_vreg[3].s == FUNC_FLOAT_ARG3);
+      assert (regs->lr_vreg[4].s == FUNC_FLOAT_ARG4);
+      assert (regs->lr_vreg[5].s == FUNC_FLOAT_ARG5);
+      assert (regs->lr_vreg[6].s == FUNC_FLOAT_ARG6);
+      assert (regs->lr_vreg[7].s == FUNC_FLOAT_ARG7);
+    }
+  else if (strcmp (symname, "tst_audit27_func_double") == 0)
+    {
+      assert (regs->lr_vreg[0].d == FUNC_DOUBLE_ARG0);
+      assert (regs->lr_vreg[1].d == FUNC_DOUBLE_ARG1);
+      assert (regs->lr_vreg[2].d == FUNC_DOUBLE_ARG2);
+      assert (regs->lr_vreg[3].d == FUNC_DOUBLE_ARG3);
+      assert (regs->lr_vreg[4].d == FUNC_DOUBLE_ARG4);
+      assert (regs->lr_vreg[5].d == FUNC_DOUBLE_ARG5);
+      assert (regs->lr_vreg[6].d == FUNC_DOUBLE_ARG6);
+      assert (regs->lr_vreg[7].d == FUNC_DOUBLE_ARG7);
+    }
+  else if (strcmp (symname, "tst_audit27_func_ldouble") == 0)
+    {
+      assert (regs->lr_vreg[0].q == FUNC_LDOUBLE_ARG0);
+      assert (regs->lr_vreg[1].q == FUNC_LDOUBLE_ARG1);
+      assert (regs->lr_vreg[2].q == FUNC_LDOUBLE_ARG2);
+      assert (regs->lr_vreg[3].q == FUNC_LDOUBLE_ARG3);
+      assert (regs->lr_vreg[4].q == FUNC_LDOUBLE_ARG4);
+      assert (regs->lr_vreg[5].q == FUNC_LDOUBLE_ARG5);
+      assert (regs->lr_vreg[6].q == FUNC_LDOUBLE_ARG6);
+      assert (regs->lr_vreg[7].q == FUNC_LDOUBLE_ARG7);
+    }
+  else
+    abort ();  /* Any other symbol name is treated as a test failure.  */
+
+  /* Clobber the q registers on exit: v0-v7 get 0xff in each .8b lane.  */
+  uint8_t v = 0xff;
+  asm volatile ("dup v0.8b, %w0" : : "r" (v) : "v0");
+  asm volatile ("dup v1.8b, %w0" : : "r" (v) : "v1");
+  asm volatile ("dup v2.8b, %w0" : : "r" (v) : "v2");
+  asm volatile ("dup v3.8b, %w0" : : "r" (v) : "v3");
+  asm volatile ("dup v4.8b, %w0" : : "r" (v) : "v4");
+  asm volatile ("dup v5.8b, %w0" : : "r" (v) : "v5");
+  asm volatile ("dup v6.8b, %w0" : : "r" (v) : "v6");
+  asm volatile ("dup v7.8b, %w0" : : "r" (v) : "v7");
+
+  *framesizep = 1024;  /* NOTE(review): presumably enables pltexit; confirm.  */
+
+  return sym->st_value;  /* Hand back the symbol's own value.  */
+}
+
+unsigned int
+la_aarch64_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+                        uintptr_t *defcook,
+			const struct La_aarch64_regs *inregs,
+                        struct La_aarch64_retval *outregs,
+			const char *symname)
+{
+  printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u\n",
+	  symname, (long int) sym->st_value, ndx);
+
+  if (strcmp (symname, "tst_audit27_func_float") == 0)
+    {
+      assert (inregs->lr_vreg[0].s == FUNC_FLOAT_ARG0);
+      assert (inregs->lr_vreg[1].s == FUNC_FLOAT_ARG1);
+      assert (inregs->lr_vreg[2].s == FUNC_FLOAT_ARG2);
+      assert (inregs->lr_vreg[3].s == FUNC_FLOAT_ARG3);
+      assert (inregs->lr_vreg[4].s == FUNC_FLOAT_ARG4);
+      assert (inregs->lr_vreg[5].s == FUNC_FLOAT_ARG5);
+      assert (inregs->lr_vreg[6].s == FUNC_FLOAT_ARG6);
+      assert (inregs->lr_vreg[7].s == FUNC_FLOAT_ARG7);
+
+      assert (outregs->lrv_vreg[0].s == FUNC_FLOAT_RET);
+    }
+  else if (strcmp (symname, "tst_audit27_func_double") == 0)
+    {
+      assert (inregs->lr_vreg[0].d == FUNC_DOUBLE_ARG0);
+      assert (inregs->lr_vreg[1].d == FUNC_DOUBLE_ARG1);
+      assert (inregs->lr_vreg[2].d == FUNC_DOUBLE_ARG2);
+      assert (inregs->lr_vreg[3].d == FUNC_DOUBLE_ARG3);
+      assert (inregs->lr_vreg[4].d == FUNC_DOUBLE_ARG4);
+      assert (inregs->lr_vreg[5].d == FUNC_DOUBLE_ARG5);
+      assert (inregs->lr_vreg[6].d == FUNC_DOUBLE_ARG6);
+      assert (inregs->lr_vreg[7].d == FUNC_DOUBLE_ARG7);
+
+      assert (outregs->lrv_vreg[0].d == FUNC_DOUBLE_RET);
+    }
+  else if (strcmp (symname, "tst_audit27_func_ldouble") == 0)
+    {
+      assert (inregs->lr_vreg[0].q == FUNC_LDOUBLE_ARG0);
+      assert (inregs->lr_vreg[1].q == FUNC_LDOUBLE_ARG1);
+      assert (inregs->lr_vreg[2].q == FUNC_LDOUBLE_ARG2);
+      assert (inregs->lr_vreg[3].q == FUNC_LDOUBLE_ARG3);
+      assert (inregs->lr_vreg[4].q == FUNC_LDOUBLE_ARG4);
+      assert (inregs->lr_vreg[5].q == FUNC_LDOUBLE_ARG5);
+      assert (inregs->lr_vreg[6].q == FUNC_LDOUBLE_ARG6);
+      assert (inregs->lr_vreg[7].q == FUNC_LDOUBLE_ARG7);
+
+      assert (outregs->lrv_vreg[0].q == FUNC_LDOUBLE_RET);
+    }
+  else
+    abort ();  /* Any other symbol name is treated as a test failure.  */
+
+  /* Clobber the q registers on exit: v0-v7 get 0xff in each .8b lane.  */
+  uint8_t v = 0xff;
+  asm volatile ("dup v0.8b, %w0" : : "r" (v) : "v0");
+  asm volatile ("dup v1.8b, %w0" : : "r" (v) : "v1");
+  asm volatile ("dup v2.8b, %w0" : : "r" (v) : "v2");
+  asm volatile ("dup v3.8b, %w0" : : "r" (v) : "v3");
+  asm volatile ("dup v4.8b, %w0" : : "r" (v) : "v4");
+  asm volatile ("dup v5.8b, %w0" : : "r" (v) : "v5");
+  asm volatile ("dup v6.8b, %w0" : : "r" (v) : "v6");
+  asm volatile ("dup v7.8b, %w0" : : "r" (v) : "v7");
+
+  return 0;
+}