aarch64: Use target attributes to enable SVE in math functions

Message ID 20260327170155.683964-1-adhemerval.zanella@linaro.org (mailing list archive)
State Changes Requested
Delegated to: Wilco Dijkstra
Headers
Series aarch64: Use target attributes to enable SVE in math functions |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Test passed

Commit Message

Adhemerval Zanella Netto March 27, 2026, 5 p.m. UTC
  Instead of relying on global compiler flags ('-mcpu=generic+sve
-march=armv8-a+sve') in the Makefile, this patch applies SVE
architecture requirements directly at the function level.

A new SVE_FUNCTION macro (mapping to
__attribute__((target("+sve")))) and corresponding pragmas
(OPTIONS_PUSH_SVE / OPTIONS_POP) are introduced in sv_math.h.
These are then applied across all SVE-specific math implementations in
sysdeps/aarch64/fpu/.

This allows building glibc with a -march without the need to
also specify '+sve' (not an usual target, but it helps on testing
some target without the need to rebuild the toolchain).

Checked on aarch64-linux-gnu.
---
 sysdeps/aarch64/fpu/Makefile                  |  2 +-
 sysdeps/aarch64/fpu/acos_sve.c                |  2 +-
 sysdeps/aarch64/fpu/acosf_sve.c               |  2 +-
 sysdeps/aarch64/fpu/acosh_sve.c               |  4 ++--
 sysdeps/aarch64/fpu/acoshf_sve.c              |  4 ++--
 sysdeps/aarch64/fpu/acospi_sve.c              |  2 +-
 sysdeps/aarch64/fpu/acospif_sve.c             |  2 +-
 sysdeps/aarch64/fpu/asin_sve.c                |  2 +-
 sysdeps/aarch64/fpu/asinf_sve.c               |  2 +-
 sysdeps/aarch64/fpu/asinh_sve.c               |  6 ++---
 sysdeps/aarch64/fpu/asinhf_sve.c              |  4 ++--
 sysdeps/aarch64/fpu/asinpi_sve.c              |  2 +-
 sysdeps/aarch64/fpu/asinpif_sve.c             |  2 +-
 sysdeps/aarch64/fpu/atan2_sve.c               |  8 +++----
 sysdeps/aarch64/fpu/atan2f_sve.c              |  8 +++----
 sysdeps/aarch64/fpu/atan2pi_sve.c             |  8 +++----
 sysdeps/aarch64/fpu/atan2pif_sve.c            |  8 +++----
 sysdeps/aarch64/fpu/atan_sve.c                |  2 +-
 sysdeps/aarch64/fpu/atanf_sve.c               |  2 +-
 sysdeps/aarch64/fpu/atanh_sve.c               |  4 ++--
 sysdeps/aarch64/fpu/atanhf_sve.c              |  4 ++--
 sysdeps/aarch64/fpu/atanpi_sve.c              |  2 +-
 sysdeps/aarch64/fpu/atanpif_sve.c             |  2 +-
 sysdeps/aarch64/fpu/cbrt_sve.c                |  6 ++---
 sysdeps/aarch64/fpu/cbrtf_sve.c               |  6 ++---
 sysdeps/aarch64/fpu/cos_sve.c                 |  4 ++--
 sysdeps/aarch64/fpu/cosf_sve.c                |  4 ++--
 sysdeps/aarch64/fpu/cosh_sve.c                |  6 ++---
 sysdeps/aarch64/fpu/coshf_sve.c               |  4 ++--
 sysdeps/aarch64/fpu/cospi_sve.c               |  2 +-
 sysdeps/aarch64/fpu/cospif_sve.c              |  2 +-
 sysdeps/aarch64/fpu/erf_sve.c                 |  2 +-
 sysdeps/aarch64/fpu/erfc_sve.c                |  2 +-
 sysdeps/aarch64/fpu/erfcf_sve.c               |  2 +-
 sysdeps/aarch64/fpu/erff_sve.c                |  2 +-
 sysdeps/aarch64/fpu/exp10_sve.c               |  4 ++--
 sysdeps/aarch64/fpu/exp10f_sve.c              |  6 ++---
 sysdeps/aarch64/fpu/exp10m1_sve.c             |  4 ++--
 sysdeps/aarch64/fpu/exp10m1f_sve.c            |  2 +-
 sysdeps/aarch64/fpu/exp2_sve.c                |  4 ++--
 sysdeps/aarch64/fpu/exp2f_sve.c               |  6 ++---
 sysdeps/aarch64/fpu/exp2m1_sve.c              |  4 ++--
 sysdeps/aarch64/fpu/exp2m1f_sve.c             |  2 +-
 sysdeps/aarch64/fpu/exp_sve.c                 |  4 ++--
 sysdeps/aarch64/fpu/expf_sve.c                |  8 +++----
 sysdeps/aarch64/fpu/expm1_sve.c               |  4 ++--
 sysdeps/aarch64/fpu/expm1f_sve.c              |  2 +-
 sysdeps/aarch64/fpu/hypot_sve.c               |  5 ++--
 sysdeps/aarch64/fpu/hypotf_sve.c              |  6 ++---
 sysdeps/aarch64/fpu/log10_sve.c               |  6 ++---
 sysdeps/aarch64/fpu/log10f_sve.c              |  6 ++---
 sysdeps/aarch64/fpu/log10p1_sve.c             |  4 ++--
 sysdeps/aarch64/fpu/log10p1f_sve.c            |  4 ++--
 sysdeps/aarch64/fpu/log1p_sve.c               |  4 ++--
 sysdeps/aarch64/fpu/log1pf_sve.c              |  4 ++--
 sysdeps/aarch64/fpu/log2_sve.c                |  6 ++---
 sysdeps/aarch64/fpu/log2f_sve.c               |  6 ++---
 sysdeps/aarch64/fpu/log2p1_sve.c              |  4 ++--
 sysdeps/aarch64/fpu/log2p1f_sve.c             |  4 ++--
 sysdeps/aarch64/fpu/log_sve.c                 |  6 ++---
 sysdeps/aarch64/fpu/logf_sve.c                |  6 ++---
 sysdeps/aarch64/fpu/poly_sve_generic.h        |  4 ++++
 sysdeps/aarch64/fpu/pow_sve.c                 | 23 ++++++++++---------
 sysdeps/aarch64/fpu/powf_sve.c                | 17 +++++++-------
 sysdeps/aarch64/fpu/rsqrt_sve.c               |  6 ++---
 sysdeps/aarch64/fpu/rsqrtf_sve.c              |  6 ++---
 sysdeps/aarch64/fpu/sin_sve.c                 |  4 ++--
 sysdeps/aarch64/fpu/sinf_sve.c                |  4 ++--
 sysdeps/aarch64/fpu/sinh_sve.c                |  6 ++---
 sysdeps/aarch64/fpu/sinhf_sve.c               |  4 ++--
 sysdeps/aarch64/fpu/sinpi_sve.c               |  2 +-
 sysdeps/aarch64/fpu/sinpif_sve.c              |  2 +-
 sysdeps/aarch64/fpu/sv_exp_special_inline.h   |  2 +-
 sysdeps/aarch64/fpu/sv_expf_inline.h          |  2 +-
 sysdeps/aarch64/fpu/sv_expf_special_inline.h  |  4 ++--
 sysdeps/aarch64/fpu/sv_expm1f_inline.h        |  2 +-
 sysdeps/aarch64/fpu/sv_log1p_inline.h         |  2 +-
 sysdeps/aarch64/fpu/sv_log1pf_inline.h        |  2 +-
 sysdeps/aarch64/fpu/sv_math.h                 | 17 ++++++++++++++
 sysdeps/aarch64/fpu/tan_sve.c                 |  4 ++--
 sysdeps/aarch64/fpu/tanf_sve.c                |  4 ++--
 sysdeps/aarch64/fpu/tanh_sve.c                |  4 ++--
 sysdeps/aarch64/fpu/tanhf_sve.c               |  6 ++---
 sysdeps/aarch64/fpu/tanpi_sve.c               |  2 +-
 sysdeps/aarch64/fpu/tanpif_sve.c              |  2 +-
 .../aarch64/fpu/test-double-sve-wrappers.c    |  6 +++--
 sysdeps/aarch64/fpu/test-float-sve-wrappers.c |  6 +++--
 87 files changed, 209 insertions(+), 181 deletions(-)
  

Comments

Joe Ramsay March 30, 2026, 3:06 p.m. UTC | #1
On Fri, Mar 27, 2026 at 05:00:55PM +0000, Adhemerval Zanella wrote:

Hi Adhemerval,

Would it be possible to make this change in AOR first? We prefer to
keep the two in sync as much as possible, and for license reasons it
is preferable to make changes in AOR first. We do a similar thing to
this in AOR by applying target attributes at file scope, but I prefer
your approach of doing it at function scope.

> A new SVE_FUNCTION macro (mapping to
> __attribute__((target("+sve")))) and corresponding pragmas
> (OPTIONS_PUSH_SVE / OPTIONS_POP) are introduced in sv_math.h.
> These are then applied across all SVE-specific math implementations in
> sysdeps/aarch64/fpu/.

Is it possible to use the attributes directly here rather than
pragmas? That way I think the push/pop thing wouldn't be needed?

Thanks,
Joe
  
Wilco Dijkstra March 30, 2026, 4:10 p.m. UTC | #2
Hi Adhemerval,

(adding Joe&Pierre)

> Instead of relying on global compiler flags ('-mcpu=generic+sve
> -march=armv8-a+sve') in the Makefile, this patch applies SVE
> architecture requirements directly at the function level.
>
> A new SVE_FUNCTION macro (mapping to
> __attribute__((target("+sve")))) and corresponding pragmas
> (OPTIONS_PUSH_SVE / OPTIONS_POP) are introduced in sv_math.h.
> These are then applied across all SVE-specific math implementations in
> sysdeps/aarch64/fpu/.

If we do this, it should be done in the AOR version first so we don't diverge
any further (and ideally the sources should be more identical).

Note AOR uses a single target attribute in the header (but then needs a
pop just for LLVM at the end of each file...).

> This allows building glibc with a -march without the need to
> also specify '+sve' (not an usual target, but it helps on testing
> some target without the need to rebuild the toolchain).

You can change CFLAGS to use -march= or -mcpu= and it will work. We use
a generic SVE target for the vector math functions - and that is fine since the
code is almost exclusively intrinsics. So I'd like to understand what goes wrong
if you change CFLAGS.

In principle we could scan CFLAGS for -mcpu=/-march= and add a +sve
rather than use a generic target, but that's QoI. I think the only difference it
would make is emitting RETAA in some helpers if PAC is enabled.

Cheers,
Wilco
  
Adhemerval Zanella Netto March 30, 2026, 8:26 p.m. UTC | #3
On 30/03/26 13:10, Wilco Dijkstra wrote:
> Hi Adhemerval,
> 
> (adding Joe&Pierre)
> 
>> Instead of relying on global compiler flags ('-mcpu=generic+sve
>> -march=armv8-a+sve') in the Makefile, this patch applies SVE
>> architecture requirements directly at the function level.
>>
>> A new SVE_FUNCTION macro (mapping to
>> __attribute__((target("+sve")))) and corresponding pragmas
>> (OPTIONS_PUSH_SVE / OPTIONS_POP) are introduced in sv_math.h.
>> These are then applied across all SVE-specific math implementations in
>> sysdeps/aarch64/fpu/.
> 
> If we do this, it should be done in the AOR version first so we don't diverge
> any further (and ideally the sources should be more identical).
> 
> Note AOR uses a single target attribute in the header (but then needs a
> pop just for LLVM at the end of each file...).
> 
>> This allows building glibc with a -march without the need to
>> also specify '+sve' (not an usual target, but it helps on testing
>> some target without the need to rebuild the toolchain).
> 
> You can change CFLAGS to use -march= or -mcpu= and it will work. We use
> a generic SVE target for the vector math functions - and that is fine since the
> code is almost exclusively intrinsics. So I'd like to understand what goes wrong
> if you change CFLAGS.

The issue is for a build I added the -march to CFLAGS *and* CPPFLAGS and 
since our make rules adds it *before* CFLAGS it messed which is the default 
-march.

So I think this change is not really required, albeit I still think it would be
a good think to disentangle the required ABI from the Makefile rules.

> 
> In principle we could scan CFLAGS for -mcpu=/-march= and add a +sve
> rather than use a generic target, but that's QoI. I think the only difference it
> would make is emitting RETAA in some helpers if PAC is enabled.

Yeah I agree this would be QoI, but I think it would be better done at source
level instead of make level.
  

Patch

diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile
index 998fc08d435..9be2b890881 100644
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -65,7 +65,7 @@  endif
 # add a generic -mcpu and -march with SVE enabled.  Also use a tune for a modern
 # SVE core.
 
-sve-cflags = -mcpu=generic+sve -march=armv8-a+sve -mtune=neoverse-v1
+sve-cflags = -mtune=neoverse-v1
 
 ifeq ($(build-mathvec),yes)
 bench-libmvec = $(addprefix float-advsimd-,$(float-advsimd-funcs)) \
diff --git a/sysdeps/aarch64/fpu/acos_sve.c b/sysdeps/aarch64/fpu/acos_sve.c
index 330a964c4b9..3e2145f8dea 100644
--- a/sysdeps/aarch64/fpu/acos_sve.c
+++ b/sysdeps/aarch64/fpu/acos_sve.c
@@ -54,7 +54,7 @@  static const struct data
    The largest observed error in this region is 1.50 ulp:
    _ZGVsMxv_acos (0x1.252a2cf3fb9acp-1) got 0x1.ec1a46aa82901p-1
 				       want 0x1.ec1a46aa829p-1.  */
-svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t ptrue = svptrue_b64 ();
diff --git a/sysdeps/aarch64/fpu/acosf_sve.c b/sysdeps/aarch64/fpu/acosf_sve.c
index 5989af8dd68..c9b1b47a109 100644
--- a/sysdeps/aarch64/fpu/acosf_sve.c
+++ b/sysdeps/aarch64/fpu/acosf_sve.c
@@ -51,7 +51,7 @@  static const struct data
    The largest observed error in this region is 1.32 ulps,
    _ZGVsMxv_acosf (0x1.15ba56p-1) got 0x1.feb33p-1
 				 want 0x1.feb32ep-1.  */
-svfloat32_t SV_NAME_F1 (acos) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (acos) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/acosh_sve.c b/sysdeps/aarch64/fpu/acosh_sve.c
index 6d996e3d36c..e81af94c787 100644
--- a/sysdeps/aarch64/fpu/acosh_sve.c
+++ b/sysdeps/aarch64/fpu/acosh_sve.c
@@ -35,7 +35,7 @@  const static struct data
 
    Right before returning we check if x is infinity or if x is lower than 1,
    in which case we return infinity or NaN.  */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svfloat64_t xm1, svfloat64_t y, svbool_t special,
 	      svbool_t pg, const struct data *d)
 {
@@ -54,7 +54,7 @@  special_case (svfloat64_t x, svfloat64_t xm1, svfloat64_t y, svbool_t special,
    argument to log1p falls in the k=0 interval, i.e. x close to 1:
    SV_NAME_D1 (acosh)(0x1.1e80ed12f0ad1p+0) got 0x1.ef0cee7c33ce1p-2
 					   want 0x1.ef0cee7c33ce4p-2.  */
-svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/acoshf_sve.c b/sysdeps/aarch64/fpu/acoshf_sve.c
index e133db5490b..f9b0ed5a3b6 100644
--- a/sysdeps/aarch64/fpu/acoshf_sve.c
+++ b/sysdeps/aarch64/fpu/acoshf_sve.c
@@ -23,7 +23,7 @@ 
 #define One 0x3f800000
 #define Thres 0x20000000 /* asuint(0x1p64) - One.  */
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t xm1, svfloat32_t tmp, svbool_t special)
 {
   svfloat32_t x = svadd_x (svptrue_b32 (), xm1, 1.0f);
@@ -37,7 +37,7 @@  special_case (svfloat32_t xm1, svfloat32_t tmp, svbool_t special)
    Maximum error is 2.47 ULPs:
    SV_NAME_F1 (acosh) (0x1.01ca76p+0) got 0x1.e435a6p-4
 				     want 0x1.e435a2p-4.  */
-svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
 {
   svuint32_t ix = svreinterpret_u32 (x);
   svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres);
diff --git a/sysdeps/aarch64/fpu/acospi_sve.c b/sysdeps/aarch64/fpu/acospi_sve.c
index f5cacf557ed..595ef5f07aa 100644
--- a/sysdeps/aarch64/fpu/acospi_sve.c
+++ b/sysdeps/aarch64/fpu/acospi_sve.c
@@ -56,7 +56,7 @@  static const struct data
    The largest observed error in this region is 2.55 ulp:
    _ZGVsMxv_acospi(0x1.d90d50357410cp-1) got 0x1.ffd43d5dd3a9ep-4
 					want 0x1.ffd43d5dd3a9bp-4.  */
-svfloat64_t SV_NAME_D1 (acospi) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (acospi) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t ptrue = svptrue_b64 ();
diff --git a/sysdeps/aarch64/fpu/acospif_sve.c b/sysdeps/aarch64/fpu/acospif_sve.c
index 6a5b5cca7ca..b981e0f1e82 100644
--- a/sysdeps/aarch64/fpu/acospif_sve.c
+++ b/sysdeps/aarch64/fpu/acospif_sve.c
@@ -49,7 +49,7 @@  static const struct data
    The largest observed error in this region is 2.61 ulps,
    _ZGVsMxv_acospif (0x1.6b232ep-1) got 0x1.fe04bap-3
 				   want 0x1.fe04cp-3.  */
-svfloat32_t SV_NAME_F1 (acospi) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (acospi) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/asin_sve.c b/sysdeps/aarch64/fpu/asin_sve.c
index 7c71251bc06..82fe71207c3 100644
--- a/sysdeps/aarch64/fpu/asin_sve.c
+++ b/sysdeps/aarch64/fpu/asin_sve.c
@@ -51,7 +51,7 @@  static const struct data
    The largest observed error in this region is 2.66 ulp:
    _ZGVsMxv_asin (0x1.04024f6e2a2fbp-1) got 0x1.10b9586f087a8p-1
 				       want 0x1.10b9586f087abp-1.  */
-svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t ptrue = svptrue_b64 ();
diff --git a/sysdeps/aarch64/fpu/asinf_sve.c b/sysdeps/aarch64/fpu/asinf_sve.c
index a2c6d16e183..84d5df4a8fc 100644
--- a/sysdeps/aarch64/fpu/asinf_sve.c
+++ b/sysdeps/aarch64/fpu/asinf_sve.c
@@ -48,7 +48,7 @@  static const struct data
    The largest observed error in this region is 2.41 ulps,
      _ZGVsMxv_asinf (-0x1.00203ep-1) got -0x1.0c3a64p-1
 				    want -0x1.0c3a6p-1.  */
-svfloat32_t SV_NAME_F1 (asin) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (asin) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/asinh_sve.c b/sysdeps/aarch64/fpu/asinh_sve.c
index cb32b60c3de..9cfa1388418 100644
--- a/sysdeps/aarch64/fpu/asinh_sve.c
+++ b/sysdeps/aarch64/fpu/asinh_sve.c
@@ -64,7 +64,7 @@  static const struct data
   .inf = INFINITY
 };
 
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 __sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg)
 {
   /* Double-precision SVE log, copied from SVE log implementation with some
@@ -99,7 +99,7 @@  __sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg)
   return y;
 }
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t ax, svfloat64_t y, svuint64_t sign, svbool_t special,
 	      svbool_t pg, const struct data *d)
 {
@@ -136,7 +136,7 @@  special_case (svfloat64_t ax, svfloat64_t y, svuint64_t sign, svbool_t special,
    |x| >= 1:
    _ZGVsMxv_asinh(0x1.170469d024505p+0) got 0x1.e3181c43b0f36p-1
 				       want 0x1.e3181c43b0f39p-1.  */
-svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/asinhf_sve.c b/sysdeps/aarch64/fpu/asinhf_sve.c
index b41ed93d717..a46a164baab 100644
--- a/sysdeps/aarch64/fpu/asinhf_sve.c
+++ b/sysdeps/aarch64/fpu/asinhf_sve.c
@@ -22,7 +22,7 @@ 
 
 #define BigBound 0x5f800000 /* asuint(0x1p64).  */
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svuint32_t iax, svuint32_t sign, svfloat32_t y, svbool_t special)
 {
   svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign));
@@ -37,7 +37,7 @@  special_case (svuint32_t iax, svuint32_t sign, svfloat32_t y, svbool_t special)
    Maximum error is 1.92 ULPs:
    SV_NAME_F1 (asinh) (-0x1.0922ecp-1) got -0x1.fd0bccp-2
 				      want -0x1.fd0bc8p-2.  */
-svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg)
 {
   svfloat32_t ax = svabs_x (pg, x);
   svuint32_t iax = svreinterpret_u32 (ax);
diff --git a/sysdeps/aarch64/fpu/asinpi_sve.c b/sysdeps/aarch64/fpu/asinpi_sve.c
index cfc209bb29f..54b3a057e8d 100644
--- a/sysdeps/aarch64/fpu/asinpi_sve.c
+++ b/sysdeps/aarch64/fpu/asinpi_sve.c
@@ -51,7 +51,7 @@  static const struct data
    The largest observed error in this region is 3.48 ulp:
    _ZGVsMxv_asinpi (0x1.03da0c2295424p-1) got 0x1.5b02b3dcafaefp-3
 					 want 0x1.5b02b3dcafaf2p-3.  */
-svfloat64_t SV_NAME_D1 (asinpi) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (asinpi) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t ptrue = svptrue_b64 ();
diff --git a/sysdeps/aarch64/fpu/asinpif_sve.c b/sysdeps/aarch64/fpu/asinpif_sve.c
index 5e55a8d6409..9fec30e30be 100644
--- a/sysdeps/aarch64/fpu/asinpif_sve.c
+++ b/sysdeps/aarch64/fpu/asinpif_sve.c
@@ -47,7 +47,7 @@  static const struct data
    The largest observed error in this region is 3.46 ulps:
    _ZGVsMxv_asinpif (0x1.0df892p-1) got 0x1.6a114cp-3
 				   want 0x1.6a1146p-3.  */
-svfloat32_t SV_NAME_F1 (asinpi) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (asinpi) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t ptrue = svptrue_b32 ();
diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c
index d9b7647a79c..d315ab05d64 100644
--- a/sysdeps/aarch64/fpu/atan2_sve.c
+++ b/sysdeps/aarch64/fpu/atan2_sve.c
@@ -39,7 +39,7 @@  static const struct data
   .c18 = -0x1.44a4b059b6f67p-13, .c19 = 0x1.c4a45029e5a91p-17,
 };
 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
 	      const svbool_t cmp)
 {
@@ -48,7 +48,7 @@  special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
 
 /* Returns a predicate indicating true if the input is the bit representation
    of 0, infinity or nan.  */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
 zeroinfnan (svuint64_t i, const svbool_t pg)
 {
   return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
@@ -59,8 +59,8 @@  zeroinfnan (svuint64_t i, const svbool_t pg)
    x are reasonably close together. The greatest observed error is 1.94 ULP:
    _ZGVsMxvv_atan2 (0x1.8a4bf7167228ap+5, 0x1.84971226bb57bp+5)
    got 0x1.95db19dfef9ccp-1 want 0x1.95db19dfef9cep-1.  */
-svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x,
-				const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x,
+					     const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/atan2f_sve.c b/sysdeps/aarch64/fpu/atan2f_sve.c
index 7f6ababc25d..89e7a6e7377 100644
--- a/sysdeps/aarch64/fpu/atan2f_sve.c
+++ b/sysdeps/aarch64/fpu/atan2f_sve.c
@@ -33,7 +33,7 @@  static const struct data
 };
 
 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
 	      const svbool_t cmp)
 {
@@ -42,7 +42,7 @@  special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
 
 /* Returns a predicate indicating true if the input is the bit representation
    of 0, infinity or nan.  */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
 zeroinfnan (svuint32_t i, const svbool_t pg)
 {
   return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
@@ -54,8 +54,8 @@  zeroinfnan (svuint32_t i, const svbool_t pg)
    observed error is 2.21 ULP:
    _ZGVnN4vv_atan2f (0x1.a04aa8p+6, 0x1.9a274p+6) got 0x1.95ed3ap-1
 						 want 0x1.95ed36p-1.  */
-svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x,
-				const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x,
+					     const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t ptrue = svptrue_b32 ();
diff --git a/sysdeps/aarch64/fpu/atan2pi_sve.c b/sysdeps/aarch64/fpu/atan2pi_sve.c
index cccd0a84e10..bd33fd3e268 100644
--- a/sysdeps/aarch64/fpu/atan2pi_sve.c
+++ b/sysdeps/aarch64/fpu/atan2pi_sve.c
@@ -44,7 +44,7 @@  static const struct data
 #define OneOverPi sv_f64 (0x1.45f306dc9c883p-2)
 
 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
 	      const svbool_t cmp)
 {
@@ -54,7 +54,7 @@  special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
 
 /* Returns a predicate indicating true if the input is the bit representation
    of 0, infinity or nan.  */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
 zeroinfnan (svuint64_t i, const svbool_t pg)
 {
   return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
@@ -65,8 +65,8 @@  zeroinfnan (svuint64_t i, const svbool_t pg)
    Maximum observed error is 3.11 ulps:
    _ZGVsMxvv_atan2pi (0x1.ef284a877f6b5p+6, 0x1.03fdde8242b17p+7)
    got 0x1.f00f800163079p-3 want 0x1.f00f800163076p-3.  */
-svfloat64_t SV_NAME_D2 (atan2pi) (svfloat64_t y, svfloat64_t x,
-				  const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D2 (atan2pi) (svfloat64_t y, svfloat64_t x,
+					       const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t ptrue = svptrue_b64 ();
diff --git a/sysdeps/aarch64/fpu/atan2pif_sve.c b/sysdeps/aarch64/fpu/atan2pif_sve.c
index fe1451dbd9a..0da7f9143b0 100644
--- a/sysdeps/aarch64/fpu/atan2pif_sve.c
+++ b/sysdeps/aarch64/fpu/atan2pif_sve.c
@@ -44,7 +44,7 @@  static const struct data
 #define OneOverPi sv_f32 (0x1.45f307p-2)
 
 /* Special cases i.e. 0, infinity, nan (fall back to scalar calls).  */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
 	      const svbool_t cmp)
 {
@@ -54,7 +54,7 @@  special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
 
 /* Returns a predicate indicating true if the input is the bit representation
    of 0, infinity or nan.  */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
 zeroinfnan (svuint32_t i, const svbool_t pg, const struct data *d)
 {
   return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
@@ -66,8 +66,8 @@  zeroinfnan (svuint32_t i, const svbool_t pg, const struct data *d)
    observed error is 2.90 ULP:
    _ZGVsMxvv_atan2pif (0x1.a28542p+5, 0x1.adb7c6p+5) got 0x1.f76524p-3
 						    want 0x1.f7651ep-3.  */
-svfloat32_t SV_NAME_F2 (atan2pi) (svfloat32_t y, svfloat32_t x,
-				  const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F2 (atan2pi) (svfloat32_t y, svfloat32_t x,
+					       const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t ptrue = svptrue_b32 ();
diff --git a/sysdeps/aarch64/fpu/atan_sve.c b/sysdeps/aarch64/fpu/atan_sve.c
index 5477090d6aa..586173aba71 100644
--- a/sysdeps/aarch64/fpu/atan_sve.c
+++ b/sysdeps/aarch64/fpu/atan_sve.c
@@ -49,7 +49,7 @@  static const struct data
    error is 2.08 ulps:
    _ZGVsMxv_atan (0x1.000a7c56975e8p+0) got 0x1.922a3163e15c2p-1
 				       want 0x1.922a3163e15c4p-1.  */
-svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/atanf_sve.c b/sysdeps/aarch64/fpu/atanf_sve.c
index b93bdddb0ac..f1f3fafeaca 100644
--- a/sysdeps/aarch64/fpu/atanf_sve.c
+++ b/sysdeps/aarch64/fpu/atanf_sve.c
@@ -48,7 +48,7 @@  static const struct data
    Largest observed error is 2.12 ULP:
    _ZGVsMxv_atanf (0x1.03d4cep+0) got 0x1.95ed3ap-1
 				 want 0x1.95ed36p-1.  */
-svfloat32_t SV_NAME_F1 (atan) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (atan) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t ptrue = svptrue_b32 ();
diff --git a/sysdeps/aarch64/fpu/atanh_sve.c b/sysdeps/aarch64/fpu/atanh_sve.c
index 558984974d5..313f483e7ca 100644
--- a/sysdeps/aarch64/fpu/atanh_sve.c
+++ b/sysdeps/aarch64/fpu/atanh_sve.c
@@ -27,7 +27,7 @@  static const struct data
   double nan;
 } data = { .half = 0x3fe0000000000000, .inf = INFINITY, .nan = NAN };
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t ax, svfloat64_t y, svbool_t pg, svbool_t special,
 	      svfloat64_t halfsign, const struct data *d)
 {
@@ -40,7 +40,7 @@  special_case (svfloat64_t ax, svfloat64_t y, svbool_t pg, svbool_t special,
    The greatest observed error is 3.3 ULP:
    _ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
 				      want 0x1.ffd8ff31b501cp-6.  */
-svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/atanhf_sve.c b/sysdeps/aarch64/fpu/atanhf_sve.c
index 8e07fe5a829..03358e2ba25 100644
--- a/sysdeps/aarch64/fpu/atanhf_sve.c
+++ b/sysdeps/aarch64/fpu/atanhf_sve.c
@@ -23,7 +23,7 @@ 
 #define One (0x3f800000)
 #define Half (0x3f000000)
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svuint32_t iax, svuint32_t sign, svfloat32_t halfsign,
 	      svfloat32_t y, svbool_t special)
 {
@@ -36,7 +36,7 @@  special_case (svuint32_t iax, svuint32_t sign, svfloat32_t halfsign,
    The maximum error is 1.99 ULP:
    _ZGVsMxv_atanhf(0x1.f1583p-5) got 0x1.f1f4fap-5
 				want 0x1.f1f4f6p-5.  */
-svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
 {
   svfloat32_t ax = svabs_x (pg, x);
   svuint32_t iax = svreinterpret_u32 (ax);
diff --git a/sysdeps/aarch64/fpu/atanpi_sve.c b/sysdeps/aarch64/fpu/atanpi_sve.c
index 0228270de81..bc14c0d8106 100644
--- a/sysdeps/aarch64/fpu/atanpi_sve.c
+++ b/sysdeps/aarch64/fpu/atanpi_sve.c
@@ -58,7 +58,7 @@  static const struct data
    error is 2.80 ulps:
    _ZGVsMxv_atanpi(0x1.f19587d63c76fp-1) got 0x1.f6b1304817d02p-3
 					want 0x1.f6b1304817d05p-3.  */
-svfloat64_t SV_NAME_D1 (atanpi) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (atanpi) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/atanpif_sve.c b/sysdeps/aarch64/fpu/atanpif_sve.c
index 4d74d90b2b7..9f2d33b549a 100644
--- a/sysdeps/aarch64/fpu/atanpif_sve.c
+++ b/sysdeps/aarch64/fpu/atanpif_sve.c
@@ -41,7 +41,7 @@  static const struct data
    Largest observed error is 2.59 ULP, close to +/-1.0:
    _ZGVsMxv_atanpif(0x1.f2a89cp-1) got 0x1.f76524p-3
 				  want 0x1.f7651ep-3.  */
-svfloat32_t SV_NAME_F1 (atanpi) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (atanpi) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t ptrue = svptrue_b32 ();
diff --git a/sysdeps/aarch64/fpu/cbrt_sve.c b/sysdeps/aarch64/fpu/cbrt_sve.c
index 86405f61949..a38d1d49c29 100644
--- a/sysdeps/aarch64/fpu/cbrt_sve.c
+++ b/sysdeps/aarch64/fpu/cbrt_sve.c
@@ -45,13 +45,13 @@  const static struct data
 #define MantissaMask 0x000fffffffffffff
 #define HalfExp 0x3fe0000000000000
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
 {
   return sv_call_f64 (cbrt, x, y, special);
 }
 
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 shifted_lookup (const svbool_t pg, const float64_t *table, svint64_t i)
 {
   return svld1_gather_index (pg, table, svadd_x (pg, i, 2));
@@ -64,7 +64,7 @@  shifted_lookup (const svbool_t pg, const float64_t *table, svint64_t i)
    is an integer.
    _ZGVsMxv_cbrt (0x0.3fffb8d4413f3p-1022) got 0x1.965f53b0e5d97p-342
 					  want 0x1.965f53b0e5d95p-342.  */
-svfloat64_t SV_NAME_D1 (cbrt) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (cbrt) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/cbrtf_sve.c b/sysdeps/aarch64/fpu/cbrtf_sve.c
index f51f2df71d4..27b5ea2de5b 100644
--- a/sysdeps/aarch64/fpu/cbrtf_sve.c
+++ b/sysdeps/aarch64/fpu/cbrtf_sve.c
@@ -41,13 +41,13 @@  const static struct data
 #define MantissaMask 0x007fffff
 #define HalfExp 0x3f000000
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
 {
   return sv_call_f32 (cbrtf, x, y, special);
 }
 
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 shifted_lookup (const svbool_t pg, const float32_t *table, svint32_t i)
 {
   return svld1_gather_index (pg, table, svadd_x (pg, i, 2));
@@ -59,7 +59,7 @@  shifted_lookup (const svbool_t pg, const float32_t *table, svint32_t i)
    0x1.85a2aa and the exponent is a multiple of 3, for example:
    _ZGVsMxv_cbrtf (0x1.85a2aap+3) got 0x1.267936p+1
 				 want 0x1.267932p+1.  */
-svfloat32_t SV_NAME_F1 (cbrt) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (cbrt) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/cos_sve.c b/sysdeps/aarch64/fpu/cos_sve.c
index 528391fe0c8..3ac232e8560 100644
--- a/sysdeps/aarch64/fpu/cos_sve.c
+++ b/sysdeps/aarch64/fpu/cos_sve.c
@@ -36,7 +36,7 @@  static const struct data
 
 #define RangeVal 0x4160000000000000 /* asuint64 (0x1p23).  */
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svfloat64_t y, svbool_t oob)
 {
   return sv_call_f64 (cos, x, y, oob);
@@ -47,7 +47,7 @@  special_case (svfloat64_t x, svfloat64_t y, svbool_t oob)
    Maximum measured error: 2.108 ULPs.
    SV_NAME_D1 (cos)(0x1.9b0ba158c98f3p+7) got -0x1.fddd4c65c7f07p-3
 					 want -0x1.fddd4c65c7f05p-3.  */
-svfloat64_t SV_NAME_D1 (cos) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (cos) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/cosf_sve.c b/sysdeps/aarch64/fpu/cosf_sve.c
index 680d89a8fa9..291fc7e3fb5 100644
--- a/sysdeps/aarch64/fpu/cosf_sve.c
+++ b/sysdeps/aarch64/fpu/cosf_sve.c
@@ -36,7 +36,7 @@  static const struct data
 
 #define RangeVal 0x49800000 /* asuint32(0x1p20f).  */
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svfloat32_t y, svbool_t oob)
 {
   return sv_call_f32 (cosf, x, y, oob);
@@ -47,7 +47,7 @@  special_case (svfloat32_t x, svfloat32_t y, svbool_t oob)
    Maximum measured error: 2.06 ULPs.
    SV_NAME_F1 (cos)(0x1.dea2f2p+19) got 0x1.fffe7ap-6
 				   want 0x1.fffe76p-6.  */
-svfloat32_t SV_NAME_F1 (cos) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (cos) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/cosh_sve.c b/sysdeps/aarch64/fpu/cosh_sve.c
index 39754ac01ad..631a938f8be 100644
--- a/sysdeps/aarch64/fpu/cosh_sve.c
+++ b/sysdeps/aarch64/fpu/cosh_sve.c
@@ -46,7 +46,7 @@  static const struct data
    Functionally identical to FEXPA exp(x), but an adjustment in
    the shift value which leads to a reduction in the exponent of scale by 1,
    thus halving the result at no cost.  */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 exp_over_two_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
 {
   /* Calculate exp(x).  */
@@ -76,7 +76,7 @@  exp_over_two_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
    cosh (A + B) = cosh(A)cosh(B) + sinh(A)sinh(B)
    By choosing sufficiently large values whereby after rounding cosh == sinh,
    this can be simplified into: cosh (A + B) = cosh(A) * e^B.  */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svbool_t pg, svbool_t special, svfloat64_t t,
 	      const struct data *d)
 {
@@ -108,7 +108,7 @@  special_case (svfloat64_t x, svbool_t pg, svbool_t special, svfloat64_t t,
    The greatest observed error is 2.10 + 0.5 ULP:
    _ZGVsMxv_cosh (-0x1.2acb2978bd15ep+4) got 0x1.ebbd8806ea342p+25
 					want 0x1.ebbd8806ea33fp+25.  */
-svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/coshf_sve.c b/sysdeps/aarch64/fpu/coshf_sve.c
index a37456e5b72..4154de5bc6e 100644
--- a/sysdeps/aarch64/fpu/coshf_sve.c
+++ b/sysdeps/aarch64/fpu/coshf_sve.c
@@ -38,7 +38,7 @@  static const struct data
    cosh (A + B) = cosh(A)cosh(B) + sinh(A)sinh(B)
    By choosing sufficiently large values whereby after rounding cosh == sinh,
    this can be simplified into: cosh (A + B) = cosh(A) * e^B.  */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 special_case (svfloat32_t x, svbool_t special, svfloat32_t half_e,
 	      svfloat32_t half_over_e, const struct data *d)
 {
@@ -70,7 +70,7 @@  special_case (svfloat32_t x, svbool_t special, svfloat32_t half_e,
    Maximum error is 2.55 +0.5 ULP:
    _ZGVsMxv_coshf(-0x1.5b40f4p+1) got 0x1.e47748p+2
 				 want 0x1.e4774ep+2.  */
-svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/cospi_sve.c b/sysdeps/aarch64/fpu/cospi_sve.c
index 555752bc99e..6b48077e6be 100644
--- a/sysdeps/aarch64/fpu/cospi_sve.c
+++ b/sysdeps/aarch64/fpu/cospi_sve.c
@@ -38,7 +38,7 @@  static const struct data
    Maximum error 3.20 ULP:
    _ZGVsMxv_cospi(0x1.f18ba32c63159p-6) got 0x1.fdabf595f9763p-1
 				       want 0x1.fdabf595f9766p-1.  */
-svfloat64_t SV_NAME_D1 (cospi) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (cospi) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/cospif_sve.c b/sysdeps/aarch64/fpu/cospif_sve.c
index 54af83db62f..57d0a12f427 100644
--- a/sysdeps/aarch64/fpu/cospif_sve.c
+++ b/sysdeps/aarch64/fpu/cospif_sve.c
@@ -35,7 +35,7 @@  static const struct data
    Maximum error: 2.60 ULP:
    _ZGVsMxv_cospif(+/-0x1.cae664p-4) got 0x1.e09c9ep-1
 				    want 0x1.e09c98p-1.  */
-svfloat32_t SV_NAME_F1 (cospi) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (cospi) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/erf_sve.c b/sysdeps/aarch64/fpu/erf_sve.c
index 4260cb590d4..10a3d5e5604 100644
--- a/sysdeps/aarch64/fpu/erf_sve.c
+++ b/sysdeps/aarch64/fpu/erf_sve.c
@@ -54,7 +54,7 @@  static const struct data
    Maximum measure error: 2.29 ULP
    _ZGVsMxv_erf(-0x1.00003c924e5d1p-8) got -0x1.20dd59132ebadp-8
 				      want -0x1.20dd59132ebafp-8.  */
-svfloat64_t SV_NAME_D1 (erf) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (erf) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *dat = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/erfc_sve.c b/sysdeps/aarch64/fpu/erfc_sve.c
index b489cd30f7b..ec6b401af79 100644
--- a/sysdeps/aarch64/fpu/erfc_sve.c
+++ b/sysdeps/aarch64/fpu/erfc_sve.c
@@ -86,7 +86,7 @@  static const struct data
    Maximum measured error: 1.71 ULP
    _ZGVsMxv_erfc(0x1.46cfe976733p+4) got 0x1.e15fcbea3e7afp-608
 				    want 0x1.e15fcbea3e7adp-608.  */
-svfloat64_t SV_NAME_D1 (erfc) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (erfc) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *dat = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/erfcf_sve.c b/sysdeps/aarch64/fpu/erfcf_sve.c
index 78b7b9369b2..ea911329654 100644
--- a/sysdeps/aarch64/fpu/erfcf_sve.c
+++ b/sysdeps/aarch64/fpu/erfcf_sve.c
@@ -59,7 +59,7 @@  static const struct data
    Maximum error: 1.63 ULP (~1.0 ULP for x < 0.0).
    _ZGVsMxv_erfcf(0x1.1dbf7ap+3) got 0x1.f51212p-120
 				want 0x1.f51216p-120.  */
-svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *dat = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/erff_sve.c b/sysdeps/aarch64/fpu/erff_sve.c
index 68e419c794b..61e064332f7 100644
--- a/sysdeps/aarch64/fpu/erff_sve.c
+++ b/sysdeps/aarch64/fpu/erff_sve.c
@@ -49,7 +49,7 @@  static const struct data
      _ZGVsMxv_erff(0x1.c373e6p-9) got 0x1.fd686cp-9 want 0x1.fd6868p-9
    - [0x1.cp-7, 4.0]: 1.26 ULP
      _ZGVsMxv_erff(0x1.1d002ep+0) got 0x1.c4eb9ap-1 want 0x1.c4eb98p-1.  */
-svfloat32_t SV_NAME_F1 (erf) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (erf) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *dat = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/exp10_sve.c b/sysdeps/aarch64/fpu/exp10_sve.c
index fd04bab44a0..42946ed9b9c 100644
--- a/sysdeps/aarch64/fpu/exp10_sve.c
+++ b/sysdeps/aarch64/fpu/exp10_sve.c
@@ -50,7 +50,7 @@  static const struct data
   .special_bound = SpecialBound,
 };
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_exp (svfloat64_t scale, svfloat64_t poly, svfloat64_t n, svuint64_t u,
 	     const struct sv_exp_special_data *ds)
 {
@@ -69,7 +69,7 @@  special_exp (svfloat64_t scale, svfloat64_t poly, svfloat64_t n, svuint64_t u,
    Maximum measured error is 1.02 ulp.
    SV_NAME_D1 (exp10)(-0x1.2862fec805e58p+2) got 0x1.885a89551d782p-16
 					    want 0x1.885a89551d781p-16.  */
-svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/exp10f_sve.c b/sysdeps/aarch64/fpu/exp10f_sve.c
index 0c426797ff0..963556f4ffb 100644
--- a/sysdeps/aarch64/fpu/exp10f_sve.c
+++ b/sysdeps/aarch64/fpu/exp10f_sve.c
@@ -46,7 +46,7 @@  static const struct data
   .zero_bound = ZeroBound,
 };
 
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
 {
   /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)),
@@ -73,7 +73,7 @@  sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
   return svmla_x (pg, scale, scale, poly);
 }
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svbool_t pg, svbool_t special,
 	      const struct data *d)
 {
@@ -98,7 +98,7 @@  special_case (svfloat32_t x, svbool_t pg, svbool_t special,
    Worst case error is 2.86 ULP +0.50 ULP.
    _ZGVsMxv_exp10f (0x1.31b778p+5) got 0x1.ed399p+126
 				  want 0x1.ed398ap+126.  */
-svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t special = svacgt (pg, x, d->special_bound);
diff --git a/sysdeps/aarch64/fpu/exp10m1_sve.c b/sysdeps/aarch64/fpu/exp10m1_sve.c
index e93883c2cf4..c174ec44317 100644
--- a/sysdeps/aarch64/fpu/exp10m1_sve.c
+++ b/sysdeps/aarch64/fpu/exp10m1_sve.c
@@ -72,7 +72,7 @@  static const struct data
   },
 };
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_m1 (svbool_t special, svfloat64_t y, svfloat64_t z, svfloat64_t scale,
 	    svfloat64_t poly, svfloat64_t n,
 	    const struct sv_exp_special_data *ds)
@@ -94,7 +94,7 @@  special_m1 (svbool_t special, svfloat64_t y, svfloat64_t z, svfloat64_t scale,
    Maximum measured error is 2.87 + 0.5 ULP:
    _ZGVsMxv_exp10m1(0x1.64645f11e94c6p-4) got 0x1.c64d54eb7658dp-3
 					 want 0x1.c64d54eb7658ap-3.  */
-svfloat64_t SV_NAME_D1 (exp10m1) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (exp10m1) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/exp10m1f_sve.c b/sysdeps/aarch64/fpu/exp10m1f_sve.c
index de0b0b8d1d6..d9d31469acb 100644
--- a/sysdeps/aarch64/fpu/exp10m1f_sve.c
+++ b/sysdeps/aarch64/fpu/exp10m1f_sve.c
@@ -57,7 +57,7 @@  static const struct data
    Algorithm is accurate to 1.68 + 0.5 ULP.
    _ZGVnN4v_exp10m1f(0x1.3aeffep-3) got 0x1.b3139p-2
 				   want 0x1.b3138cp-2.  */
-svfloat32_t SV_NAME_F1 (exp10m1) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (exp10m1) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/exp2_sve.c b/sysdeps/aarch64/fpu/exp2_sve.c
index a6f002f9b0c..65f9d092185 100644
--- a/sysdeps/aarch64/fpu/exp2_sve.c
+++ b/sysdeps/aarch64/fpu/exp2_sve.c
@@ -46,7 +46,7 @@  static const struct data
   .special_bound = SpecialBound,
 };
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_exp (svfloat64_t poly, svfloat64_t scale, svfloat64_t n, svfloat64_t z,
 	     const struct sv_exp_special_data *ds)
 {
@@ -66,7 +66,7 @@  special_exp (svfloat64_t poly, svfloat64_t scale, svfloat64_t n, svfloat64_t z,
    Maximum measured error is 0.52 + 0.5 ulp.
    _ZGVsMxv_exp2 (0x1.3b72ad5b701bfp-1) got 0x1.8861641b49e08p+0
 				       want 0x1.8861641b49e07p+0.  */
-svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c
index b71f472d449..30beef003fe 100644
--- a/sysdeps/aarch64/fpu/exp2f_sve.c
+++ b/sysdeps/aarch64/fpu/exp2f_sve.c
@@ -43,7 +43,7 @@  static const struct data
   .zero_bound = ZeroBound,
 };
 
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
 {
   /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
@@ -60,7 +60,7 @@  sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
   return svmla_x (pg, scale, scale, poly);
 }
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svbool_t pg, svbool_t special,
 	      const struct data *d)
 {
@@ -85,7 +85,7 @@  special_case (svfloat32_t x, svbool_t pg, svbool_t special,
    Worst case error is 2.87 +0.50 ULP.
    _ZGVsMxv_exp2f (0x1.fbcb78p+6) got 0x1.ee1d32p+126
 				 want 0x1.ee1d2cp+126.  */
-svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t special = svacgt (pg, x, d->special_bound);
diff --git a/sysdeps/aarch64/fpu/exp2m1_sve.c b/sysdeps/aarch64/fpu/exp2m1_sve.c
index ef13c26ecb4..30724f7d329 100644
--- a/sysdeps/aarch64/fpu/exp2m1_sve.c
+++ b/sysdeps/aarch64/fpu/exp2m1_sve.c
@@ -64,7 +64,7 @@  static const struct data
   },
 };
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_m1 (svbool_t special, svfloat64_t y, svfloat64_t z, svfloat64_t scale,
 	    svfloat64_t poly, svfloat64_t n,
 	    const struct sv_exp_special_data *ds)
@@ -86,7 +86,7 @@  special_m1 (svbool_t special, svfloat64_t y, svfloat64_t z, svfloat64_t scale,
    Maximum error is 2.58 + 0.5 ULP.
    _ZGVsMxv_exp2m1(0x1.0284a345c99bfp-8) got 0x1.66df630cd2965p-9
 					want 0x1.66df630cd2962p-9.  */
-svfloat64_t SV_NAME_D1 (exp2m1) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (exp2m1) (svfloat64_t x, svbool_t pg)
 {
   /* exp2(x) = 2^n (1 + poly(r))
      x = n + r, with r in [-1/2N, 1/2N].
diff --git a/sysdeps/aarch64/fpu/exp2m1f_sve.c b/sysdeps/aarch64/fpu/exp2m1f_sve.c
index d53a805e19c..9f3ea31fd58 100644
--- a/sysdeps/aarch64/fpu/exp2m1f_sve.c
+++ b/sysdeps/aarch64/fpu/exp2m1f_sve.c
@@ -50,7 +50,7 @@  static const struct data
    The maximum error is  1.76 + 0.5 ULP.
    _ZGVsMxv_exp2m1f (0x1.018af8p-1) got 0x1.ab2ebcp-2
 				   want 0x1.ab2ecp-2.  */
-svfloat32_t SV_NAME_F1 (exp2m1) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (exp2m1) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/exp_sve.c b/sysdeps/aarch64/fpu/exp_sve.c
index 7c8c5e0559e..4b780b06d17 100644
--- a/sysdeps/aarch64/fpu/exp_sve.c
+++ b/sysdeps/aarch64/fpu/exp_sve.c
@@ -46,7 +46,7 @@  static const struct data
   .special_bound = SpecialBound,
 };
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_exp (svfloat64_t poly, svfloat64_t scale, svfloat64_t n, svuint64_t u,
 	     const struct sv_exp_special_data *ds)
 {
@@ -64,7 +64,7 @@  special_exp (svfloat64_t poly, svfloat64_t scale, svfloat64_t n, svuint64_t u,
 /* SVE exp algorithm. Maximum measured error is 1.01ulps:
    SV_NAME_D1 (exp)(0x1.4619d7b04da41p+6) got 0x1.885d9acc41da7p+117
 					 want 0x1.885d9acc41da6p+117.  */
-svfloat64_t SV_NAME_D1 (exp) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (exp) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/expf_sve.c b/sysdeps/aarch64/fpu/expf_sve.c
index 9c552958edb..8be986e3136 100644
--- a/sysdeps/aarch64/fpu/expf_sve.c
+++ b/sysdeps/aarch64/fpu/expf_sve.c
@@ -48,7 +48,7 @@  static const struct data
   .zero_bound = ZeroBound,
 };
 
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 expf_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
 {
   /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
@@ -78,7 +78,7 @@  expf_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
    The approximation needs to match that of the fast path.
    To achieve this we assemble the same polynomial, ie `r + 0.5 * r^2`,
    then we conditionally add an extra `c2 * r^3` term.  */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 expf_slow_inline (svfloat32_t x, const svbool_t special, const struct data *d)
 {
   svfloat32_t lane_constants = svld1rq (svptrue_b32 (), &d->ln2_hi);
@@ -102,7 +102,7 @@  expf_slow_inline (svfloat32_t x, const svbool_t special, const struct data *d)
   return svmla_x (svptrue_b32 (), scale, scale, poly);
 }
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svbool_t pg, svbool_t special,
 	      const struct data *d)
 {
@@ -127,7 +127,7 @@  special_case (svfloat32_t x, svbool_t pg, svbool_t special,
    Worst-case error is 2.70 +0.50 ULP:
    _ZGVsMxv_expf(0x1.5fec38p+6) got 0x1.e7831ep+126
 			       want 0x1.e78318p+126.  */
-svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svbool_t special = svacgt (pg, x, d->special_bound);
diff --git a/sysdeps/aarch64/fpu/expm1_sve.c b/sysdeps/aarch64/fpu/expm1_sve.c
index 52abd8af1b9..6132e19e106 100644
--- a/sysdeps/aarch64/fpu/expm1_sve.c
+++ b/sysdeps/aarch64/fpu/expm1_sve.c
@@ -66,7 +66,7 @@  static const struct data
   .fexpa_bound = FexpaBound,
 };
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_m1 (svbool_t special, svfloat64_t y, svfloat64_t z, svfloat64_t scale,
 	    svfloat64_t poly, svfloat64_t n,
 	    const struct sv_exp_special_data *ds)
@@ -88,7 +88,7 @@  special_m1 (svbool_t special, svfloat64_t y, svfloat64_t z, svfloat64_t scale,
    Maximum measured error is 2.81 + 0.5 ULP:
    _ZGVsMxv_expm1 (0x1.974060e619bfp-3) got 0x1.c290e5858bb53p-3
 				       want 0x1.c290e5858bb5p-3.  */
-svfloat64_t SV_NAME_D1 (expm1) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (expm1) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/expm1f_sve.c b/sysdeps/aarch64/fpu/expm1f_sve.c
index 07e13556a2d..5234342de86 100644
--- a/sysdeps/aarch64/fpu/expm1f_sve.c
+++ b/sysdeps/aarch64/fpu/expm1f_sve.c
@@ -50,7 +50,7 @@  static const struct data
    Maximum error is 1.02 +0.5ULP:
    _ZGVsMxv_expm1f(0x1.8f4ebcp-2) got 0x1.e859dp-2
 				 want 0x1.e859d4p-2.  */
-svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/hypot_sve.c b/sysdeps/aarch64/fpu/hypot_sve.c
index d6d56c35b15..958143dbe1c 100644
--- a/sysdeps/aarch64/fpu/hypot_sve.c
+++ b/sysdeps/aarch64/fpu/hypot_sve.c
@@ -27,7 +27,7 @@  static const struct data
   .thres = 0x7300000000000000,	    /* asuint (inf) - tiny_bound.  */
 };
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t sqsum, svfloat64_t x, svfloat64_t y, svbool_t pg,
 	      svbool_t special)
 {
@@ -39,7 +39,8 @@  special_case (svfloat64_t sqsum, svfloat64_t x, svfloat64_t y, svbool_t pg,
    _ZGVsMxvv_hypot (-0x1.6a22d0412cdd3p+352, 0x1.d3d89bd66fb1ap+330)
     got 0x1.6a22d0412cfp+352
    want 0x1.6a22d0412cf01p+352.  */
-svfloat64_t SV_NAME_D2 (hypot) (svfloat64_t x, svfloat64_t y, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D2 (hypot) (svfloat64_t x, svfloat64_t y,
+					     svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/hypotf_sve.c b/sysdeps/aarch64/fpu/hypotf_sve.c
index 99edf90b0ab..0d9ec38d3c3 100644
--- a/sysdeps/aarch64/fpu/hypotf_sve.c
+++ b/sysdeps/aarch64/fpu/hypotf_sve.c
@@ -22,7 +22,7 @@ 
 #define TinyBound 0x0c800000 /* asuint (0x1p-102).  */
 #define Thres 0x73000000     /* 0x70000000 - TinyBound.  */
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t sqsum, svfloat32_t x, svfloat32_t y, svbool_t pg,
 	      svbool_t special)
 {
@@ -33,8 +33,8 @@  special_case (svfloat32_t sqsum, svfloat32_t x, svfloat32_t y, svbool_t pg,
    Maximum error observed is 1.21 ULP:
    _ZGVsMxvv_hypotf (0x1.6a213cp-19, -0x1.32b982p-26) got 0x1.6a2346p-19
 						     want 0x1.6a2344p-19.  */
-svfloat32_t SV_NAME_F2 (hypot) (svfloat32_t x, svfloat32_t y,
-				const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F2 (hypot) (svfloat32_t x, svfloat32_t y,
+					     const svbool_t pg)
 {
   svfloat32_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y);
 
diff --git a/sysdeps/aarch64/fpu/log10_sve.c b/sysdeps/aarch64/fpu/log10_sve.c
index b87ef297ec3..bb39bb9590a 100644
--- a/sysdeps/aarch64/fpu/log10_sve.c
+++ b/sysdeps/aarch64/fpu/log10_sve.c
@@ -45,7 +45,7 @@  static const struct data
   .thresh = 0x7fe0000000000000, /* infinity - min.  */
 };
 
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 v_log10_inline (svuint64_t ix, const svbool_t pg, const struct data *d)
 {
   /* x = 2^k z; where z is in range [Off,2*Off) and exact.
@@ -89,7 +89,7 @@  v_log10_inline (svuint64_t ix, const svbool_t pg, const struct data *d)
    calculation of x * 2^52 (2^mantissa) to normalise the number at entry to
    the log function and then subtract log10(2) * 52 to re-subnormalise the
    output to the correct result.  */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svbool_t pg, svbool_t special)
 {
   const struct data *d = ptr_barrier (&data);
@@ -122,7 +122,7 @@  special_case (svfloat64_t x, svbool_t pg, svbool_t special)
    Maximum measured error is 2.46 ulps.
    SV_NAME_D1 (log10)(0x1.131956cd4b627p+0) got 0x1.fffbdf6eaa669p-6
 					   want 0x1.fffbdf6eaa667p-6.  */
-svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/log10f_sve.c b/sysdeps/aarch64/fpu/log10f_sve.c
index 365c6065350..70d04422a12 100644
--- a/sysdeps/aarch64/fpu/log10f_sve.c
+++ b/sysdeps/aarch64/fpu/log10f_sve.c
@@ -52,7 +52,7 @@  static const struct data
 
 #define MantissaMask 0x007fffff
 
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 v_log10f_inline (svuint32_t u_off, const svbool_t pg, const struct data *d)
 {
   /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3.  */
@@ -90,7 +90,7 @@  v_log10f_inline (svuint32_t u_off, const svbool_t pg, const struct data *d)
    calculation of x * 2^23 (2^mantissa) to normalise the number at entry to
    the log function and then subtract log10(2) * 23 to re-subnormalise the
    output to the correct result.  */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svbool_t pg, svbool_t special,
 	      const struct data *d)
 {
@@ -125,7 +125,7 @@  special_case (svfloat32_t x, svbool_t pg, svbool_t special,
    Maximum error is 3.31ulps:
    SV_NAME_F1 (log10)(0x1.555c16p+0) got 0x1.ffe2fap-4
 				    want 0x1.ffe2f4p-4.  */
-svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/log10p1_sve.c b/sysdeps/aarch64/fpu/log10p1_sve.c
index 90ad8ec01ba..5c1bf929b92 100644
--- a/sysdeps/aarch64/fpu/log10p1_sve.c
+++ b/sysdeps/aarch64/fpu/log10p1_sve.c
@@ -58,7 +58,7 @@  static const struct data
   .bottom_mask = 0x00000000ffffffff,
 };
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svfloat64_t y, svbool_t special, svbool_t pg)
 {
 
@@ -73,7 +73,7 @@  special_case (svfloat64_t x, svfloat64_t y, svbool_t special, svbool_t pg)
    Worst-case error is 2.81 ULP:
    _ZGVsMxv_log10p1(0x1.25c3f17d7602p-53) got 0x1.fe52a1624aad1p-55
 					 want 0x1.fe52a1624aacep-55.  */
-svfloat64_t SV_NAME_D1 (log10p1) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (log10p1) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/log10p1f_sve.c b/sysdeps/aarch64/fpu/log10p1f_sve.c
index d76f8b2df3c..5a6946672fe 100644
--- a/sysdeps/aarch64/fpu/log10p1f_sve.c
+++ b/sysdeps/aarch64/fpu/log10p1f_sve.c
@@ -47,7 +47,7 @@  static const struct data
 
 #define SignedExpMask sv_s32 (0xff800000)
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svfloat32_t y, const svbool_t pg,
 	      svbool_t special)
 {
@@ -62,7 +62,7 @@  special_case (svfloat32_t x, svfloat32_t y, const svbool_t pg,
    Worst-case error is 3.40 ULP:
    _ZGVsMxv_log10p1f(0x1.8bfff6p+6) got 0x1.000002p+1
 				   want 0x1.fffffep+0.  */
-svfloat32_t SV_NAME_F1 (log10p1) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (log10p1) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/log1p_sve.c b/sysdeps/aarch64/fpu/log1p_sve.c
index bde0e8a7d1d..d19933a3a4d 100644
--- a/sysdeps/aarch64/fpu/log1p_sve.c
+++ b/sysdeps/aarch64/fpu/log1p_sve.c
@@ -62,7 +62,7 @@  static const struct data
 #define AbsMask 0x7fffffffffffffff
 #define BottomMask 0xffffffff
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
 {
   return sv_call_f64 (log1p, x, y, special);
@@ -72,7 +72,7 @@  special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
    observed error is 2.46 ULP:
    _ZGVsMxv_log1p(0x1.654a1307242a4p+11) got 0x1.fd5565fb590f4p+2
 					want 0x1.fd5565fb590f6p+2.  */
-svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
   svuint64_t ix = svreinterpret_u64 (x);
diff --git a/sysdeps/aarch64/fpu/log1pf_sve.c b/sysdeps/aarch64/fpu/log1pf_sve.c
index f4d409eb507..0e776f2c777 100644
--- a/sysdeps/aarch64/fpu/log1pf_sve.c
+++ b/sysdeps/aarch64/fpu/log1pf_sve.c
@@ -20,7 +20,7 @@ 
 #include "sv_math.h"
 #include "sv_log1pf_inline.h"
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svbool_t special)
 {
   return sv_call_f32 (log1pf, x, sv_log1pf_inline (x, svptrue_b32 ()),
@@ -31,7 +31,7 @@  special_case (svfloat32_t x, svbool_t special)
    error is 1.27 ULP very close to 0.5.
    _ZGVsMxv_log1pf(0x1.fffffep-2) got 0x1.9f324p-2
 				 want 0x1.9f323ep-2.  */
-svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg)
+SVE_FUNCTION SVE_FUNCTION svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg)
 {
   /* x < -1, Inf/Nan.  */
   svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000);
diff --git a/sysdeps/aarch64/fpu/log2_sve.c b/sysdeps/aarch64/fpu/log2_sve.c
index 918ed420258..c3f639082e1 100644
--- a/sysdeps/aarch64/fpu/log2_sve.c
+++ b/sysdeps/aarch64/fpu/log2_sve.c
@@ -43,7 +43,7 @@  static const struct data
   .thresh = (0x7fe0000000000000), /* infinity - min.  */
 };
 
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 v_log2_inline (svuint64_t ix, const svbool_t pg, const struct data *d)
 {
   /* x = 2^k z; where z is in range [Off,2*Off) and exact.
@@ -81,7 +81,7 @@  v_log2_inline (svuint64_t ix, const svbool_t pg, const struct data *d)
    calculation of x * 2^52 (2^mantissa) to normalise the number at entry to
    the log function and then subtract log2(2) * 52 = 52 to re-subnormalise the
    output to the correct result.  */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svbool_t pg, svbool_t special)
 {
   const struct data *d = ptr_barrier (&data);
@@ -116,7 +116,7 @@  special_case (svfloat64_t x, svbool_t pg, svbool_t special)
    The maximum observed error is 2.58 ULP:
    SV_NAME_D1 (log2)(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5
 					  want 0x1.fffb34198d9ddp-5.  */
-svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/log2f_sve.c b/sysdeps/aarch64/fpu/log2f_sve.c
index 89d676e5a92..4c76b30f54d 100644
--- a/sysdeps/aarch64/fpu/log2f_sve.c
+++ b/sysdeps/aarch64/fpu/log2f_sve.c
@@ -49,7 +49,7 @@  static const struct data
 
 #define MantissaMask 0x007fffff
 
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 v_log2f_inline (svuint32_t u_off, svbool_t pg, const struct data *d)
 {
   svuint32_t u = svand_x (pg, u_off, MantissaMask);
@@ -80,7 +80,7 @@  v_log2f_inline (svuint32_t u_off, svbool_t pg, const struct data *d)
    calculation of x * 2^23 (2^mantissa) to normalise the number at entry to
    the log function and then subtract log2(2) * 23 = 23 to re-subnormalise the
    output to the correct result.  */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 special_case (svfloat32_t x, svbool_t pg, svbool_t special,
 	      const struct data *d)
 {
@@ -115,7 +115,7 @@  special_case (svfloat32_t x, svbool_t pg, svbool_t special,
    Maximum error is 2.48 ULPs:
    SV_NAME_F1 (log2)(0x1.558174p+0) got 0x1.a9be84p-2
 				   want 0x1.a9be8p-2.  */
-svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/log2p1_sve.c b/sysdeps/aarch64/fpu/log2p1_sve.c
index c68d9dde9e9..647ae2f531f 100644
--- a/sysdeps/aarch64/fpu/log2p1_sve.c
+++ b/sysdeps/aarch64/fpu/log2p1_sve.c
@@ -53,7 +53,7 @@  static const struct data
   .inv_ln2 = 0x1.71547652b82fep+0,
 };
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svfloat64_t y, svbool_t special, svbool_t pg)
 {
 
@@ -68,7 +68,7 @@  special_case (svfloat64_t x, svfloat64_t y, svbool_t special, svbool_t pg)
    Worst-case error is 3.0 ULP:
    _ZGVsMxv_log2p1(0x1.62e029c6f784fp-18) got 0x1.fff9d9148a06fp-18
 					 want 0x1.fff9d9148a072p-18 .  */
-svfloat64_t SV_NAME_D1 (log2p1) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (log2p1) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/log2p1f_sve.c b/sysdeps/aarch64/fpu/log2p1f_sve.c
index f1048b07568..e9adeb09034 100644
--- a/sysdeps/aarch64/fpu/log2p1f_sve.c
+++ b/sysdeps/aarch64/fpu/log2p1f_sve.c
@@ -34,7 +34,7 @@  static const struct data
   .four = 0x40800000,	 .three_quarters = 0x3f400000
 };
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svfloat32_t y, const svbool_t pg,
 	      svbool_t special)
 {
@@ -49,7 +49,7 @@  special_case (svfloat32_t x, svfloat32_t y, const svbool_t pg,
    Worst-case error is 1.90 ULP:
    _ZGVsMxv_log2p1f(0x1.8789fcp-2) got 0x1.de58d4p-2
 				  want 0x1.de58d8p-2.  */
-svfloat32_t SV_NAME_F1 (log2p1) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (log2p1) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/log_sve.c b/sysdeps/aarch64/fpu/log_sve.c
index 5eaae0ae568..6014769d4e1 100644
--- a/sysdeps/aarch64/fpu/log_sve.c
+++ b/sysdeps/aarch64/fpu/log_sve.c
@@ -44,7 +44,7 @@  static const struct data
   .thresh = (0x7fe0000000000000), /* infinity - min.  */
 };
 
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 v_log_inline (svuint64_t ix, const svbool_t pg, const struct data *d)
 {
   /* x = 2^k z; where z is in range [Off,2*Off) and exact.
@@ -85,7 +85,7 @@  v_log_inline (svuint64_t ix, const svbool_t pg, const struct data *d)
    calculation of x * 2^52 (2^mantissa) to normalise the number at entry to
    the log function and then subtract ln(2) * 52 to re-subnormalise the
    output to the correct result.  */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svbool_t pg, svbool_t special)
 {
   const struct data *d = ptr_barrier (&data);
@@ -118,7 +118,7 @@  special_case (svfloat64_t x, svbool_t pg, svbool_t special)
    Maximum measured error is 2.64 ulp:
    SV_NAME_D1 (log)(0x1.95e54bc91a5e2p+184) got 0x1.fffffffe88cacp+6
 					   want 0x1.fffffffe88cafp+6.  */
-svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/logf_sve.c b/sysdeps/aarch64/fpu/logf_sve.c
index 95af72519fd..acaefebc851 100644
--- a/sysdeps/aarch64/fpu/logf_sve.c
+++ b/sysdeps/aarch64/fpu/logf_sve.c
@@ -49,7 +49,7 @@  static const struct data
 
 #define MantissaMask 0x007fffff
 
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 v_logf_inline (svuint32_t u_off, const svbool_t pg, const struct data *d)
 {
   svuint32_t u = svand_x (pg, u_off, MantissaMask);
@@ -81,7 +81,7 @@  v_logf_inline (svuint32_t u_off, const svbool_t pg, const struct data *d)
    calculation of x * 2^23 (2^mantissa) to normalise the number at entry to
    the log function and then subtract ln(2) * 23 to re-subnormalise the result
    output to the correct result.  */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 special_case (svfloat32_t x, svbool_t pg, svbool_t special,
 	      const struct data *d)
 {
@@ -115,7 +115,7 @@  special_case (svfloat32_t x, svbool_t pg, svbool_t special,
    polynomial as the AdvSIMD routine. Maximum error is 3.34 ULPs:
    SV_NAME_F1 (log)(0x1.557298p+0) got 0x1.26edecp-2
 				  want 0x1.26ede6p-2.  */
-svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/poly_sve_generic.h b/sysdeps/aarch64/fpu/poly_sve_generic.h
index 2c1fb996445..54600f5f4bf 100644
--- a/sysdeps/aarch64/fpu/poly_sve_generic.h
+++ b/sysdeps/aarch64/fpu/poly_sve_generic.h
@@ -31,6 +31,8 @@ 
 # error Cannot use poly_generic without defining DUP
 #endif
 
+OPTIONS_PUSH_SVE
+
 static inline VTYPE VWRAP (pairwise_poly_3) (svbool_t pg, VTYPE x, VTYPE x2,
 					     const STYPE *poly)
 {
@@ -311,3 +313,5 @@  static inline VTYPE VWRAP (pw_horner_18) (svbool_t pg, VTYPE x, VTYPE x2,
   VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
   return svmla_x (pg, p01, x2, p2_18);
 }
+
+OPTIONS_POP
diff --git a/sysdeps/aarch64/fpu/pow_sve.c b/sysdeps/aarch64/fpu/pow_sve.c
index 19062b5375d..0e0e978a44d 100644
--- a/sysdeps/aarch64/fpu/pow_sve.c
+++ b/sysdeps/aarch64/fpu/pow_sve.c
@@ -89,21 +89,21 @@  static const struct data
 };
 
 /* Check if x is an integer.  */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
 sv_isint (svbool_t pg, svfloat64_t x)
 {
   return svcmpeq (pg, svrintz_z (pg, x), x);
 }
 
 /* Check if x is real not integer valued.  */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
 sv_isnotint (svbool_t pg, svfloat64_t x)
 {
   return svcmpne (pg, svrintz_z (pg, x), x);
 }
 
 /* Check if x is an odd integer.  */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
 sv_isodd (svbool_t pg, svfloat64_t x)
 {
   svfloat64_t y = svmul_x (svptrue_b64 (), x, 0.5);
@@ -128,7 +128,7 @@  checkint (uint64_t iy)
 }
 
 /* Top 12 bits (sign and exponent of each double float lane).  */
-static inline svuint64_t
+SVE_FUNCTION static inline svuint64_t
 sv_top12 (svfloat64_t x)
 {
   return svlsr_x (svptrue_b64 (), svreinterpret_u64 (x), 52);
@@ -142,7 +142,7 @@  zeroinfnan (uint64_t i)
 }
 
 /* Returns 1 if input is the bit representation of 0, infinity or nan.  */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
 sv_zeroinfnan (svbool_t pg, svuint64_t i)
 {
   return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),
@@ -156,7 +156,7 @@  sv_zeroinfnan (svbool_t pg, svuint64_t i)
    a double.  (int32_t)KI is the k used in the argument reduction and exponent
    adjustment of scale, positive k here means the result may overflow and
    negative k means the result may underflow.  */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 specialcase (svfloat64_t tmp, svuint64_t sbits, svuint64_t ki, svbool_t cmp)
 {
   svbool_t p_pos = svcmpge_n_f64 (cmp, svreinterpret_f64_u64 (ki), 0.0);
@@ -177,7 +177,7 @@  specialcase (svfloat64_t tmp, svuint64_t sbits, svuint64_t ki, svbool_t cmp)
 /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
    additional 15 bits precision.  IX is the bit representation of x, but
    normalized in the subnormal range using the sign bit for the exponent.  */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail,
 	       const struct data *d)
 {
@@ -239,7 +239,7 @@  sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail,
   return y;
 }
 
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 sv_exp_core (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
 	     svuint64_t sign_bias, svfloat64_t *tmp, svuint64_t *sbits,
 	     svuint64_t *ki, const struct data *d)
@@ -280,7 +280,7 @@  sv_exp_core (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
 
 /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
    The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1.  */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
 	       svuint64_t sign_bias, const struct data *d)
 {
@@ -363,14 +363,15 @@  pow_specialcase (double x, double y)
 }
 
 /* Scalar fallback for special case routines with custom signature.  */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 sv_pow_specialcase (svfloat64_t x1, svfloat64_t x2, svfloat64_t y,
 		    svbool_t cmp)
 {
   return sv_call2_f64 (pow_specialcase, x1, x2, y, cmp);
 }
 
-svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y,
+					   const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/powf_sve.c b/sysdeps/aarch64/fpu/powf_sve.c
index 46b006c845c..3e8b85c2008 100644
--- a/sysdeps/aarch64/fpu/powf_sve.c
+++ b/sysdeps/aarch64/fpu/powf_sve.c
@@ -59,21 +59,21 @@  static const struct data
 #define C(i) sv_f64 (d->exp_poly[i])
 
 /* Check if x is an integer.  */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
 svisint (svbool_t pg, svfloat32_t x)
 {
   return svcmpeq (pg, svrintz_z (pg, x), x);
 }
 
 /* Check if x is real not integer valued.  */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
 svisnotint (svbool_t pg, svfloat32_t x)
 {
   return svcmpne (pg, svrintz_z (pg, x), x);
 }
 
 /* Check if x is an odd integer.  */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
 svisodd (svbool_t pg, svfloat32_t x)
 {
   svfloat32_t y = svmul_x (pg, x, 0.5f);
@@ -81,7 +81,7 @@  svisodd (svbool_t pg, svfloat32_t x)
 }
 
 /* Check if zero, inf or nan.  */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
 sv_zeroinfnan (svbool_t pg, svuint32_t i)
 {
   return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),
@@ -146,14 +146,14 @@  powf_specialcase (float x, float y)
 }
 
 /* Scalar fallback for special case routines with custom signature.  */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)
 {
   return sv_call2_f32 (powf_specialcase, x1, x2, y, cmp);
 }
 
 /* Compute core for half of the lanes in double precision.  */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,
 		  svfloat64_t y, svuint64_t sign_bias, svfloat64_t *pylogx,
 		  const struct data *d)
@@ -197,7 +197,7 @@  sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,
 
 /* Widen vector to double precision and compute core on both halves of the
    vector. Lower cost of promotion by considering all lanes active.  */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
 	      svfloat32_t y, svuint32_t sign_bias, svfloat32_t *pylogx,
 	      const struct data *d)
@@ -243,7 +243,8 @@  sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
    Maximum measured error is 2.57 ULPs:
    SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127
 						   want 0x1.fff862p+127.  */
-svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y,
+					   const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/rsqrt_sve.c b/sysdeps/aarch64/fpu/rsqrt_sve.c
index 789c8060d6b..44fbda62f64 100644
--- a/sysdeps/aarch64/fpu/rsqrt_sve.c
+++ b/sysdeps/aarch64/fpu/rsqrt_sve.c
@@ -34,7 +34,7 @@  static const struct data
   .scale_down = 27,
 };
 
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 inline_rsqrt (svfloat64_t x)
 {
   /* Do estimate instruction.  */
@@ -58,7 +58,7 @@  inline_rsqrt (svfloat64_t x)
   return estimate;
 }
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svbool_t special, const struct data *d)
 {
   x = svscale_f64_m (special, x, sv_s64 (d->scale_up));
@@ -70,7 +70,7 @@  special_case (svfloat64_t x, svbool_t special, const struct data *d)
   Maximum observed error: 1.45 + 0.5
   _ZGVnN2v_rsqrt(0x1.d13fb41254643p+1023) got 0x1.0c8dee1b29dfap-512
 					 want 0x1.0c8dee1b29df8p-512.  */
-svfloat64_t SV_NAME_D1 (rsqrt) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (rsqrt) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/rsqrtf_sve.c b/sysdeps/aarch64/fpu/rsqrtf_sve.c
index 525f3f32371..27f3ef3b061 100644
--- a/sysdeps/aarch64/fpu/rsqrtf_sve.c
+++ b/sysdeps/aarch64/fpu/rsqrtf_sve.c
@@ -35,7 +35,7 @@  static const struct data
   .scale_down = 11,
 };
 
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 inline_rsqrt (svfloat32_t x)
 {
   /* Do estimate instruction.  */
@@ -54,7 +54,7 @@  inline_rsqrt (svfloat32_t x)
   return estimate;
 }
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svbool_t special, const struct data *d)
 {
   x = svscale_f32_m (special, x, sv_s32 (d->scale_up));
@@ -66,7 +66,7 @@  special_case (svfloat32_t x, svbool_t special, const struct data *d)
   Maximum observed error: 1.47 + 0.5
   _ZGVsMxv_rsqrtf (0x1.f610dep+127) got 0x1.02852cp-64
 				   want 0x1.02853p-64.  */
-svfloat32_t SV_NAME_F1 (rsqrt) (svfloat32_t x, svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (rsqrt) (svfloat32_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/sin_sve.c b/sysdeps/aarch64/fpu/sin_sve.c
index c28d00cd88a..65c9eadf9f2 100644
--- a/sysdeps/aarch64/fpu/sin_sve.c
+++ b/sysdeps/aarch64/fpu/sin_sve.c
@@ -38,7 +38,7 @@  static const struct data
 
 #define C(i) sv_f64 (d->poly[i])
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
 {
   return sv_call_f64 (sin, x, y, cmp);
@@ -53,7 +53,7 @@  special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
    is 3.22 ULP:
    _ZGVsMxv_sin (0x1.5702447b6f17bp+22) got 0x1.ffdcd125c84fbp-3
 				       want 0x1.ffdcd125c84f8p-3.  */
-svfloat64_t SV_NAME_D1 (sin) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (sin) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/sinf_sve.c b/sysdeps/aarch64/fpu/sinf_sve.c
index 5f5aa609241..c81b1f62882 100644
--- a/sysdeps/aarch64/fpu/sinf_sve.c
+++ b/sysdeps/aarch64/fpu/sinf_sve.c
@@ -42,7 +42,7 @@  static const struct data
 #define RangeVal 0x49800000 /* asuint32 (0x1p20f).  */
 #define C(i) sv_f32 (d->poly[i])
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
 {
   return sv_call_f32 (sinf, x, y, cmp);
@@ -53,7 +53,7 @@  special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
    This maximum error is achieved at multiple values in [-2^18, 2^18]
    but one example is:
    SV_NAME_F1 (sin)(0x1.9247a4p+0) got 0x1.fffff6p-1 want 0x1.fffffap-1.  */
-svfloat32_t SV_NAME_F1 (sin) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (sin) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/sinh_sve.c b/sysdeps/aarch64/fpu/sinh_sve.c
index 2b8d04833c2..7ed7e855180 100644
--- a/sysdeps/aarch64/fpu/sinh_sve.c
+++ b/sysdeps/aarch64/fpu/sinh_sve.c
@@ -59,7 +59,7 @@  static const struct data
 /* A specialised FEXPA expm1 that is only valid for positive inputs and
    has no special cases. Based off the full FEXPA expm1 implementated for
    _ZGVsMxv_expm1, with a slightly modified file to keep sinh under 3.5ULP.  */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 expm1_inline (svbool_t pg, svfloat64_t x)
 {
   const struct data *d = ptr_barrier (&data);
@@ -114,7 +114,7 @@  expm1_inline (svbool_t pg, svfloat64_t x)
    sinh (A + B) = cosh(A)cosh(B) + sinh(A)sinh(B)
    By choosing sufficiently large values whereby after rounding sinh == cosh,
    this can be simplified into: sinh (A + B) = sinh(A) * e^B.  */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svuint64_t sign, svbool_t pg, svbool_t special, svfloat64_t ax,
 	      svfloat64_t halfsign)
 {
@@ -151,7 +151,7 @@  special_case (svuint64_t sign, svbool_t pg, svbool_t special, svfloat64_t ax,
    The greatest observed error is 2.62 + 0.5 ULP:
    _ZGVsMxv_sinh (0x1.b5e0e13ba88aep-2) got 0x1.c3587faf97b0cp-2
 				       want 0x1.c3587faf97b09p-2.  */
-svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/sinhf_sve.c b/sysdeps/aarch64/fpu/sinhf_sve.c
index e52712438ae..d007a654d69 100644
--- a/sysdeps/aarch64/fpu/sinhf_sve.c
+++ b/sysdeps/aarch64/fpu/sinhf_sve.c
@@ -40,7 +40,7 @@  static const struct data
    sinh (A + B) = cosh(A)cosh(B) + sinh(A)sinh(B)
    By choosing sufficiently large values whereby after rounding sinh == cosh,
    this can be simplified into: sinh (A + B) = sinh(A) * e^B.  */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 special_case (const svbool_t pg, svbool_t special, svfloat32_t ax,
 	      svfloat32_t x, svfloat32_t t, const struct data *d)
 {
@@ -80,7 +80,7 @@  special_case (const svbool_t pg, svbool_t special, svfloat32_t ax,
    Maximum error is 2.76 +0.5 ULP:
    _ZGVsMxv_sinhf (0x1.6587e8p+6) got 0x1.ef3f98p+127
 				 want 0x1.ef3f92p+127.  */
-svfloat32_t SV_NAME_F1 (sinh) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (sinh) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/sinpi_sve.c b/sysdeps/aarch64/fpu/sinpi_sve.c
index af2ba77f98e..75cc7bdcf82 100644
--- a/sysdeps/aarch64/fpu/sinpi_sve.c
+++ b/sysdeps/aarch64/fpu/sinpi_sve.c
@@ -37,7 +37,7 @@  static const struct data
    Maximum error 3.10 ULP:
    _ZGVsMxv_sinpi(0x1.df1a14f1b235p-2) got 0x1.fd64f541606cp-1
 				      want 0x1.fd64f541606c3p-1.  */
-svfloat64_t SV_NAME_D1 (sinpi) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (sinpi) (svfloat64_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/sinpif_sve.c b/sysdeps/aarch64/fpu/sinpif_sve.c
index 026423b0627..0979c93242a 100644
--- a/sysdeps/aarch64/fpu/sinpif_sve.c
+++ b/sysdeps/aarch64/fpu/sinpif_sve.c
@@ -34,7 +34,7 @@  static const struct data
    Maximum error 2.48 ULP:
    _ZGVsMxv_sinpif(0x1.d062b6p-2) got 0x1.fa8c06p-1
 				 want 0x1.fa8c02p-1.  */
-svfloat32_t SV_NAME_F1 (sinpi) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (sinpi) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/sv_exp_special_inline.h b/sysdeps/aarch64/fpu/sv_exp_special_inline.h
index aea7c174373..955f511760d 100644
--- a/sysdeps/aarch64/fpu/sv_exp_special_inline.h
+++ b/sysdeps/aarch64/fpu/sv_exp_special_inline.h
@@ -31,7 +31,7 @@  static const struct sv_exp_special_data
   .special_bias2 = 0x3010000000000000, /* 0x1p-254.  */
 };
 
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 special_case (svfloat64_t scale, svfloat64_t poly, svfloat64_t n,
 	      const struct sv_exp_special_data *ds)
 {
diff --git a/sysdeps/aarch64/fpu/sv_expf_inline.h b/sysdeps/aarch64/fpu/sv_expf_inline.h
index 0ba5892f4d1..d3e74564fc2 100644
--- a/sysdeps/aarch64/fpu/sv_expf_inline.h
+++ b/sysdeps/aarch64/fpu/sv_expf_inline.h
@@ -35,7 +35,7 @@  struct sv_expf_data
     .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f,                        \
   }
 
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
 {
   /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
diff --git a/sysdeps/aarch64/fpu/sv_expf_special_inline.h b/sysdeps/aarch64/fpu/sv_expf_special_inline.h
index 83b36e3a249..9f1ddf170a4 100644
--- a/sysdeps/aarch64/fpu/sv_expf_special_inline.h
+++ b/sysdeps/aarch64/fpu/sv_expf_special_inline.h
@@ -32,7 +32,7 @@  static const struct sv_expf_special_data
 };
 
 /* Special case routine shared with other expBm1 routines.  */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 special_exp (svfloat32_t poly, svfloat32_t n, svuint32_t e, svbool_t cmp1,
 	     svfloat32_t scale, const struct sv_expf_special_data *ds)
 {
@@ -53,7 +53,7 @@  special_exp (svfloat32_t poly, svfloat32_t n, svuint32_t e, svbool_t cmp1,
 }
 
 /* Special case routine for expBm1.  */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t poly, svfloat32_t n, svfloat32_t scale,
 	      svbool_t cmp1, const struct sv_expf_special_data *ds)
 {
diff --git a/sysdeps/aarch64/fpu/sv_expm1f_inline.h b/sysdeps/aarch64/fpu/sv_expm1f_inline.h
index 8dcda485fe2..3eea00da56f 100644
--- a/sysdeps/aarch64/fpu/sv_expm1f_inline.h
+++ b/sysdeps/aarch64/fpu/sv_expm1f_inline.h
@@ -39,7 +39,7 @@  struct sv_expm1f_data
     .c4 = 0x1.6b55a2p-10, .ln2_lo = 0x1.7f7d1cp-20f, .ln2_hi = 0x1.62e4p-1f,  \
   }
 
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d)
 {
   /* This vector is reliant on layout of data - it contains constants
diff --git a/sysdeps/aarch64/fpu/sv_log1p_inline.h b/sysdeps/aarch64/fpu/sv_log1p_inline.h
index 1b2c542851c..16f3ca16c17 100644
--- a/sysdeps/aarch64/fpu/sv_log1p_inline.h
+++ b/sysdeps/aarch64/fpu/sv_log1p_inline.h
@@ -63,7 +63,7 @@  static const struct sv_log1p_data
   .one_top = 0x3ff
 };
 
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 sv_log1p_inline (svfloat64_t x, const svbool_t pg)
 {
   /* Helper for calculating log(x + 1). Adapted from v_log1p_inline.h, which
diff --git a/sysdeps/aarch64/fpu/sv_log1pf_inline.h b/sysdeps/aarch64/fpu/sv_log1pf_inline.h
index 494fa279f89..444879d2db3 100644
--- a/sysdeps/aarch64/fpu/sv_log1pf_inline.h
+++ b/sysdeps/aarch64/fpu/sv_log1pf_inline.h
@@ -42,7 +42,7 @@  static const struct sv_log1pf_data
   .three_quarters = 0x3f400000,
 };
 
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 sv_log1pf_inline (svfloat32_t x, svbool_t pg)
 {
   const struct sv_log1pf_data *d = ptr_barrier (&sv_log1pf_data);
diff --git a/sysdeps/aarch64/fpu/sv_math.h b/sysdeps/aarch64/fpu/sv_math.h
index f9199d04e0a..5269b523318 100644
--- a/sysdeps/aarch64/fpu/sv_math.h
+++ b/sysdeps/aarch64/fpu/sv_math.h
@@ -35,11 +35,25 @@ 
 /* Predicate is stored as one bit per byte of VL so requires VL / 64 bytes.  */
 #define SVE_NUM_PG_BYTES (SVE_VECTOR_BYTES / sizeof (uint64_t))
 
+#define SVE_FUNCTION __attribute__((target("+sve")))
+#ifdef __clang__
+# define OPTIONS_PUSH_SVE \
+  _Pragma("clang attribute push (__attribute__((target(\"+sve\"))), apply_to=function)")
+# define OPTIONS_POP         _Pragma("clang attribute pop")
+#else
+# define OPTIONS_PUSH_SVE    _Pragma ("GCC push_options"); \
+  			     _Pragma ("GCC target(\"+sve\")")
+# define OPTIONS_POP         _Pragma ("GCC pop_options")
+#endif
+
+
 #define SV_NAME_F1(fun) _ZGVsMxv_##fun##f
 #define SV_NAME_D1(fun) _ZGVsMxv_##fun
 #define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f
 #define SV_NAME_D2(fun) _ZGVsMxvv_##fun
 
+OPTIONS_PUSH_SVE
+
 static inline void
 svstr_p (uint8_t *dst, svbool_t p)
 {
@@ -165,4 +179,7 @@  sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2,
     }
   return svld1 (svptrue_b32 (), tmp1);
 }
+
+OPTIONS_POP
+
 #endif
diff --git a/sysdeps/aarch64/fpu/tan_sve.c b/sysdeps/aarch64/fpu/tan_sve.c
index 1231541531e..ae7a38aff04 100644
--- a/sysdeps/aarch64/fpu/tan_sve.c
+++ b/sysdeps/aarch64/fpu/tan_sve.c
@@ -41,7 +41,7 @@  static const struct data
   .range_val = 0x1p23,
 };
 
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
 special_case (svfloat64_t x, svfloat64_t p, svfloat64_t q, svbool_t pg,
 	      svbool_t special)
 {
@@ -61,7 +61,7 @@  special_case (svfloat64_t x, svfloat64_t p, svfloat64_t q, svbool_t pg,
    Maximum measured error is 3.48 ULP:
    _ZGVsMxv_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
 				      want -0x1.f6ccd8ecf7deap+37.  */
-svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg)
 {
   const struct data *dat = ptr_barrier (&data);
   svfloat64_t half_pi_c0 = svld1rq (svptrue_b64 (), &dat->c0);
diff --git a/sysdeps/aarch64/fpu/tanf_sve.c b/sysdeps/aarch64/fpu/tanf_sve.c
index 60d93fd7e25..3c077e2cb6c 100644
--- a/sysdeps/aarch64/fpu/tanf_sve.c
+++ b/sysdeps/aarch64/fpu/tanf_sve.c
@@ -46,7 +46,7 @@  static const struct data
   .range_val = 0x1p15f,	      .shift = 0x1.8p+23f
 };
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
 {
   return sv_call_f32 (tanf, x, y, cmp);
@@ -56,7 +56,7 @@  special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
    Maximum error is 3.45 ULP:
    SV_NAME_F1 (tan)(-0x1.e5f0cap+13) got 0x1.ff9856p-1
 				    want 0x1.ff9850p-1.  */
-svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/tanh_sve.c b/sysdeps/aarch64/fpu/tanh_sve.c
index 65712c6d49d..5a935569e76 100644
--- a/sysdeps/aarch64/fpu/tanh_sve.c
+++ b/sysdeps/aarch64/fpu/tanh_sve.c
@@ -55,7 +55,7 @@  static const struct data
 /* An expm1 inspired, FEXPA based helper function that returns an
    accurate estimate for e^2x - 1. With no special case or support for
    negative inputs of x.  */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
 e2xm1_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
 {
   svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->two_over_ln2);
@@ -108,7 +108,7 @@  e2xm1_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
    The greatest observed error is 2.79 + 0.5 ULP:
    _ZGVsMxv_tanh (0x1.fff868eb3c223p-9) got 0x1.fff7be486cae6p-9
 				       want 0x1.fff7be486cae9p-9.  */
-svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/tanhf_sve.c b/sysdeps/aarch64/fpu/tanhf_sve.c
index 6fd2879c277..dc1007fb4bf 100644
--- a/sysdeps/aarch64/fpu/tanhf_sve.c
+++ b/sysdeps/aarch64/fpu/tanhf_sve.c
@@ -44,7 +44,7 @@  static const struct data
 
 /* An expm1 inspired helper function that returns an accurate
    estimate for e^2x - 1.  */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
 e2xm1f_inline (svfloat32_t x, svbool_t pg, const struct data *d)
 {
   /* This vector is reliant on layout of data - it contains constants
@@ -82,7 +82,7 @@  e2xm1f_inline (svfloat32_t x, svbool_t pg, const struct data *d)
   return svmla_x (pg, svsub_x (pg, t, 1.0f), p, t);
 }
 
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
 special_case (svfloat32_t x, svbool_t pg, svbool_t special, svfloat32_t q)
 {
   /* Finish fastpass to compute values for non-special cases.  */
@@ -108,7 +108,7 @@  special_case (svfloat32_t x, svbool_t pg, svbool_t special, svfloat32_t q)
    Maximum error is 2.06 +0.5 ULP:
    _ZGVsMxv_tanhf (0x1.fc1832p-5) got 0x1.fb71a4p-5
 				 want 0x1.fb71aap-5.  */
-svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg)
 {
   const struct data *d = ptr_barrier (&data);
 
diff --git a/sysdeps/aarch64/fpu/tanpi_sve.c b/sysdeps/aarch64/fpu/tanpi_sve.c
index f6b402ff221..7a208a3b985 100644
--- a/sysdeps/aarch64/fpu/tanpi_sve.c
+++ b/sysdeps/aarch64/fpu/tanpi_sve.c
@@ -42,7 +42,7 @@  const static struct v_tanpi_data
    The maximum error is 3.06 ULP:
    _ZGVsMxv_tanpi(0x1.0a4a07dfcca3ep-1) got -0x1.fa30112702c98p+3
 				       want -0x1.fa30112702c95p+3.  */
-svfloat64_t SV_NAME_D1 (tanpi) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (tanpi) (svfloat64_t x, const svbool_t pg)
 {
   const struct v_tanpi_data *d = ptr_barrier (&tanpi_data);
 
diff --git a/sysdeps/aarch64/fpu/tanpif_sve.c b/sysdeps/aarch64/fpu/tanpif_sve.c
index c652f370622..7e3d635ce84 100644
--- a/sysdeps/aarch64/fpu/tanpif_sve.c
+++ b/sysdeps/aarch64/fpu/tanpif_sve.c
@@ -34,7 +34,7 @@  const static struct v_tanpif_data
    The maximum error is 3.34 ULP:
    _ZGVsMxv_tanpif(0x1.d6c09ap-2) got 0x1.f70aacp+2
 				 want 0x1.f70aa6p+2.  */
-svfloat32_t SV_NAME_F1 (tanpi) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (tanpi) (svfloat32_t x, const svbool_t pg)
 {
   const struct v_tanpif_data *d = ptr_barrier (&tanpif_data);
   svfloat32_t odd_coeffs = svld1rq (svptrue_b32 (), &d->c1);
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
index 86e73756a23..24245c8ac6f 100644
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -20,12 +20,14 @@ 
 #include <arm_sve.h>
 
 #include "test-double-sve.h"
+#define attribute_hidden
+#include "sv_math.h"
 
 /* Wrapper from scalar to SVE function. Cannot just use VECTOR_WRAPPER due to
    predication.  */
 #define SVE_VECTOR_WRAPPER(scalar_func, vector_func)                          \
   extern VEC_TYPE vector_func (VEC_TYPE, svbool_t);                           \
-  FLOAT scalar_func (FLOAT x)                                                 \
+  SVE_FUNCTION FLOAT scalar_func (FLOAT x)                                    \
   {                                                                           \
     VEC_TYPE mx = svdup_n_f64 (x);                                            \
     VEC_TYPE mr = vector_func (mx, svptrue_b64 ());                           \
@@ -34,7 +36,7 @@ 
 
 #define SVE_VECTOR_WRAPPER_ff(scalar_func, vector_func)                       \
   extern VEC_TYPE vector_func (VEC_TYPE, VEC_TYPE, svbool_t);                 \
-  FLOAT scalar_func (FLOAT x, FLOAT y)                                        \
+  SVE_FUNCTION FLOAT scalar_func (FLOAT x, FLOAT y)                           \
   {                                                                           \
     VEC_TYPE mx = svdup_n_f64 (x);                                            \
     VEC_TYPE my = svdup_n_f64 (y);                                            \
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
index 0d9a7e5b93b..0469eb16458 100644
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -20,12 +20,14 @@ 
 #include <arm_sve.h>
 
 #include "test-float-sve.h"
+#define attribute_hidden
+#include "sv_math.h"
 
 /* Wrapper from scalar to SVE function. Cannot just use VECTOR_WRAPPER due to
    predication.  */
 #define SVE_VECTOR_WRAPPER(scalar_func, vector_func)                          \
   extern VEC_TYPE vector_func (VEC_TYPE, svbool_t);                           \
-  FLOAT scalar_func (FLOAT x)                                                 \
+  SVE_FUNCTION FLOAT scalar_func (FLOAT x)                                    \
   {                                                                           \
     VEC_TYPE mx = svdup_n_f32 (x);                                            \
     VEC_TYPE mr = vector_func (mx, svptrue_b32 ());                           \
@@ -34,7 +36,7 @@ 
 
 #define SVE_VECTOR_WRAPPER_ff(scalar_func, vector_func)                       \
   extern VEC_TYPE vector_func (VEC_TYPE, VEC_TYPE, svbool_t);                 \
-  FLOAT scalar_func (FLOAT x, FLOAT y)                                        \
+  SVE_FUNCTION FLOAT scalar_func (FLOAT x, FLOAT y)                           \
   {                                                                           \
     VEC_TYPE mx = svdup_n_f32 (x);                                            \
     VEC_TYPE my = svdup_n_f32 (y);                                            \