@@ -65,7 +65,7 @@ endif
# add a generic -mcpu and -march with SVE enabled. Also use a tune for a modern
# SVE core.
-sve-cflags = -mcpu=generic+sve -march=armv8-a+sve -mtune=neoverse-v1
+sve-cflags = -mtune=neoverse-v1
ifeq ($(build-mathvec),yes)
bench-libmvec = $(addprefix float-advsimd-,$(float-advsimd-funcs)) \
@@ -54,7 +54,7 @@ static const struct data
The largest observed error in this region is 1.50 ulp:
_ZGVsMxv_acos (0x1.252a2cf3fb9acp-1) got 0x1.ec1a46aa82901p-1
want 0x1.ec1a46aa829p-1. */
-svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t ptrue = svptrue_b64 ();
@@ -51,7 +51,7 @@ static const struct data
The largest observed error in this region is 1.32 ulps,
_ZGVsMxv_acosf (0x1.15ba56p-1) got 0x1.feb33p-1
want 0x1.feb32ep-1. */
-svfloat32_t SV_NAME_F1 (acos) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (acos) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -35,7 +35,7 @@ const static struct data
Right before returning we check if x is infinity or if x is lower than 1,
in which case we return infinity or NaN. */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svfloat64_t xm1, svfloat64_t y, svbool_t special,
svbool_t pg, const struct data *d)
{
@@ -54,7 +54,7 @@ special_case (svfloat64_t x, svfloat64_t xm1, svfloat64_t y, svbool_t special,
argument to log1p falls in the k=0 interval, i.e. x close to 1:
SV_NAME_D1 (acosh)(0x1.1e80ed12f0ad1p+0) got 0x1.ef0cee7c33ce1p-2
want 0x1.ef0cee7c33ce4p-2. */
-svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -23,7 +23,7 @@
#define One 0x3f800000
#define Thres 0x20000000 /* asuint(0x1p64) - One. */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t xm1, svfloat32_t tmp, svbool_t special)
{
svfloat32_t x = svadd_x (svptrue_b32 (), xm1, 1.0f);
@@ -37,7 +37,7 @@ special_case (svfloat32_t xm1, svfloat32_t tmp, svbool_t special)
Maximum error is 2.47 ULPs:
SV_NAME_F1 (acosh) (0x1.01ca76p+0) got 0x1.e435a6p-4
want 0x1.e435a2p-4. */
-svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
{
svuint32_t ix = svreinterpret_u32 (x);
svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres);
@@ -56,7 +56,7 @@ static const struct data
The largest observed error in this region is 2.55 ulp:
_ZGVsMxv_acospi(0x1.d90d50357410cp-1) got 0x1.ffd43d5dd3a9ep-4
want 0x1.ffd43d5dd3a9bp-4. */
-svfloat64_t SV_NAME_D1 (acospi) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (acospi) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t ptrue = svptrue_b64 ();
@@ -49,7 +49,7 @@ static const struct data
The largest observed error in this region is 2.61 ulps,
_ZGVsMxv_acospif (0x1.6b232ep-1) got 0x1.fe04bap-3
want 0x1.fe04cp-3. */
-svfloat32_t SV_NAME_F1 (acospi) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (acospi) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -51,7 +51,7 @@ static const struct data
The largest observed error in this region is 2.66 ulp:
_ZGVsMxv_asin (0x1.04024f6e2a2fbp-1) got 0x1.10b9586f087a8p-1
want 0x1.10b9586f087abp-1. */
-svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t ptrue = svptrue_b64 ();
@@ -48,7 +48,7 @@ static const struct data
The largest observed error in this region is 2.41 ulps,
_ZGVsMxv_asinf (-0x1.00203ep-1) got -0x1.0c3a64p-1
want -0x1.0c3a6p-1. */
-svfloat32_t SV_NAME_F1 (asin) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (asin) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -64,7 +64,7 @@ static const struct data
.inf = INFINITY
};
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
__sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg)
{
/* Double-precision SVE log, copied from SVE log implementation with some
@@ -99,7 +99,7 @@ __sv_log_inline (svfloat64_t x, const struct data *d, const svbool_t pg)
return y;
}
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t ax, svfloat64_t y, svuint64_t sign, svbool_t special,
svbool_t pg, const struct data *d)
{
@@ -136,7 +136,7 @@ special_case (svfloat64_t ax, svfloat64_t y, svuint64_t sign, svbool_t special,
|x| >= 1:
_ZGVsMxv_asinh(0x1.170469d024505p+0) got 0x1.e3181c43b0f36p-1
want 0x1.e3181c43b0f39p-1. */
-svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -22,7 +22,7 @@
#define BigBound 0x5f800000 /* asuint(0x1p64). */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svuint32_t iax, svuint32_t sign, svfloat32_t y, svbool_t special)
{
svfloat32_t x = svreinterpret_f32 (sveor_x (svptrue_b32 (), iax, sign));
@@ -37,7 +37,7 @@ special_case (svuint32_t iax, svuint32_t sign, svfloat32_t y, svbool_t special)
Maximum error is 1.92 ULPs:
SV_NAME_F1 (asinh) (-0x1.0922ecp-1) got -0x1.fd0bccp-2
want -0x1.fd0bc8p-2. */
-svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg)
{
svfloat32_t ax = svabs_x (pg, x);
svuint32_t iax = svreinterpret_u32 (ax);
@@ -51,7 +51,7 @@ static const struct data
The largest observed error in this region is 3.48 ulp:
_ZGVsMxv_asinpi (0x1.03da0c2295424p-1) got 0x1.5b02b3dcafaefp-3
want 0x1.5b02b3dcafaf2p-3. */
-svfloat64_t SV_NAME_D1 (asinpi) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (asinpi) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t ptrue = svptrue_b64 ();
@@ -47,7 +47,7 @@ static const struct data
The largest observed error in this region is 3.46 ulps:
_ZGVsMxv_asinpif (0x1.0df892p-1) got 0x1.6a114cp-3
want 0x1.6a1146p-3. */
-svfloat32_t SV_NAME_F1 (asinpi) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (asinpi) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t ptrue = svptrue_b32 ();
@@ -39,7 +39,7 @@ static const struct data
.c18 = -0x1.44a4b059b6f67p-13, .c19 = 0x1.c4a45029e5a91p-17,
};
/* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
const svbool_t cmp)
{
@@ -48,7 +48,7 @@ special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
/* Returns a predicate indicating true if the input is the bit representation
of 0, infinity or nan. */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
zeroinfnan (svuint64_t i, const svbool_t pg)
{
return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
@@ -59,8 +59,8 @@ zeroinfnan (svuint64_t i, const svbool_t pg)
x are reasonably close together. The greatest observed error is 1.94 ULP:
_ZGVsMxvv_atan2 (0x1.8a4bf7167228ap+5, 0x1.84971226bb57bp+5)
got 0x1.95db19dfef9ccp-1 want 0x1.95db19dfef9cep-1. */
-svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x,
- const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x,
+ const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -33,7 +33,7 @@ static const struct data
};
/* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
const svbool_t cmp)
{
@@ -42,7 +42,7 @@ special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
/* Returns a predicate indicating true if the input is the bit representation
of 0, infinity or nan. */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
zeroinfnan (svuint32_t i, const svbool_t pg)
{
return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
@@ -54,8 +54,8 @@ zeroinfnan (svuint32_t i, const svbool_t pg)
observed error is 2.21 ULP:
_ZGVnN4vv_atan2f (0x1.a04aa8p+6, 0x1.9a274p+6) got 0x1.95ed3ap-1
want 0x1.95ed36p-1. */
-svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x,
- const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x,
+ const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t ptrue = svptrue_b32 ();
@@ -44,7 +44,7 @@ static const struct data
#define OneOverPi sv_f64 (0x1.45f306dc9c883p-2)
/* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
const svbool_t cmp)
{
@@ -54,7 +54,7 @@ special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
/* Returns a predicate indicating true if the input is the bit representation
of 0, infinity or nan. */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
zeroinfnan (svuint64_t i, const svbool_t pg)
{
return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
@@ -65,8 +65,8 @@ zeroinfnan (svuint64_t i, const svbool_t pg)
Maximum observed error is 3.11 ulps:
_ZGVsMxvv_atan2pi (0x1.ef284a877f6b5p+6, 0x1.03fdde8242b17p+7)
got 0x1.f00f800163079p-3 want 0x1.f00f800163076p-3. */
-svfloat64_t SV_NAME_D2 (atan2pi) (svfloat64_t y, svfloat64_t x,
- const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D2 (atan2pi) (svfloat64_t y, svfloat64_t x,
+ const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t ptrue = svptrue_b64 ();
@@ -44,7 +44,7 @@ static const struct data
#define OneOverPi sv_f32 (0x1.45f307p-2)
/* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
const svbool_t cmp)
{
@@ -54,7 +54,7 @@ special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
/* Returns a predicate indicating true if the input is the bit representation
of 0, infinity or nan. */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
zeroinfnan (svuint32_t i, const svbool_t pg, const struct data *d)
{
return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
@@ -66,8 +66,8 @@ zeroinfnan (svuint32_t i, const svbool_t pg, const struct data *d)
observed error is 2.90 ULP:
_ZGVsMxvv_atan2pif (0x1.a28542p+5, 0x1.adb7c6p+5) got 0x1.f76524p-3
want 0x1.f7651ep-3. */
-svfloat32_t SV_NAME_F2 (atan2pi) (svfloat32_t y, svfloat32_t x,
- const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F2 (atan2pi) (svfloat32_t y, svfloat32_t x,
+ const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t ptrue = svptrue_b32 ();
@@ -49,7 +49,7 @@ static const struct data
error is 2.08 ulps:
_ZGVsMxv_atan (0x1.000a7c56975e8p+0) got 0x1.922a3163e15c2p-1
want 0x1.922a3163e15c4p-1. */
-svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -48,7 +48,7 @@ static const struct data
Largest observed error is 2.12 ULP:
_ZGVsMxv_atanf (0x1.03d4cep+0) got 0x1.95ed3ap-1
want 0x1.95ed36p-1. */
-svfloat32_t SV_NAME_F1 (atan) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (atan) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t ptrue = svptrue_b32 ();
@@ -27,7 +27,7 @@ static const struct data
double nan;
} data = { .half = 0x3fe0000000000000, .inf = INFINITY, .nan = NAN };
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t ax, svfloat64_t y, svbool_t pg, svbool_t special,
svfloat64_t halfsign, const struct data *d)
{
@@ -40,7 +40,7 @@ special_case (svfloat64_t ax, svfloat64_t y, svbool_t pg, svbool_t special,
The greatest observed error is 3.3 ULP:
_ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
want 0x1.ffd8ff31b501cp-6. */
-svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -23,7 +23,7 @@
#define One (0x3f800000)
#define Half (0x3f000000)
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svuint32_t iax, svuint32_t sign, svfloat32_t halfsign,
svfloat32_t y, svbool_t special)
{
@@ -36,7 +36,7 @@ special_case (svuint32_t iax, svuint32_t sign, svfloat32_t halfsign,
The maximum error is 1.99 ULP:
_ZGVsMxv_atanhf(0x1.f1583p-5) got 0x1.f1f4fap-5
want 0x1.f1f4f6p-5. */
-svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
{
svfloat32_t ax = svabs_x (pg, x);
svuint32_t iax = svreinterpret_u32 (ax);
@@ -58,7 +58,7 @@ static const struct data
error is 2.80 ulps:
_ZGVsMxv_atanpi(0x1.f19587d63c76fp-1) got 0x1.f6b1304817d02p-3
want 0x1.f6b1304817d05p-3. */
-svfloat64_t SV_NAME_D1 (atanpi) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (atanpi) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -41,7 +41,7 @@ static const struct data
Largest observed error is 2.59 ULP, close to +/-1.0:
_ZGVsMxv_atanpif(0x1.f2a89cp-1) got 0x1.f76524p-3
want 0x1.f7651ep-3. */
-svfloat32_t SV_NAME_F1 (atanpi) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (atanpi) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t ptrue = svptrue_b32 ();
@@ -45,13 +45,13 @@ const static struct data
#define MantissaMask 0x000fffffffffffff
#define HalfExp 0x3fe0000000000000
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
{
return sv_call_f64 (cbrt, x, y, special);
}
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
shifted_lookup (const svbool_t pg, const float64_t *table, svint64_t i)
{
return svld1_gather_index (pg, table, svadd_x (pg, i, 2));
@@ -64,7 +64,7 @@ shifted_lookup (const svbool_t pg, const float64_t *table, svint64_t i)
is an integer.
_ZGVsMxv_cbrt (0x0.3fffb8d4413f3p-1022) got 0x1.965f53b0e5d97p-342
want 0x1.965f53b0e5d95p-342. */
-svfloat64_t SV_NAME_D1 (cbrt) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (cbrt) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -41,13 +41,13 @@ const static struct data
#define MantissaMask 0x007fffff
#define HalfExp 0x3f000000
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
{
return sv_call_f32 (cbrtf, x, y, special);
}
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
shifted_lookup (const svbool_t pg, const float32_t *table, svint32_t i)
{
return svld1_gather_index (pg, table, svadd_x (pg, i, 2));
@@ -59,7 +59,7 @@ shifted_lookup (const svbool_t pg, const float32_t *table, svint32_t i)
0x1.85a2aa and the exponent is a multiple of 3, for example:
_ZGVsMxv_cbrtf (0x1.85a2aap+3) got 0x1.267936p+1
want 0x1.267932p+1. */
-svfloat32_t SV_NAME_F1 (cbrt) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (cbrt) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -36,7 +36,7 @@ static const struct data
#define RangeVal 0x4160000000000000 /* asuint64 (0x1p23). */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svfloat64_t y, svbool_t oob)
{
return sv_call_f64 (cos, x, y, oob);
@@ -47,7 +47,7 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t oob)
Maximum measured error: 2.108 ULPs.
SV_NAME_D1 (cos)(0x1.9b0ba158c98f3p+7) got -0x1.fddd4c65c7f07p-3
want -0x1.fddd4c65c7f05p-3. */
-svfloat64_t SV_NAME_D1 (cos) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (cos) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -36,7 +36,7 @@ static const struct data
#define RangeVal 0x49800000 /* asuint32(0x1p20f). */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svfloat32_t y, svbool_t oob)
{
return sv_call_f32 (cosf, x, y, oob);
@@ -47,7 +47,7 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t oob)
Maximum measured error: 2.06 ULPs.
SV_NAME_F1 (cos)(0x1.dea2f2p+19) got 0x1.fffe7ap-6
want 0x1.fffe76p-6. */
-svfloat32_t SV_NAME_F1 (cos) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (cos) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -46,7 +46,7 @@ static const struct data
Functionally identical to FEXPA exp(x), but an adjustment in
the shift value which leads to a reduction in the exponent of scale by 1,
thus halving the result at no cost. */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
exp_over_two_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
{
/* Calculate exp(x). */
@@ -76,7 +76,7 @@ exp_over_two_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
cosh (A + B) = cosh(A)cosh(B) + sinh(A)sinh(B)
By choosing sufficiently large values whereby after rounding cosh == sinh,
this can be simplified into: cosh (A + B) = cosh(A) * e^B. */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svbool_t pg, svbool_t special, svfloat64_t t,
const struct data *d)
{
@@ -108,7 +108,7 @@ special_case (svfloat64_t x, svbool_t pg, svbool_t special, svfloat64_t t,
The greatest observed error is 2.10 + 0.5 ULP:
_ZGVsMxv_cosh (-0x1.2acb2978bd15ep+4) got 0x1.ebbd8806ea342p+25
want 0x1.ebbd8806ea33fp+25. */
-svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -38,7 +38,7 @@ static const struct data
cosh (A + B) = cosh(A)cosh(B) + sinh(A)sinh(B)
By choosing sufficiently large values whereby after rounding cosh == sinh,
this can be simplified into: cosh (A + B) = cosh(A) * e^B. */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
special_case (svfloat32_t x, svbool_t special, svfloat32_t half_e,
svfloat32_t half_over_e, const struct data *d)
{
@@ -70,7 +70,7 @@ special_case (svfloat32_t x, svbool_t special, svfloat32_t half_e,
Maximum error is 2.55 +0.5 ULP:
_ZGVsMxv_coshf(-0x1.5b40f4p+1) got 0x1.e47748p+2
want 0x1.e4774ep+2. */
-svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -38,7 +38,7 @@ static const struct data
Maximum error 3.20 ULP:
_ZGVsMxv_cospi(0x1.f18ba32c63159p-6) got 0x1.fdabf595f9763p-1
want 0x1.fdabf595f9766p-1. */
-svfloat64_t SV_NAME_D1 (cospi) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (cospi) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -35,7 +35,7 @@ static const struct data
Maximum error: 2.60 ULP:
_ZGVsMxv_cospif(+/-0x1.cae664p-4) got 0x1.e09c9ep-1
want 0x1.e09c98p-1. */
-svfloat32_t SV_NAME_F1 (cospi) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (cospi) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -54,7 +54,7 @@ static const struct data
Maximum measure error: 2.29 ULP
_ZGVsMxv_erf(-0x1.00003c924e5d1p-8) got -0x1.20dd59132ebadp-8
want -0x1.20dd59132ebafp-8. */
-svfloat64_t SV_NAME_D1 (erf) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (erf) (svfloat64_t x, const svbool_t pg)
{
const struct data *dat = ptr_barrier (&data);
@@ -86,7 +86,7 @@ static const struct data
Maximum measured error: 1.71 ULP
_ZGVsMxv_erfc(0x1.46cfe976733p+4) got 0x1.e15fcbea3e7afp-608
want 0x1.e15fcbea3e7adp-608. */
-svfloat64_t SV_NAME_D1 (erfc) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (erfc) (svfloat64_t x, const svbool_t pg)
{
const struct data *dat = ptr_barrier (&data);
@@ -59,7 +59,7 @@ static const struct data
Maximum error: 1.63 ULP (~1.0 ULP for x < 0.0).
_ZGVsMxv_erfcf(0x1.1dbf7ap+3) got 0x1.f51212p-120
want 0x1.f51216p-120. */
-svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg)
{
const struct data *dat = ptr_barrier (&data);
@@ -49,7 +49,7 @@ static const struct data
_ZGVsMxv_erff(0x1.c373e6p-9) got 0x1.fd686cp-9 want 0x1.fd6868p-9
- [0x1.cp-7, 4.0]: 1.26 ULP
_ZGVsMxv_erff(0x1.1d002ep+0) got 0x1.c4eb9ap-1 want 0x1.c4eb98p-1. */
-svfloat32_t SV_NAME_F1 (erf) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (erf) (svfloat32_t x, const svbool_t pg)
{
const struct data *dat = ptr_barrier (&data);
@@ -50,7 +50,7 @@ static const struct data
.special_bound = SpecialBound,
};
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_exp (svfloat64_t scale, svfloat64_t poly, svfloat64_t n, svuint64_t u,
const struct sv_exp_special_data *ds)
{
@@ -69,7 +69,7 @@ special_exp (svfloat64_t scale, svfloat64_t poly, svfloat64_t n, svuint64_t u,
Maximum measured error is 1.02 ulp.
SV_NAME_D1 (exp10)(-0x1.2862fec805e58p+2) got 0x1.885a89551d782p-16
want 0x1.885a89551d781p-16. */
-svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -46,7 +46,7 @@ static const struct data
.zero_bound = ZeroBound,
};
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
{
/* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)),
@@ -73,7 +73,7 @@ sv_exp10f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
return svmla_x (pg, scale, scale, poly);
}
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svbool_t pg, svbool_t special,
const struct data *d)
{
@@ -98,7 +98,7 @@ special_case (svfloat32_t x, svbool_t pg, svbool_t special,
Worst case error is 2.86 ULP +0.50 ULP.
_ZGVsMxv_exp10f (0x1.31b778p+5) got 0x1.ed399p+126
want 0x1.ed398ap+126. */
-svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t special = svacgt (pg, x, d->special_bound);
@@ -72,7 +72,7 @@ static const struct data
},
};
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_m1 (svbool_t special, svfloat64_t y, svfloat64_t z, svfloat64_t scale,
svfloat64_t poly, svfloat64_t n,
const struct sv_exp_special_data *ds)
@@ -94,7 +94,7 @@ special_m1 (svbool_t special, svfloat64_t y, svfloat64_t z, svfloat64_t scale,
Maximum measured error is 2.87 + 0.5 ULP:
_ZGVsMxv_exp10m1(0x1.64645f11e94c6p-4) got 0x1.c64d54eb7658dp-3
want 0x1.c64d54eb7658ap-3. */
-svfloat64_t SV_NAME_D1 (exp10m1) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (exp10m1) (svfloat64_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -57,7 +57,7 @@ static const struct data
Algorithm is accurate to 1.68 + 0.5 ULP.
_ZGVnN4v_exp10m1f(0x1.3aeffep-3) got 0x1.b3139p-2
want 0x1.b3138cp-2. */
-svfloat32_t SV_NAME_F1 (exp10m1) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (exp10m1) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -46,7 +46,7 @@ static const struct data
.special_bound = SpecialBound,
};
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_exp (svfloat64_t poly, svfloat64_t scale, svfloat64_t n, svfloat64_t z,
const struct sv_exp_special_data *ds)
{
@@ -66,7 +66,7 @@ special_exp (svfloat64_t poly, svfloat64_t scale, svfloat64_t n, svfloat64_t z,
Maximum measured error is 0.52 + 0.5 ulp.
_ZGVsMxv_exp2 (0x1.3b72ad5b701bfp-1) got 0x1.8861641b49e08p+0
want 0x1.8861641b49e07p+0. */
-svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -43,7 +43,7 @@ static const struct data
.zero_bound = ZeroBound,
};
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
{
/* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
@@ -60,7 +60,7 @@ sv_exp2f_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
return svmla_x (pg, scale, scale, poly);
}
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svbool_t pg, svbool_t special,
const struct data *d)
{
@@ -85,7 +85,7 @@ special_case (svfloat32_t x, svbool_t pg, svbool_t special,
Worst case error is 2.87 +0.50 ULP.
_ZGVsMxv_exp2f (0x1.fbcb78p+6) got 0x1.ee1d32p+126
want 0x1.ee1d2cp+126. */
-svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t special = svacgt (pg, x, d->special_bound);
@@ -64,7 +64,7 @@ static const struct data
},
};
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_m1 (svbool_t special, svfloat64_t y, svfloat64_t z, svfloat64_t scale,
svfloat64_t poly, svfloat64_t n,
const struct sv_exp_special_data *ds)
@@ -86,7 +86,7 @@ special_m1 (svbool_t special, svfloat64_t y, svfloat64_t z, svfloat64_t scale,
Maximum error is 2.58 + 0.5 ULP.
_ZGVsMxv_exp2m1(0x1.0284a345c99bfp-8) got 0x1.66df630cd2965p-9
want 0x1.66df630cd2962p-9. */
-svfloat64_t SV_NAME_D1 (exp2m1) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (exp2m1) (svfloat64_t x, svbool_t pg)
{
/* exp2(x) = 2^n (1 + poly(r))
x = n + r, with r in [-1/2N, 1/2N].
@@ -50,7 +50,7 @@ static const struct data
The maximum error is 1.76 + 0.5 ULP.
_ZGVsMxv_exp2m1f (0x1.018af8p-1) got 0x1.ab2ebcp-2
want 0x1.ab2ecp-2. */
-svfloat32_t SV_NAME_F1 (exp2m1) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (exp2m1) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -46,7 +46,7 @@ static const struct data
.special_bound = SpecialBound,
};
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_exp (svfloat64_t poly, svfloat64_t scale, svfloat64_t n, svuint64_t u,
const struct sv_exp_special_data *ds)
{
@@ -64,7 +64,7 @@ special_exp (svfloat64_t poly, svfloat64_t scale, svfloat64_t n, svuint64_t u,
/* SVE exp algorithm. Maximum measured error is 1.01ulps:
SV_NAME_D1 (exp)(0x1.4619d7b04da41p+6) got 0x1.885d9acc41da7p+117
want 0x1.885d9acc41da6p+117. */
-svfloat64_t SV_NAME_D1 (exp) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (exp) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -48,7 +48,7 @@ static const struct data
.zero_bound = ZeroBound,
};
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
expf_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
{
/* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
@@ -78,7 +78,7 @@ expf_inline (svfloat32_t x, const svbool_t pg, const struct data *d)
The approximation needs to match that of the fast path.
To achieve this we assemble the same polynomial, ie `r + 0.5 * r^2`,
then we conditionally add an extra `c2 * r^3` term. */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
expf_slow_inline (svfloat32_t x, const svbool_t special, const struct data *d)
{
svfloat32_t lane_constants = svld1rq (svptrue_b32 (), &d->ln2_hi);
@@ -102,7 +102,7 @@ expf_slow_inline (svfloat32_t x, const svbool_t special, const struct data *d)
return svmla_x (svptrue_b32 (), scale, scale, poly);
}
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svbool_t pg, svbool_t special,
const struct data *d)
{
@@ -127,7 +127,7 @@ special_case (svfloat32_t x, svbool_t pg, svbool_t special,
Worst-case error is 2.70 +0.50 ULP:
_ZGVsMxv_expf(0x1.5fec38p+6) got 0x1.e7831ep+126
want 0x1.e78318p+126. */
-svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svbool_t special = svacgt (pg, x, d->special_bound);
@@ -66,7 +66,7 @@ static const struct data
.fexpa_bound = FexpaBound,
};
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_m1 (svbool_t special, svfloat64_t y, svfloat64_t z, svfloat64_t scale,
svfloat64_t poly, svfloat64_t n,
const struct sv_exp_special_data *ds)
@@ -88,7 +88,7 @@ special_m1 (svbool_t special, svfloat64_t y, svfloat64_t z, svfloat64_t scale,
Maximum measured error is 2.81 + 0.5 ULP:
_ZGVsMxv_expm1 (0x1.974060e619bfp-3) got 0x1.c290e5858bb53p-3
want 0x1.c290e5858bb5p-3. */
-svfloat64_t SV_NAME_D1 (expm1) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (expm1) (svfloat64_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -50,7 +50,7 @@ static const struct data
Maximum error is 1.02 +0.5ULP:
_ZGVsMxv_expm1f(0x1.8f4ebcp-2) got 0x1.e859dp-2
want 0x1.e859d4p-2. */
-svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -27,7 +27,7 @@ static const struct data
.thres = 0x7300000000000000, /* asuint (inf) - tiny_bound. */
};
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t sqsum, svfloat64_t x, svfloat64_t y, svbool_t pg,
svbool_t special)
{
@@ -39,7 +39,8 @@ special_case (svfloat64_t sqsum, svfloat64_t x, svfloat64_t y, svbool_t pg,
_ZGVsMxvv_hypot (-0x1.6a22d0412cdd3p+352, 0x1.d3d89bd66fb1ap+330)
got 0x1.6a22d0412cfp+352
want 0x1.6a22d0412cf01p+352. */
-svfloat64_t SV_NAME_D2 (hypot) (svfloat64_t x, svfloat64_t y, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D2 (hypot) (svfloat64_t x, svfloat64_t y,
+ svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -22,7 +22,7 @@
#define TinyBound 0x0c800000 /* asuint (0x1p-102). */
#define Thres 0x73000000 /* 0x70000000 - TinyBound. */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t sqsum, svfloat32_t x, svfloat32_t y, svbool_t pg,
svbool_t special)
{
@@ -33,8 +33,8 @@ special_case (svfloat32_t sqsum, svfloat32_t x, svfloat32_t y, svbool_t pg,
Maximum error observed is 1.21 ULP:
_ZGVsMxvv_hypotf (0x1.6a213cp-19, -0x1.32b982p-26) got 0x1.6a2346p-19
want 0x1.6a2344p-19. */
-svfloat32_t SV_NAME_F2 (hypot) (svfloat32_t x, svfloat32_t y,
- const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F2 (hypot) (svfloat32_t x, svfloat32_t y,
+ const svbool_t pg)
{
svfloat32_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y);
@@ -45,7 +45,7 @@ static const struct data
.thresh = 0x7fe0000000000000, /* infinity - min. */
};
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
v_log10_inline (svuint64_t ix, const svbool_t pg, const struct data *d)
{
/* x = 2^k z; where z is in range [Off,2*Off) and exact.
@@ -89,7 +89,7 @@ v_log10_inline (svuint64_t ix, const svbool_t pg, const struct data *d)
calculation of x * 2^52 (2^mantissa) to normalise the number at entry to
the log function and then subtract log10(2) * 52 to re-subnormalise the
output to the correct result. */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svbool_t pg, svbool_t special)
{
const struct data *d = ptr_barrier (&data);
@@ -122,7 +122,7 @@ special_case (svfloat64_t x, svbool_t pg, svbool_t special)
Maximum measured error is 2.46 ulps.
SV_NAME_D1 (log10)(0x1.131956cd4b627p+0) got 0x1.fffbdf6eaa669p-6
want 0x1.fffbdf6eaa667p-6. */
-svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -52,7 +52,7 @@ static const struct data
#define MantissaMask 0x007fffff
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
v_log10f_inline (svuint32_t u_off, const svbool_t pg, const struct data *d)
{
/* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
@@ -90,7 +90,7 @@ v_log10f_inline (svuint32_t u_off, const svbool_t pg, const struct data *d)
calculation of x * 2^23 (2^mantissa) to normalise the number at entry to
the log function and then subtract log10(2) * 23 to re-subnormalise the
output to the correct result. */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svbool_t pg, svbool_t special,
const struct data *d)
{
@@ -125,7 +125,7 @@ special_case (svfloat32_t x, svbool_t pg, svbool_t special,
Maximum error is 3.31ulps:
SV_NAME_F1 (log10)(0x1.555c16p+0) got 0x1.ffe2fap-4
want 0x1.ffe2f4p-4. */
-svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -58,7 +58,7 @@ static const struct data
.bottom_mask = 0x00000000ffffffff,
};
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svfloat64_t y, svbool_t special, svbool_t pg)
{
@@ -73,7 +73,7 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special, svbool_t pg)
Worst-case error is 2.81 ULP:
_ZGVsMxv_log10p1(0x1.25c3f17d7602p-53) got 0x1.fe52a1624aad1p-55
want 0x1.fe52a1624aacep-55. */
-svfloat64_t SV_NAME_D1 (log10p1) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (log10p1) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -47,7 +47,7 @@ static const struct data
#define SignedExpMask sv_s32 (0xff800000)
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svfloat32_t y, const svbool_t pg,
svbool_t special)
{
@@ -62,7 +62,7 @@ special_case (svfloat32_t x, svfloat32_t y, const svbool_t pg,
Worst-case error is 3.40 ULP:
_ZGVsMxv_log10p1f(0x1.8bfff6p+6) got 0x1.000002p+1
want 0x1.fffffep+0. */
-svfloat32_t SV_NAME_F1 (log10p1) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (log10p1) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -62,7 +62,7 @@ static const struct data
#define AbsMask 0x7fffffffffffffff
#define BottomMask 0xffffffff
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
{
return sv_call_f64 (log1p, x, y, special);
@@ -72,7 +72,7 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
observed error is 2.46 ULP:
_ZGVsMxv_log1p(0x1.654a1307242a4p+11) got 0x1.fd5565fb590f4p+2
want 0x1.fd5565fb590f6p+2. */
-svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
svuint64_t ix = svreinterpret_u64 (x);
@@ -20,7 +20,7 @@
#include "sv_math.h"
#include "sv_log1pf_inline.h"
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svbool_t special)
{
return sv_call_f32 (log1pf, x, sv_log1pf_inline (x, svptrue_b32 ()),
@@ -31,7 +31,7 @@ special_case (svfloat32_t x, svbool_t special)
error is 1.27 ULP very close to 0.5.
_ZGVsMxv_log1pf(0x1.fffffep-2) got 0x1.9f324p-2
want 0x1.9f323ep-2. */
-svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg)
+SVE_FUNCTION SVE_FUNCTION svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg)
{
/* x < -1, Inf/Nan. */
svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000);
@@ -43,7 +43,7 @@ static const struct data
.thresh = (0x7fe0000000000000), /* infinity - min. */
};
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
v_log2_inline (svuint64_t ix, const svbool_t pg, const struct data *d)
{
/* x = 2^k z; where z is in range [Off,2*Off) and exact.
@@ -81,7 +81,7 @@ v_log2_inline (svuint64_t ix, const svbool_t pg, const struct data *d)
calculation of x * 2^52 (2^mantissa) to normalise the number at entry to
the log function and then subtract log2(2) * 52 = 52 to re-subnormalise the
output to the correct result. */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svbool_t pg, svbool_t special)
{
const struct data *d = ptr_barrier (&data);
@@ -116,7 +116,7 @@ special_case (svfloat64_t x, svbool_t pg, svbool_t special)
The maximum observed error is 2.58 ULP:
SV_NAME_D1 (log2)(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5
want 0x1.fffb34198d9ddp-5. */
-svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -49,7 +49,7 @@ static const struct data
#define MantissaMask 0x007fffff
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
v_log2f_inline (svuint32_t u_off, svbool_t pg, const struct data *d)
{
svuint32_t u = svand_x (pg, u_off, MantissaMask);
@@ -80,7 +80,7 @@ v_log2f_inline (svuint32_t u_off, svbool_t pg, const struct data *d)
calculation of x * 2^23 (2^mantissa) to normalise the number at entry to
the log function and then subtract log2(2) * 23 = 23 to re-subnormalise the
output to the correct result. */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
special_case (svfloat32_t x, svbool_t pg, svbool_t special,
const struct data *d)
{
@@ -115,7 +115,7 @@ special_case (svfloat32_t x, svbool_t pg, svbool_t special,
Maximum error is 2.48 ULPs:
SV_NAME_F1 (log2)(0x1.558174p+0) got 0x1.a9be84p-2
want 0x1.a9be8p-2. */
-svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -53,7 +53,7 @@ static const struct data
.inv_ln2 = 0x1.71547652b82fep+0,
};
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svfloat64_t y, svbool_t special, svbool_t pg)
{
@@ -68,7 +68,7 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t special, svbool_t pg)
Worst-case error is 3.0 ULP:
_ZGVsMxv_log2p1(0x1.62e029c6f784fp-18) got 0x1.fff9d9148a06fp-18
want 0x1.fff9d9148a072p-18 . */
-svfloat64_t SV_NAME_D1 (log2p1) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (log2p1) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -34,7 +34,7 @@ static const struct data
.four = 0x40800000, .three_quarters = 0x3f400000
};
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svfloat32_t y, const svbool_t pg,
svbool_t special)
{
@@ -49,7 +49,7 @@ special_case (svfloat32_t x, svfloat32_t y, const svbool_t pg,
Worst-case error is 1.90 ULP:
_ZGVsMxv_log2p1f(0x1.8789fcp-2) got 0x1.de58d4p-2
want 0x1.de58d8p-2. */
-svfloat32_t SV_NAME_F1 (log2p1) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (log2p1) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -44,7 +44,7 @@ static const struct data
.thresh = (0x7fe0000000000000), /* infinity - min. */
};
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
v_log_inline (svuint64_t ix, const svbool_t pg, const struct data *d)
{
/* x = 2^k z; where z is in range [Off,2*Off) and exact.
@@ -85,7 +85,7 @@ v_log_inline (svuint64_t ix, const svbool_t pg, const struct data *d)
calculation of x * 2^52 (2^mantissa) to normalise the number at entry to
the log function and then subtract ln(2) * 52 to re-subnormalise the
output to the correct result. */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svbool_t pg, svbool_t special)
{
const struct data *d = ptr_barrier (&data);
@@ -118,7 +118,7 @@ special_case (svfloat64_t x, svbool_t pg, svbool_t special)
Maximum measured error is 2.64 ulp:
SV_NAME_D1 (log)(0x1.95e54bc91a5e2p+184) got 0x1.fffffffe88cacp+6
want 0x1.fffffffe88cafp+6. */
-svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -49,7 +49,7 @@ static const struct data
#define MantissaMask 0x007fffff
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
v_logf_inline (svuint32_t u_off, const svbool_t pg, const struct data *d)
{
svuint32_t u = svand_x (pg, u_off, MantissaMask);
@@ -81,7 +81,7 @@ v_logf_inline (svuint32_t u_off, const svbool_t pg, const struct data *d)
calculation of x * 2^23 (2^mantissa) to normalise the number at entry to
the log function and then subtract ln(2) * 23 to re-subnormalise the result
output to the correct result. */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
special_case (svfloat32_t x, svbool_t pg, svbool_t special,
const struct data *d)
{
@@ -115,7 +115,7 @@ special_case (svfloat32_t x, svbool_t pg, svbool_t special,
polynomial as the AdvSIMD routine. Maximum error is 3.34 ULPs:
SV_NAME_F1 (log)(0x1.557298p+0) got 0x1.26edecp-2
want 0x1.26ede6p-2. */
-svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -31,6 +31,8 @@
# error Cannot use poly_generic without defining DUP
#endif
+OPTIONS_PUSH_SVE
+
static inline VTYPE VWRAP (pairwise_poly_3) (svbool_t pg, VTYPE x, VTYPE x2,
const STYPE *poly)
{
@@ -311,3 +313,5 @@ static inline VTYPE VWRAP (pw_horner_18) (svbool_t pg, VTYPE x, VTYPE x2,
VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
return svmla_x (pg, p01, x2, p2_18);
}
+
+OPTIONS_POP
@@ -89,21 +89,21 @@ static const struct data
};
/* Check if x is an integer. */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
sv_isint (svbool_t pg, svfloat64_t x)
{
return svcmpeq (pg, svrintz_z (pg, x), x);
}
/* Check if x is real not integer valued. */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
sv_isnotint (svbool_t pg, svfloat64_t x)
{
return svcmpne (pg, svrintz_z (pg, x), x);
}
/* Check if x is an odd integer. */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
sv_isodd (svbool_t pg, svfloat64_t x)
{
svfloat64_t y = svmul_x (svptrue_b64 (), x, 0.5);
@@ -128,7 +128,7 @@ checkint (uint64_t iy)
}
/* Top 12 bits (sign and exponent of each double float lane). */
-static inline svuint64_t
+SVE_FUNCTION static inline svuint64_t
sv_top12 (svfloat64_t x)
{
return svlsr_x (svptrue_b64 (), svreinterpret_u64 (x), 52);
@@ -142,7 +142,7 @@ zeroinfnan (uint64_t i)
}
/* Returns 1 if input is the bit representation of 0, infinity or nan. */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
sv_zeroinfnan (svbool_t pg, svuint64_t i)
{
return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),
@@ -156,7 +156,7 @@ sv_zeroinfnan (svbool_t pg, svuint64_t i)
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow. */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
specialcase (svfloat64_t tmp, svuint64_t sbits, svuint64_t ki, svbool_t cmp)
{
svbool_t p_pos = svcmpge_n_f64 (cmp, svreinterpret_f64_u64 (ki), 0.0);
@@ -177,7 +177,7 @@ specialcase (svfloat64_t tmp, svuint64_t sbits, svuint64_t ki, svbool_t cmp)
/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
additional 15 bits precision. IX is the bit representation of x, but
normalized in the subnormal range using the sign bit for the exponent. */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail,
const struct data *d)
{
@@ -239,7 +239,7 @@ sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail,
return y;
}
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
sv_exp_core (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
svuint64_t sign_bias, svfloat64_t *tmp, svuint64_t *sbits,
svuint64_t *ki, const struct data *d)
@@ -280,7 +280,7 @@ sv_exp_core (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
svuint64_t sign_bias, const struct data *d)
{
@@ -363,14 +363,15 @@ pow_specialcase (double x, double y)
}
/* Scalar fallback for special case routines with custom signature. */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
sv_pow_specialcase (svfloat64_t x1, svfloat64_t x2, svfloat64_t y,
svbool_t cmp)
{
return sv_call2_f64 (pow_specialcase, x1, x2, y, cmp);
}
-svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y,
+ const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -59,21 +59,21 @@ static const struct data
#define C(i) sv_f64 (d->exp_poly[i])
/* Check if x is an integer. */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
svisint (svbool_t pg, svfloat32_t x)
{
return svcmpeq (pg, svrintz_z (pg, x), x);
}
/* Check if x is real not integer valued. */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
svisnotint (svbool_t pg, svfloat32_t x)
{
return svcmpne (pg, svrintz_z (pg, x), x);
}
/* Check if x is an odd integer. */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
svisodd (svbool_t pg, svfloat32_t x)
{
svfloat32_t y = svmul_x (pg, x, 0.5f);
@@ -81,7 +81,7 @@ svisodd (svbool_t pg, svfloat32_t x)
}
/* Check if zero, inf or nan. */
-static inline svbool_t
+SVE_FUNCTION static inline svbool_t
sv_zeroinfnan (svbool_t pg, svuint32_t i)
{
return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),
@@ -146,14 +146,14 @@ powf_specialcase (float x, float y)
}
/* Scalar fallback for special case routines with custom signature. */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)
{
return sv_call2_f32 (powf_specialcase, x1, x2, y, cmp);
}
/* Compute core for half of the lanes in double precision. */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,
svfloat64_t y, svuint64_t sign_bias, svfloat64_t *pylogx,
const struct data *d)
@@ -197,7 +197,7 @@ sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,
/* Widen vector to double precision and compute core on both halves of the
vector. Lower cost of promotion by considering all lanes active. */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
svfloat32_t y, svuint32_t sign_bias, svfloat32_t *pylogx,
const struct data *d)
@@ -243,7 +243,8 @@ sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
Maximum measured error is 2.57 ULPs:
SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127
want 0x1.fff862p+127. */
-svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y,
+ const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -34,7 +34,7 @@ static const struct data
.scale_down = 27,
};
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
inline_rsqrt (svfloat64_t x)
{
/* Do estimate instruction. */
@@ -58,7 +58,7 @@ inline_rsqrt (svfloat64_t x)
return estimate;
}
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svbool_t special, const struct data *d)
{
x = svscale_f64_m (special, x, sv_s64 (d->scale_up));
@@ -70,7 +70,7 @@ special_case (svfloat64_t x, svbool_t special, const struct data *d)
Maximum observed error: 1.45 + 0.5
_ZGVnN2v_rsqrt(0x1.d13fb41254643p+1023) got 0x1.0c8dee1b29dfap-512
want 0x1.0c8dee1b29df8p-512. */
-svfloat64_t SV_NAME_D1 (rsqrt) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (rsqrt) (svfloat64_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -35,7 +35,7 @@ static const struct data
.scale_down = 11,
};
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
inline_rsqrt (svfloat32_t x)
{
/* Do estimate instruction. */
@@ -54,7 +54,7 @@ inline_rsqrt (svfloat32_t x)
return estimate;
}
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svbool_t special, const struct data *d)
{
x = svscale_f32_m (special, x, sv_s32 (d->scale_up));
@@ -66,7 +66,7 @@ special_case (svfloat32_t x, svbool_t special, const struct data *d)
Maximum observed error: 1.47 + 0.5
_ZGVsMxv_rsqrtf (0x1.f610dep+127) got 0x1.02852cp-64
want 0x1.02853p-64. */
-svfloat32_t SV_NAME_F1 (rsqrt) (svfloat32_t x, svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (rsqrt) (svfloat32_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -38,7 +38,7 @@ static const struct data
#define C(i) sv_f64 (d->poly[i])
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
{
return sv_call_f64 (sin, x, y, cmp);
@@ -53,7 +53,7 @@ special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
is 3.22 ULP:
_ZGVsMxv_sin (0x1.5702447b6f17bp+22) got 0x1.ffdcd125c84fbp-3
want 0x1.ffdcd125c84f8p-3. */
-svfloat64_t SV_NAME_D1 (sin) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (sin) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -42,7 +42,7 @@ static const struct data
#define RangeVal 0x49800000 /* asuint32 (0x1p20f). */
#define C(i) sv_f32 (d->poly[i])
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
{
return sv_call_f32 (sinf, x, y, cmp);
@@ -53,7 +53,7 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
This maximum error is achieved at multiple values in [-2^18, 2^18]
but one example is:
SV_NAME_F1 (sin)(0x1.9247a4p+0) got 0x1.fffff6p-1 want 0x1.fffffap-1. */
-svfloat32_t SV_NAME_F1 (sin) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (sin) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -59,7 +59,7 @@ static const struct data
/* A specialised FEXPA expm1 that is only valid for positive inputs and
has no special cases. Based off the full FEXPA expm1 implementated for
_ZGVsMxv_expm1, with a slightly modified file to keep sinh under 3.5ULP. */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
expm1_inline (svbool_t pg, svfloat64_t x)
{
const struct data *d = ptr_barrier (&data);
@@ -114,7 +114,7 @@ expm1_inline (svbool_t pg, svfloat64_t x)
sinh (A + B) = cosh(A)cosh(B) + sinh(A)sinh(B)
By choosing sufficiently large values whereby after rounding sinh == cosh,
this can be simplified into: sinh (A + B) = sinh(A) * e^B. */
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svuint64_t sign, svbool_t pg, svbool_t special, svfloat64_t ax,
svfloat64_t halfsign)
{
@@ -151,7 +151,7 @@ special_case (svuint64_t sign, svbool_t pg, svbool_t special, svfloat64_t ax,
The greatest observed error is 2.62 + 0.5 ULP:
_ZGVsMxv_sinh (0x1.b5e0e13ba88aep-2) got 0x1.c3587faf97b0cp-2
want 0x1.c3587faf97b09p-2. */
-svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -40,7 +40,7 @@ static const struct data
sinh (A + B) = cosh(A)cosh(B) + sinh(A)sinh(B)
By choosing sufficiently large values whereby after rounding sinh == cosh,
this can be simplified into: sinh (A + B) = sinh(A) * e^B. */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
special_case (const svbool_t pg, svbool_t special, svfloat32_t ax,
svfloat32_t x, svfloat32_t t, const struct data *d)
{
@@ -80,7 +80,7 @@ special_case (const svbool_t pg, svbool_t special, svfloat32_t ax,
Maximum error is 2.76 +0.5 ULP:
_ZGVsMxv_sinhf (0x1.6587e8p+6) got 0x1.ef3f98p+127
want 0x1.ef3f92p+127. */
-svfloat32_t SV_NAME_F1 (sinh) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (sinh) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -37,7 +37,7 @@ static const struct data
Maximum error 3.10 ULP:
_ZGVsMxv_sinpi(0x1.df1a14f1b235p-2) got 0x1.fd64f541606cp-1
want 0x1.fd64f541606c3p-1. */
-svfloat64_t SV_NAME_D1 (sinpi) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (sinpi) (svfloat64_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -34,7 +34,7 @@ static const struct data
Maximum error 2.48 ULP:
_ZGVsMxv_sinpif(0x1.d062b6p-2) got 0x1.fa8c06p-1
want 0x1.fa8c02p-1. */
-svfloat32_t SV_NAME_F1 (sinpi) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (sinpi) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -31,7 +31,7 @@ static const struct sv_exp_special_data
.special_bias2 = 0x3010000000000000, /* 0x1p-254. */
};
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
special_case (svfloat64_t scale, svfloat64_t poly, svfloat64_t n,
const struct sv_exp_special_data *ds)
{
@@ -35,7 +35,7 @@ struct sv_expf_data
.ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f, \
}
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
{
/* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
@@ -32,7 +32,7 @@ static const struct sv_expf_special_data
};
/* Special case routine shared with other expBm1 routines. */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
special_exp (svfloat32_t poly, svfloat32_t n, svuint32_t e, svbool_t cmp1,
svfloat32_t scale, const struct sv_expf_special_data *ds)
{
@@ -53,7 +53,7 @@ special_exp (svfloat32_t poly, svfloat32_t n, svuint32_t e, svbool_t cmp1,
}
/* Special case routine for expBm1. */
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t poly, svfloat32_t n, svfloat32_t scale,
svbool_t cmp1, const struct sv_expf_special_data *ds)
{
@@ -39,7 +39,7 @@ struct sv_expm1f_data
.c4 = 0x1.6b55a2p-10, .ln2_lo = 0x1.7f7d1cp-20f, .ln2_hi = 0x1.62e4p-1f, \
}
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d)
{
/* This vector is reliant on layout of data - it contains constants
@@ -63,7 +63,7 @@ static const struct sv_log1p_data
.one_top = 0x3ff
};
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
sv_log1p_inline (svfloat64_t x, const svbool_t pg)
{
/* Helper for calculating log(x + 1). Adapted from v_log1p_inline.h, which
@@ -42,7 +42,7 @@ static const struct sv_log1pf_data
.three_quarters = 0x3f400000,
};
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
sv_log1pf_inline (svfloat32_t x, svbool_t pg)
{
const struct sv_log1pf_data *d = ptr_barrier (&sv_log1pf_data);
@@ -35,11 +35,25 @@
/* Predicate is stored as one bit per byte of VL so requires VL / 64 bytes. */
#define SVE_NUM_PG_BYTES (SVE_VECTOR_BYTES / sizeof (uint64_t))
+#define SVE_FUNCTION __attribute__((target("+sve")))
+#ifdef __clang__
+# define OPTIONS_PUSH_SVE \
+ _Pragma("clang attribute push (__attribute__((target(\"+sve\"))), apply_to=function)")
+# define OPTIONS_POP _Pragma("clang attribute pop")
+#else
+# define OPTIONS_PUSH_SVE _Pragma ("GCC push_options"); \
+ _Pragma ("GCC target(\"+sve\")")
+# define OPTIONS_POP _Pragma ("GCC pop_options")
+#endif
+
+
#define SV_NAME_F1(fun) _ZGVsMxv_##fun##f
#define SV_NAME_D1(fun) _ZGVsMxv_##fun
#define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f
#define SV_NAME_D2(fun) _ZGVsMxvv_##fun
+OPTIONS_PUSH_SVE
+
static inline void
svstr_p (uint8_t *dst, svbool_t p)
{
@@ -165,4 +179,7 @@ sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2,
}
return svld1 (svptrue_b32 (), tmp1);
}
+
+OPTIONS_POP
+
#endif
@@ -41,7 +41,7 @@ static const struct data
.range_val = 0x1p23,
};
-static svfloat64_t NOINLINE
+SVE_FUNCTION static svfloat64_t NOINLINE
special_case (svfloat64_t x, svfloat64_t p, svfloat64_t q, svbool_t pg,
svbool_t special)
{
@@ -61,7 +61,7 @@ special_case (svfloat64_t x, svfloat64_t p, svfloat64_t q, svbool_t pg,
Maximum measured error is 3.48 ULP:
_ZGVsMxv_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
want -0x1.f6ccd8ecf7deap+37. */
-svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg)
{
const struct data *dat = ptr_barrier (&data);
svfloat64_t half_pi_c0 = svld1rq (svptrue_b64 (), &dat->c0);
@@ -46,7 +46,7 @@ static const struct data
.range_val = 0x1p15f, .shift = 0x1.8p+23f
};
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
{
return sv_call_f32 (tanf, x, y, cmp);
@@ -56,7 +56,7 @@ special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
Maximum error is 3.45 ULP:
SV_NAME_F1 (tan)(-0x1.e5f0cap+13) got 0x1.ff9856p-1
want 0x1.ff9850p-1. */
-svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -55,7 +55,7 @@ static const struct data
/* An expm1 inspired, FEXPA based helper function that returns an
accurate estimate for e^2x - 1. With no special case or support for
negative inputs of x. */
-static inline svfloat64_t
+SVE_FUNCTION static inline svfloat64_t
e2xm1_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
{
svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->two_over_ln2);
@@ -108,7 +108,7 @@ e2xm1_inline (const svbool_t pg, svfloat64_t x, const struct data *d)
The greatest observed error is 2.79 + 0.5 ULP:
_ZGVsMxv_tanh (0x1.fff868eb3c223p-9) got 0x1.fff7be486cae6p-9
want 0x1.fff7be486cae9p-9. */
-svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -44,7 +44,7 @@ static const struct data
/* An expm1 inspired helper function that returns an accurate
estimate for e^2x - 1. */
-static inline svfloat32_t
+SVE_FUNCTION static inline svfloat32_t
e2xm1f_inline (svfloat32_t x, svbool_t pg, const struct data *d)
{
/* This vector is reliant on layout of data - it contains constants
@@ -82,7 +82,7 @@ e2xm1f_inline (svfloat32_t x, svbool_t pg, const struct data *d)
return svmla_x (pg, svsub_x (pg, t, 1.0f), p, t);
}
-static svfloat32_t NOINLINE
+SVE_FUNCTION static svfloat32_t NOINLINE
special_case (svfloat32_t x, svbool_t pg, svbool_t special, svfloat32_t q)
{
/* Finish fastpass to compute values for non-special cases. */
@@ -108,7 +108,7 @@ special_case (svfloat32_t x, svbool_t pg, svbool_t special, svfloat32_t q)
Maximum error is 2.06 +0.5 ULP:
_ZGVsMxv_tanhf (0x1.fc1832p-5) got 0x1.fb71a4p-5
want 0x1.fb71aap-5. */
-svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg)
{
const struct data *d = ptr_barrier (&data);
@@ -42,7 +42,7 @@ const static struct v_tanpi_data
The maximum error is 3.06 ULP:
_ZGVsMxv_tanpi(0x1.0a4a07dfcca3ep-1) got -0x1.fa30112702c98p+3
want -0x1.fa30112702c95p+3. */
-svfloat64_t SV_NAME_D1 (tanpi) (svfloat64_t x, const svbool_t pg)
+SVE_FUNCTION svfloat64_t SV_NAME_D1 (tanpi) (svfloat64_t x, const svbool_t pg)
{
const struct v_tanpi_data *d = ptr_barrier (&tanpi_data);
@@ -34,7 +34,7 @@ const static struct v_tanpif_data
The maximum error is 3.34 ULP:
_ZGVsMxv_tanpif(0x1.d6c09ap-2) got 0x1.f70aacp+2
want 0x1.f70aa6p+2. */
-svfloat32_t SV_NAME_F1 (tanpi) (svfloat32_t x, const svbool_t pg)
+SVE_FUNCTION svfloat32_t SV_NAME_F1 (tanpi) (svfloat32_t x, const svbool_t pg)
{
const struct v_tanpif_data *d = ptr_barrier (&tanpif_data);
svfloat32_t odd_coeffs = svld1rq (svptrue_b32 (), &d->c1);
@@ -20,12 +20,14 @@
#include <arm_sve.h>
#include "test-double-sve.h"
+#define attribute_hidden
+#include "sv_math.h"
/* Wrapper from scalar to SVE function. Cannot just use VECTOR_WRAPPER due to
predication. */
#define SVE_VECTOR_WRAPPER(scalar_func, vector_func) \
extern VEC_TYPE vector_func (VEC_TYPE, svbool_t); \
- FLOAT scalar_func (FLOAT x) \
+ SVE_FUNCTION FLOAT scalar_func (FLOAT x) \
{ \
VEC_TYPE mx = svdup_n_f64 (x); \
VEC_TYPE mr = vector_func (mx, svptrue_b64 ()); \
@@ -34,7 +36,7 @@
#define SVE_VECTOR_WRAPPER_ff(scalar_func, vector_func) \
extern VEC_TYPE vector_func (VEC_TYPE, VEC_TYPE, svbool_t); \
- FLOAT scalar_func (FLOAT x, FLOAT y) \
+ SVE_FUNCTION FLOAT scalar_func (FLOAT x, FLOAT y) \
{ \
VEC_TYPE mx = svdup_n_f64 (x); \
VEC_TYPE my = svdup_n_f64 (y); \
@@ -20,12 +20,14 @@
#include <arm_sve.h>
#include "test-float-sve.h"
+#define attribute_hidden
+#include "sv_math.h"
/* Wrapper from scalar to SVE function. Cannot just use VECTOR_WRAPPER due to
predication. */
#define SVE_VECTOR_WRAPPER(scalar_func, vector_func) \
extern VEC_TYPE vector_func (VEC_TYPE, svbool_t); \
- FLOAT scalar_func (FLOAT x) \
+ SVE_FUNCTION FLOAT scalar_func (FLOAT x) \
{ \
VEC_TYPE mx = svdup_n_f32 (x); \
VEC_TYPE mr = vector_func (mx, svptrue_b32 ()); \
@@ -34,7 +36,7 @@
#define SVE_VECTOR_WRAPPER_ff(scalar_func, vector_func) \
extern VEC_TYPE vector_func (VEC_TYPE, VEC_TYPE, svbool_t); \
- FLOAT scalar_func (FLOAT x, FLOAT y) \
+ SVE_FUNCTION FLOAT scalar_func (FLOAT x, FLOAT y) \
{ \
VEC_TYPE mx = svdup_n_f32 (x); \
VEC_TYPE my = svdup_n_f32 (y); \