Hi,
This RFC is in response to a query about the AdvSIMD sin routines,
which explicitly restore the sign of a zero input. These routines are
designed to be enabled at -Ofast, where the compiler is allowed to
ignore the sign of zero (-fno-signed-zeros), so from the compiler's
point of view this special handling is pointless. Because of this, it
seems reasonable to introduce a mechanism to disable tests for -0 when
the routine under test comes from libmvec.
However, the routines may still be called directly by developers
writing vector code, and losing the sign of zero is not one of the
caveats described in the libmvec wiki. Maybe it should be? The patch
below is only meant to illustrate the kind of simplification that
relaxing these tests unlocks.
Thanks,
Joe
---
math/auto-libm-test-out-sin | 4 ++--
math/gen-libm-test.py | 3 ++-
sysdeps/aarch64/fpu/sin_advsimd.c | 7 +------
sysdeps/aarch64/fpu/sinf_advsimd.c | 7 +------
4 files changed, 6 insertions(+), 15 deletions(-)
@@ -25,11 +25,11 @@ sin 0
= sin upward ibm128 0x0p+0 : 0x0p+0 : inexact-ok
sin -0
= sin downward binary32 -0x0p+0 : -0x0p+0 : inexact-ok
-= sin tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok
+= sin tonearest binary32 -0x0p+0 : -0x0p+0 : inexact-ok no-mathvec
= sin towardzero binary32 -0x0p+0 : -0x0p+0 : inexact-ok
= sin upward binary32 -0x0p+0 : -0x0p+0 : inexact-ok
= sin downward binary64 -0x0p+0 : -0x0p+0 : inexact-ok
-= sin tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok
+= sin tonearest binary64 -0x0p+0 : -0x0p+0 : inexact-ok no-mathvec
= sin towardzero binary64 -0x0p+0 : -0x0p+0 : inexact-ok
= sin upward binary64 -0x0p+0 : -0x0p+0 : inexact-ok
= sin downward intel96 -0x0p+0 : -0x0p+0 : inexact-ok
@@ -93,7 +93,8 @@ BEAUTIFY_MAP = {'minus_zero': '-0',
# Flags in auto-libm-test-out that map directly to C flags.
FLAGS_SIMPLE = {'ignore-zero-inf-sign': 'IGNORE_ZERO_INF_SIGN',
- 'xfail': 'XFAIL_TEST'}
+ 'xfail': 'XFAIL_TEST',
+ 'no-mathvec': 'NO_TEST_MATHVEC'}
# Exceptions in auto-libm-test-out, and their corresponding C flags
# for being required, OK or required to be absent.
@@ -56,7 +56,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
{
const struct data *d = ptr_barrier (&data);
float64x2_t n, r, r2, r3, r4, y, t1, t2, t3;
- uint64x2_t odd, cmp, eqz;
+ uint64x2_t odd, cmp;
#if WANT_SIMD_EXCEPT
/* Detect |x| <= TinyBound or |x| >= RangeVal. If fenv exceptions are to be
@@ -70,7 +70,6 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
cmp = vcageq_f64 (d->range_val, x);
cmp = vceqzq_u64 (cmp); /* cmp = ~cmp. */
#endif
- eqz = vceqzq_f64 (x);
/* n = rint(|x|/pi). */
n = vfmaq_f64 (d->shift, d->inv_pi, r);
@@ -96,10 +95,6 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x)
y = vfmaq_f64 (t3, y, r4);
y = vfmaq_f64 (r, y, r3);
- /* Sign of 0 is discarded by polynomial, so copy it back here. */
- if (__glibc_unlikely (v_any_u64 (eqz)))
- y = vbslq_f64 (eqz, x, y);
-
if (__glibc_unlikely (v_any_u64 (cmp)))
return special_case (x, y, odd, cmp);
return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
@@ -56,7 +56,7 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x)
{
const struct data *d = ptr_barrier (&data);
float32x4_t n, r, r2, y;
- uint32x4_t odd, cmp, eqz;
+ uint32x4_t odd, cmp;
#if WANT_SIMD_EXCEPT
uint32x4_t ir = vreinterpretq_u32_f32 (vabsq_f32 (x));
@@ -70,7 +70,6 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x)
cmp = vcageq_f32 (d->range_val, x);
cmp = vceqzq_u32 (cmp); /* cmp = ~cmp. */
#endif
- eqz = vceqzq_f32 (x);
/* n = rint(|x|/pi) */
n = vfmaq_f32 (d->shift, d->inv_pi, r);
@@ -89,10 +88,6 @@ float32x4_t VPCS_ATTR V_NAME_F1 (sin) (float32x4_t x)
y = vfmaq_f32 (C (0), y, r2);
y = vfmaq_f32 (r, vmulq_f32 (y, r2), r);
- /* Sign of 0 is discarded by polynomial, so copy it back here. */
- if (__glibc_unlikely (v_any_u32 (eqz)))
- y = vbslq_f32 (eqz, x, y);
-
if (__glibc_unlikely (v_any_u32 (cmp)))
return special_case (x, y, odd, cmp);
return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));