[v1,5/12] PPC64: Add libmvec SIMD double-precision sincos function.
Commit Message
1. This implementation is basically a combination of the double-precision cosine and
sine functions. Those are in, respectively, patches No. 1 and No. 3 in this sequence.
2. As sincos returns both a vector of sines and a vector of cosines, the ABI used requires
that: the caller of sincos pass, as input arguments 2 and 3, pointers to vector doubles in
which the sine and cosine results will be stored.
Of course, if the final ABI defines a different format for returning sincos results, we will
make changes as necessary.
Comments
GT <tnggil@protonmail.com> writes:
> diff --git a/ChangeLog b/ChangeLog
> index ceb69838c7..d56d1f2a28 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,19 @@
> +2019-04-06 Bert Tenjy <bert.tenjy@gmail.com>
> +
> + [BZ #24207]
> + * NEWS: Note the addition of PPC64 vector sincos.
> + * sysdeps/powerpc/bits/math-vector.h: Added sincos entry.
> + * sysdeps/powerpc/fpu/libm-test-ulps: Regenerated.
> + * sysdeps/powerpc/powerpc64/fpu/Versions: Added sincos entry.
> + * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile: (libmvec-sysdep_routines)
> + (CFLAGS-vec_d_sincos2_vsx.c, libmvec-tests, double-vlen2-funcs)
> + (double-vlen2-arch-ext-cflags): Added build of VSX SIMD sincos function
> + and its tests.
> + * sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c: Added sincos entry.
> + * sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.c: New file.
> + * sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.h: Likewise.
> + * sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist: SIMD sincos added.
> +
Likewise.
Fixed and pushed to branch tuliom/libmvec.
Thanks!
From 2cfc351b7c43cb1ab976908c45e4f151926e55a6 Mon Sep 17 00:00:00 2001
From: Bert Tenjy <bert.tenjy@gmail.com>
Date: Sat, 6 Apr 2019 21:11:52 +0000
Subject: [PATCH v1 5/12] PPC64: Add libmvec SIMD double-precision sincos
function.
[BZ #24207]
Implements double-precision vector sincos function. The polynomial approxima-
ting algorithm is adapted for PPC64 from x86_64 [commit #c9a8c526ac].
The patch has been tested on PPC64/POWER8 Little Endian and Big Endian.
Testing uses the framework created for libmvec on x86_64 which runs tests on
issuing 'make check'. Tests of the new vector sincos function all pass.
---
ChangeLog | 16 ++
NEWS | 1 +
sysdeps/powerpc/bits/math-vector.h | 2 +
sysdeps/powerpc/fpu/libm-test-ulps | 3 +
sysdeps/powerpc/powerpc64/fpu/Versions | 1 +
.../powerpc/powerpc64/fpu/multiarch/Makefile | 6 +-
.../multiarch/test-double-vlen2-wrappers.c | 2 +
.../fpu/multiarch/vec_d_sincos2_vsx.c | 33 ++++
.../fpu/multiarch/vec_d_sincos2_vsx.h | 169 ++++++++++++++++++
.../linux/powerpc/powerpc64/libmvec.abilist | 1 +
10 files changed, 232 insertions(+), 2 deletions(-)
create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.c
create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.h
@@ -1,3 +1,19 @@
+2019-04-06 Bert Tenjy <bert.tenjy@gmail.com>
+
+ [BZ #24207]
+ * NEWS: Note the addition of PPC64 vector sincos.
+ * sysdeps/powerpc/bits/math-vector.h: Added sincos entry.
+ * sysdeps/powerpc/fpu/libm-test-ulps: Regenerated.
+ * sysdeps/powerpc/powerpc64/fpu/Versions: Added sincos entry.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile: (libmvec-sysdep_routines)
+ (CFLAGS-vec_d_sincos2_vsx.c, libmvec-tests, double-vlen2-funcs)
+ (double-vlen2-arch-ext-cflags): Added build of VSX SIMD sincos function
+ and its tests.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c: Added sincos entry.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.c: New file.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sincos2_vsx.h: Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist: SIMD sincos added.
+
2019-04-03 Bert Tenjy <bert.tenjy@gmail.com>
[BZ #24206]
@@ -17,6 +17,7 @@ Major new features:
- single-precision cosine: cosf
- double-precision sine: sin
- single-precision cosine: sinf
+ - double-precision sincos: sincos
GCC support for auto-vectorization of functions on PPC64 is not yet
available. Until that is done, the new vector math functions are
@@ -36,6 +36,8 @@
# ifdef __DECL_SIMD_PPC64
# undef __DECL_SIMD_cos
# define __DECL_SIMD_cos __DECL_SIMD_PPC64
+# undef __DECL_SIMD_sincos
+# define __DECL_SIMD_sincos __DECL_SIMD_PPC64
# endif
#endif
@@ -2616,6 +2616,9 @@ ifloat128: 3
ildouble: 7
ldouble: 7
+Function: "sincos_vlen2":
+double: 2
+
Function: "sinh":
double: 2
float: 2
@@ -1,5 +1,6 @@
libmvec {
GLIBC_2.30 {
_ZGVbN2v_cos; _ZGVbN4v_cosf; _ZGVbN2v_sin; _ZGVbN4v_sinf;
+ _ZGVbN2vvv_sincos;
}
}
@@ -45,11 +45,13 @@ endif
ifeq ($(subdir),mathvec)
libmvec-sysdep_routines += vec_d_cos2_vsx vec_s_cosf4_vsx \
- vec_d_sin2_vsx vec_s_sinf4_vsx
+ vec_d_sin2_vsx vec_s_sinf4_vsx \
+ vec_d_sincos2_vsx
CFLAGS-vec_d_cos2_vsx.c += -mabi=altivec -maltivec -mvsx
CFLAGS-vec_s_cosf4_vsx.c += -mabi=altivec -maltivec -mvsx
CFLAGS-vec_d_sin2_vsx.c += -mabi=altivec -maltivec -mvsx
CFLAGS-vec_s_sinf4_vsx.c += -mabi=altivec -maltivec -mvsx
+CFLAGS-vec_d_sincos2_vsx.c += -mabi=altivec -maltivec -mvsx
endif
# Variables for libmvec tests.
@@ -57,7 +59,7 @@ ifeq ($(subdir),math)
ifeq ($(build-mathvec),yes)
libmvec-tests += double-vlen2 float-vlen4
-double-vlen2-funcs = cos sin
+double-vlen2-funcs = cos sin sincos
float-vlen4-funcs = cos sin
double-vlen2-arch-ext-cflags = -mabi=altivec -maltivec -mvsx -DREQUIRE_VSX
@@ -23,3 +23,5 @@
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVbN2v_cos)
VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVbN2v_sin)
+
+VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVbN2vvv_sincos)
new file mode 100644
@@ -0,0 +1,33 @@
+/* Function sincos vectorized with VSX.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include "vec_d_trig_data.h"
+#include "vec_d_sincos2_vsx.h"
+
+void
+_ZGVbN2vvv_sincos (vector double x, vector double * sines_x, vector double * cosines_x)
+{
+
+ /* Call vector sine evaluator. */
+ *sines_x = __d_sin_poly_eval(x);
+
+ /* Call vector cosine evaluator. */
+ *cosines_x = __d_cos_poly_eval(x);
+
+} /* Function _ZGVbN2_vvv_sincos. */
new file mode 100644
@@ -0,0 +1,169 @@
+/* Definitions to simplify code by allowing reuse of sine and cosine
+ function implementations.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include "vec_d_trig_data.h"
+
+static inline vector double
+__d_cos_poly_eval (vector double x)
+{
+
+ /*
+ ARGUMENT RANGE REDUCTION:
+ Add Pi/2 to argument: X' = X+Pi/2. */
+ vector double x_prime = (vector double) __d_half_pi + x;
+
+ /* Get absolute argument value: X' = |X'|. */
+ vector double abs_x_prime = vec_abs (x_prime);
+
+ /* Y = X'*InvPi + RS : right shifter add. */
+ vector double y = (x_prime * __d_inv_pi) + __d_rshifter;
+
+ /* Check for large arguments path. */
+ vector bool long long large_in = vec_cmpgt (abs_x_prime, __d_rangeval);
+
+ /* N = Y - RS : right shifter sub. */
+ vector double n = y - __d_rshifter;
+
+ /* SignRes = Y<<63 : shift LSB to MSB place for result sign. */
+ vector double sign_res = (vector double) vec_sl ((vector long long) y,
+ (vector unsigned long long)
+ vec_splats (63));
+
+ /* N = N - 0.5. */
+ n = n - __d_one_half;
+
+ /* R = X - N*Pi1. */
+ vector double r = x - (n * __d_pi1_fma);
+
+ /* R = R - N*Pi2. */
+ r = r - (n * __d_pi2_fma);
+
+ /* R = R - N*Pi3. */
+ r = r - (n * __d_pi3_fma);
+
+ /* R2 = R*R. */
+ vector double r2 = r * r;
+
+ /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))). */
+ vector double poly = r2 * __d_coeff7 + __d_coeff6;
+ poly = poly * r2 + __d_coeff5;
+ poly = poly * r2 + __d_coeff4;
+ poly = poly * r2 + __d_coeff3;
+
+ /* Poly = R+R*(R2*(C1+R2*(C2+R2*Poly))). */
+ poly = poly * r2 + __d_coeff2;
+ poly = poly * r2 + __d_coeff1;
+ poly = poly * r2 * r + r;
+
+ /*
+ RECONSTRUCTION:
+ Final sign setting: Res = Poly^SignRes. */
+ vector double out
+ = (vector double) ((vector long long) poly ^ (vector long long) sign_res);
+
+ if (large_in[0] != 0)
+ out[0] = cos (x[0]);
+
+ if (large_in[1] != 0)
+ out[1] = cos (x[1]);
+
+ return out;
+
+} /* Function __d_cos_poly_eval. */
+
+static inline vector double
+__d_sin_poly_eval (vector double x)
+{
+ /* ALGORITHM DESCRIPTION:
+
+ ( low accuracy ( < 4ulp ) or enhanced performance
+ ( half of correct mantissa ) implementation )
+
+ Argument representation:
+ arg = N*Pi + R
+
+ Result calculation:
+ sin(arg) = sin(N*Pi + R) = (-1)^N * sin(R)
+ sin(R) is approximated by corresponding polynomial. */
+
+ /* ARGUMENT RANGE REDUCTION: X' = |X|. */
+ vector double abs_x_prime = vec_abs (x);
+
+ /* Y = X'*InvPi + RS : right shifter add. */
+ vector double y = (abs_x_prime * __d_inv_pi) + __d_rshifter;
+
+ /* N = Y - RS : right shifter sub. */
+ vector double n = y - __d_rshifter;
+
+ /* SignRes = Y<<63 : shift LSB to MSB place for result sign. */
+ vector double sign_res = (vector double) vec_sl ((vector long long) y,
+ (vector unsigned long long)
+ vec_splats (63));
+
+ /* Check for large arguments path. */
+ vector bool long long large_in = vec_cmpgt (abs_x_prime, __d_rangeval);
+
+ /* R = X' - N*Pi1. */
+ vector double r = abs_x_prime - (n * __d_pi1_fma);
+
+ /* R = R - N*Pi2. */
+ r = r - (n * __d_pi2_fma);
+
+ /* R = R - N*Pi3. */
+ r = r - (n * __d_pi3_fma);
+
+ /* POLYNOMIAL APPROXIMATION: R2 = R*R. */
+ vector double r2 = r * r;
+
+ /* R = R^SignRes : update sign of reduced argument. */
+ vector double r_sign
+ = (vector double) ((vector long long) r ^ (vector long long) sign_res);
+
+ /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))). */
+ vector double poly = r2 * __d_coeff7_sin + __d_coeff6_sin;
+ poly = poly * r2 + __d_coeff5_sin;
+ poly = poly * r2 + __d_coeff4_sin;
+ poly = poly * r2 + __d_coeff3_sin;
+
+ /* Poly = R2*(C1+R2*(C2+R2*Poly)). */
+ poly = poly * r2 + __d_coeff2_sin;
+ poly = poly * r2 + __d_coeff1_sin;
+ poly = poly * r2;
+
+ /* Poly = Poly*R + R. */
+ poly = poly * r_sign + r_sign;
+
+ /* SignX: -ve sign bit of X. */
+ vector double neg_sign
+ = (vector double) vec_andc ((vector bool long long) x, __d_abs_mask);
+
+ /* RECONSTRUCTION: Final sign setting: Res = Poly^SignX. */
+ vector double out
+ = (vector double) ((vector long long) poly ^ (vector long long) neg_sign);
+
+ if (large_in[0] != 0)
+ out[0] = sin (x[0]);
+
+ if (large_in[1] != 0)
+ out[1] = sin (x[1]);
+
+ return out;
+
+} /* Function __d_sin_poly_eval. */
@@ -1,4 +1,5 @@
GLIBC_2.30 _ZGVbN2v_cos F
GLIBC_2.30 _ZGVbN2v_sin F
+GLIBC_2.30 _ZGVbN2vvv_sincos F
GLIBC_2.30 _ZGVbN4v_cosf F
GLIBC_2.30 _ZGVbN4v_sinf F
--
2.20.1