[v1,6/12] PPC64: Add libmvec SIMD single-precision sincosf function.
Commit Message
1. This implementation is basically a combination of the single-precision cosine and sine
functions. Those are in, respectively, patches No. 2 and No. 4 in this sequence.
2. Because sincosf returns both a vector of sines and a vector of cosines, the ABI used
requires the caller of sincosf to pass, as input arguments 2 and 3, pointers to the vector
floats in which the sine and cosine results will be stored.
Comments
GT <tnggil@protonmail.com> writes:
> diff --git a/ChangeLog b/ChangeLog
> index d56d1f2a28..ecf71c6c69 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,18 @@
> +2019-04-07 Bert Tenjy <bert.tenjy@gmail.com>
> +
> + [BZ #24207]
> + * NEWS: Note the addition of PPC64 vector sincosf.
> + * sysdeps/powerpc/bits/math-vector.h: Added sincosf entry.
> + * sysdeps/powerpc/fpu/libm-test-ulps: Regenerated.
> + * sysdeps/powerpc/powerpc64/fpu/Versions: Added sincosf entry.
> + * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile: (libmvec-sysdep_routines)
> + (CFLAGS-vec_s_sincosf4_vsx.c, float-vlen4-funcs): Added build of VSX SIMD sincosf function
> + and its tests.
> + * sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c: Added sincosf entry.
> + * sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sincosf4_vsx.c: New file.
> + * sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sincosf4_vsx.h: Likewise.
> + * sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist: SIMD sincosf added.
> +
Likewise.
Fixed and pushed to branch tuliom/libmvec.
Thanks!
From 5a03c4d8b7bc6d69d286f7411283e3892dd4ee26 Mon Sep 17 00:00:00 2001
From: Bert Tenjy <bert.tenjy@gmail.com>
Date: Sun, 7 Apr 2019 05:13:47 +0000
Subject: [PATCH v1 6/12] PPC64: Add libmvec SIMD single-precision sincosf
function.
[BZ #24207]
Implements single-precision vector sincosf function. The polynomial
approximating algorithm is adapted for PPC64 from x86_64 [commit #a6336cc446].
The patch has been tested on PPC64/POWER8 Little Endian and Big Endian.
Testing uses the framework created for libmvec on x86_64 which runs tests on
issuing 'make check'. Tests of the new vector sincosf function all pass.
---
ChangeLog | 15 ++
NEWS | 1 +
sysdeps/powerpc/bits/math-vector.h | 2 +
sysdeps/powerpc/fpu/libm-test-ulps | 3 +
sysdeps/powerpc/powerpc64/fpu/Versions | 2 +-
.../powerpc/powerpc64/fpu/multiarch/Makefile | 5 +-
.../fpu/multiarch/test-float-vlen4-wrappers.c | 2 +
.../fpu/multiarch/vec_s_sincosf4_vsx.c | 33 +++
.../fpu/multiarch/vec_s_sincosf4_vsx.h | 203 ++++++++++++++++++
.../linux/powerpc/powerpc64/libmvec.abilist | 1 +
10 files changed, 264 insertions(+), 3 deletions(-)
create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sincosf4_vsx.c
create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sincosf4_vsx.h
@@ -1,3 +1,18 @@
+2019-04-07 Bert Tenjy <bert.tenjy@gmail.com>
+
+ [BZ #24207]
+ * NEWS: Note the addition of PPC64 vector sincosf.
+ * sysdeps/powerpc/bits/math-vector.h: Added sincosf entry.
+ * sysdeps/powerpc/fpu/libm-test-ulps: Regenerated.
+ * sysdeps/powerpc/powerpc64/fpu/Versions: Added sincosf entry.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
+ (libmvec-sysdep_routines, CFLAGS-vec_s_sincosf4_vsx.c)
+ (float-vlen4-funcs): Added build of VSX SIMD sincosf function and its tests.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c: Added sincosf entry.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sincosf4_vsx.c: New file.
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sincosf4_vsx.h: Likewise.
+ * sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist: SIMD sincosf added.
+
2019-04-06 Bert Tenjy <bert.tenjy@gmail.com>
[BZ #24207]
@@ -18,6 +18,7 @@ Major new features:
- double-precision sine: sin
- single-precision cosine: sinf
- double-precision sincos: sincos
+ - single-precision sincos: sincosf
GCC support for auto-vectorization of functions on PPC64 is not yet
available. Until that is done, the new vector math functions are
@@ -38,6 +38,8 @@
# define __DECL_SIMD_cos __DECL_SIMD_PPC64
# undef __DECL_SIMD_sincos
# define __DECL_SIMD_sincos __DECL_SIMD_PPC64
+# undef __DECL_SIMD_sincosf
+# define __DECL_SIMD_sincosf __DECL_SIMD_PPC64
# endif
#endif
@@ -2619,6 +2619,9 @@ ldouble: 7
Function: "sincos_vlen2":
double: 2
+Function: "sincos_vlen4":
+float: 1
+
Function: "sinh":
double: 2
float: 2
@@ -1,6 +1,6 @@
libmvec {
GLIBC_2.30 {
_ZGVbN2v_cos; _ZGVbN4v_cosf; _ZGVbN2v_sin; _ZGVbN4v_sinf;
- _ZGVbN2vvv_sincos;
+ _ZGVbN2vvv_sincos; _ZGVbN4vvv_sincosf;
}
}
@@ -46,12 +46,13 @@ endif
ifeq ($(subdir),mathvec)
libmvec-sysdep_routines += vec_d_cos2_vsx vec_s_cosf4_vsx \
vec_d_sin2_vsx vec_s_sinf4_vsx \
- vec_d_sincos2_vsx
+ vec_d_sincos2_vsx vec_s_sincosf4_vsx
CFLAGS-vec_d_cos2_vsx.c += -mabi=altivec -maltivec -mvsx
CFLAGS-vec_s_cosf4_vsx.c += -mabi=altivec -maltivec -mvsx
CFLAGS-vec_d_sin2_vsx.c += -mabi=altivec -maltivec -mvsx
CFLAGS-vec_s_sinf4_vsx.c += -mabi=altivec -maltivec -mvsx
CFLAGS-vec_d_sincos2_vsx.c += -mabi=altivec -maltivec -mvsx
+CFLAGS-vec_s_sincosf4_vsx.c += -mabi=altivec -maltivec -mvsx
endif
# Variables for libmvec tests.
@@ -60,7 +61,7 @@ ifeq ($(build-mathvec),yes)
libmvec-tests += double-vlen2 float-vlen4
double-vlen2-funcs = cos sin sincos
-float-vlen4-funcs = cos sin
+float-vlen4-funcs = cos sin sincos
double-vlen2-arch-ext-cflags = -mabi=altivec -maltivec -mvsx -DREQUIRE_VSX
float-vlen4-arch-ext-cflags = -mabi=altivec -maltivec -mvsx -DREQUIRE_VSX
@@ -23,3 +23,5 @@
VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVbN4v_cosf)
VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVbN4v_sinf)
+
+VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf)
new file mode 100644
@@ -0,0 +1,33 @@
+/* Function sincosf vectorized with VSX.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include "vec_s_trig_data.h"
+#include "vec_s_sincosf4_vsx.h"
+
+void
+_ZGVbN4vvv_sincosf (vector float x, vector float * sines_x, vector float * cosines_x)
+{
+ /* Store the vector of sines of X in *SINES_X and of cosines in *COSINES_X. */
+ /* Call vector sine evaluator and write its result through SINES_X. */
+ *sines_x = __s_sin_poly_eval(x);
+
+ /* Call vector cosine evaluator and write its result through COSINES_X. */
+ *cosines_x = __s_cos_poly_eval(x);
+
+} /* Function _ZGVbN4vvv_sincosf. */
new file mode 100644
@@ -0,0 +1,203 @@
+/* Definitions to simplify code by allowing reuse of sine and cosine
+ function implementations.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include "vec_s_trig_data.h"
+
+static inline vector float
+__s_cos_poly_eval (vector float x)
+{
+ /* Return the cosine of each element of X as a vector float. */
+ /*
+ ALGORITHM DESCRIPTION:
+
+ 1) Range reduction to [-Pi/2; +Pi/2] interval
+ a) Take the absolute value |X|; it is used only to detect large arguments
+ b) Add Pi/2 value to argument X for Cos to Sin transformation
+ c) Getting octant Y by 1/Pi multiplication
+ d) Add "Right Shifter" value
+ e) Treat obtained value as integer for destination sign setting.
+ Shift first bit of this value to the last (sign) position
+ f) Subtract "Right Shifter" value
+ g) Subtract 0.5 from result for octant correction
+ h) Subtract Y*PI from X argument, where PI is split into 3 parts:
+ X = X - Y*PI1 - Y*PI2 - Y*PI3;
+ 2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
+ a) Calculate X^2 = X * X
+ b) Calculate polynomial:
+ R = X + X * X^2 * (A3 + x^2 * (A5 + .....
+ 3) Destination sign setting
+ a) Set shifted destination sign using XOR operation:
+ R = XOR( R, S ). The code applies S to X before step 2b. */
+
+ /*
+ ARGUMENT RANGE REDUCTION:
+ Add Pi/2 to argument: X' = X+Pi/2. Transforms cos to sin. */
+ vector float x_prime = __s_half_pi + x;
+
+ /* Y = X'*InvPi + RS : right shifter add. */
+ vector float y = (x_prime * __s_inv_pi) + __s_rshifter;
+
+ /* N = Y - RS : right shifter sub. */
+ vector float n = y - __s_rshifter;
+
+ /* SignRes = Y<<31 : Y cast to integers, LSB shifted to MSB place for result sign. */
+ vector float sign_res = (vector float)
+ vec_sl ((vector signed int) y, (vector unsigned int) vec_splats (31));
+
+ /* N = N - 0.5. */
+ n = n - __s_one_half;
+
+ /* Get absolute argument value |X|; used only by the large-argument check. */
+ vector float abs_x = vec_abs (x);
+
+ /* Check for large arguments path: lanes with |X| > __s_rangeval. */
+ vector bool int large_in = vec_cmpgt (abs_x, __s_rangeval);
+
+ /* R = X - N*Pi1. */
+ vector float r = x - (n * __s_pi1_fma);
+
+ /* R = R - N*Pi2. */
+ r = r - (n * __s_pi2_fma);
+
+ /* R = R - N*Pi3. */
+ r = r - (n * __s_pi3_fma);
+
+ /* R2 = R*R. */
+ vector float r2 = r * r;
+
+ /* RECONSTRUCTION:
+ Sign setting, done on the reduced argument: Res = R^SignRes. */
+ vector float res = (vector float)
+ ((vector signed int) r ^ (vector signed int) sign_res);
+
+ /* Poly = Res + Res * R2*(A3+R2*(A5+R2*(A7+R2*A9))). */
+ vector float poly = r2 * __s_a9_fma + __s_a7_fma;
+ poly = poly * r2 + __s_a5_fma;
+ poly = poly * r2 + __s_a3;
+ poly = poly * r2 * res + res;
+ /* Lanes flagged in LARGE_IN are overwritten with the scalar cosf result. */
+ if (large_in[0])
+ poly[0] = cosf (x[0]);
+
+ if (large_in[1])
+ poly[1] = cosf (x[1]);
+
+ if (large_in[2])
+ poly[2] = cosf (x[2]);
+
+ if (large_in[3])
+ poly[3] = cosf (x[3]);
+
+ return poly;
+
+} /* Function __s_cos_poly_eval. */
+
+static inline vector float
+__s_sin_poly_eval (vector float x)
+{
+ /* Return the sine of each element of X as a vector float. */
+ /*
+ ALGORITHM DESCRIPTION:
+
+ 1) Range reduction to [-Pi/2; +Pi/2] interval
+ a) Grab sign from source argument and save it.
+ b) Remove sign using absolute value operation
+ c) Getting octant Y by 1/Pi multiplication
+ d) Add "Right Shifter" value
+ e) Treat obtained value as integer for destination sign setting.
+ Shift first bit of this value to the last (sign) position
+ f) Change destination sign if source sign is negative
+ using XOR operation.
+ g) Subtract "Right Shifter" value
+ h) Subtract Y*PI from X argument, where PI is split into 3 parts:
+ X = X - Y*PI1 - Y*PI2 - Y*PI3;
+ 2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
+ a) Calculate X^2 = X * X
+ b) Calculate polynomial:
+ R = X + X * X^2 * (A3 + x^2 * (A5 + ......
+ 3) Destination sign setting
+ a) Set shifted destination sign using XOR operation:
+ R = XOR( R, S ). The code applies S to X before step 2b. */
+
+
+ /* Remove sign of input argument: X'=|X|. */
+ vector float abs_x = vec_abs (x);
+
+ /* Getting octant Y by 1/Pi multiplication. Add "Right Shifter" value.
+ Y = X'*InvPi + RS. */
+ vector float y = (abs_x * __s_inv_pi) + __s_rshifter;
+
+ /* N = Y - RS : right shifter sub. */
+ vector float n = y - __s_rshifter;
+
+ /* SignRes = Y<<31 : Y cast to integers, LSB shifted to MSB place for result sign. */
+ vector float sign_res = (vector float)
+ vec_sl ((vector signed int) y, (vector unsigned int) vec_splats (31));
+
+ /* Subtract N*PI from X' argument, where PI divided into 3 parts. */
+ /* R = X' - N*Pi1. */
+ vector float r = abs_x - (n * __s_pi1_fma);
+
+ /* R = R - N*Pi2. */
+ r = r - (n * __s_pi2_fma);
+
+ /* R = R - N*Pi3. */
+ r = r - (n * __s_pi3_fma);
+
+ /* Check for large arguments path: lanes with |X| > __s_rangeval. */
+ vector bool int large_in = vec_cmpgt (abs_x, __s_rangeval);
+
+ /* Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval). */
+
+ /* R2 = R*R. */
+ vector float r2 = r * r;
+
+ /* Save source sign for the final XOR: X & ~__s_abs_mask (presumably the sign bit; confirm). */
+ vector float neg_sign = vec_andc (x, __s_abs_mask);
+ /* Apply the result sign to the reduced argument: Res = R^SignRes. */
+ vector float res = (vector float)
+ ((vector signed int) r ^ (vector signed int) sign_res);
+
+ /* Poly = Res + Res * R2*(A3+R2*(A5+R2*(A7+R2*A9))). */
+ vector float poly = r2 * __s_a9_fma + __s_a7_fma;
+ poly = poly * r2 + __s_a5_fma;
+ poly = poly * r2 + __s_a3;
+ poly = poly * r2 * res + res;
+
+/* Destination sign setting.
+ Apply the saved source sign bits using XOR operation: Out = XOR( Poly, NegSign ). */
+ vector float out
+ = (vector float) ((vector int) poly ^ (vector int) neg_sign);
+ /* Lanes flagged in LARGE_IN are overwritten with the scalar sinf result. */
+ if (large_in[0])
+ out[0] = sinf (x[0]);
+
+ if (large_in[1])
+ out[1] = sinf (x[1]);
+
+ if (large_in[2])
+ out[2] = sinf (x[2]);
+
+ if (large_in[3])
+ out[3] = sinf (x[3]);
+
+ return out;
+
+} /* Function __s_sin_poly_eval. */
@@ -3,3 +3,4 @@ GLIBC_2.30 _ZGVbN2v_sin F
GLIBC_2.30 _ZGVbN2vvv_sincos F
GLIBC_2.30 _ZGVbN4v_cosf F
GLIBC_2.30 _ZGVbN4v_sinf F
+GLIBC_2.30 _ZGVbN4vvv_sincosf F
--
2.20.1