PPC64: Adds SIMD single-prec. sine function (sinf) for POWER8.

Message ID	aS44h8dkbz2wijTmW917sZTr_UQQf0ukld74m6X3UjMYe_lPxGwaNZMhBjtXhEv1GA4WLAJHtx7Vjt04Hd3jWZXE3uAoiIzQJYDglExUqsM=@protonmail.com
State	Superseded
Headers	Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk Sender: libc-alpha-owner@sourceware.org Date: Tue, 05 Mar 2019 19:16:49 +0000 To: "libc-alpha@sourceware.org" <libc-alpha@sourceware.org>, "tnggil@protonmail.com" <tnggil@protonmail.com> From: GT <tnggil@protonmail.com> Reply-To: GT <tnggil@protonmail.com> Subject: [PATCH] PPC64: Adds SIMD single-prec. sine function (sinf) for POWER8. Message-ID: <aS44h8dkbz2wijTmW917sZTr_UQQf0ukld74m6X3UjMYe_lPxGwaNZMhBjtXhEv1GA4WLAJHtx7Vjt04Hd3jWZXE3uAoiIzQJYDglExUqsM=@protonmail.com> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="b1_a51a0cfe9b6867c24cb431fbbc8a78e1"

From 3cdf7cd68433b373bb4ed1a2f6ace8083b0a66ed Mon Sep 17 00:00:00 2001 From: Bert Tenjy <bert.tenjy@gmail.com> Date: Tue, 5 Mar 2019 18:59:26 +0000 Subject: [PATCH] PPC64: Adds SIMD single-prec. sine function (sinf) for POWER8. [BZ #24206] Implements single-precision vector sine function. The polynomial sine-approximating algorithm is adapted for PPC64 from x86_64 [commit #2a8c2c7b33]. The patch has been tested on PPC64/POWER8 Little Endian and Big Endian. Testing uses the framework created for libmvec on x86_64 which runs tests on issuing 'make check'. Tests of the new vector single-precision sine function all pass. --- ChangeLog | 19 ++- NEWS | 3 +- sysdeps/powerpc/bits/math-vector.h | 2 + sysdeps/powerpc/fpu/libm-test-ulps | 3 + sysdeps/powerpc/powerpc64/fpu/Versions | 2 +- .../powerpc/powerpc64/fpu/multiarch/Makefile | 5 +- .../fpu/multiarch/test-float-vlen4-wrappers.c | 1 + .../powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c | 113 ++++++++++++++++++ .../powerpc64/fpu/multiarch/vec_s_trig_data.h | 28 +++-- .../linux/powerpc/powerpc64/libmvec.abilist | 1 + 10 files changed, 160 insertions(+), 17 deletions(-) create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c diff --git a/ChangeLog b/ChangeLog index 75b3d2978f..21e3f4c494 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +2019-03-05 <bert.tenjy@gmail.com> + + [BZ #24206] + + * NEWS: Note the addition of PPC64 vector sinf. + * sysdeps/powerpc/bits/math-vector.h: Added sinf SIMD declaration. + * sysdeps/powerpc/fpu/libm-test-ulps: Regenerated. + * sysdeps/powerpc/powerpc64/fpu/Versions: Added sinf entry. + * sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile: (libmvec-sysdep_routines) + (CFLAGS-vec_s_sinf4_vsx.c, libmvec-tests, float-vlen2-funcs) + (float-vlen2-arch-ext-cflags): Added build of VSX SIMD sinf function + and its tests. + * sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c: Added sinf entry. 
+ * sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c: New file. + * sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h: Added constant used in + computing single-precision vector sine. + * sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist: SIMD sinf function added. + 2019-03-04 <bert.tenjy@gmail.com> [BZ #24206] @@ -56,7 +74,6 @@ * sysdeps/powerpc/powerpc64/fpu/vec_finite_alias.c: Likewise. * sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist: Likewise. - 2019-02-26 Joseph Myers <joseph@codesourcery.com> * sysdeps/arm/sysdep.h (#if condition): Break lines before rather diff --git a/NEWS b/NEWS index bbb86dfd41..4fa32ab15b 100644 --- a/NEWS +++ b/NEWS @@ -6,11 +6,12 @@ Please send GNU C library bug reports via <https://sourceware.org/bugzilla/> using `glibc' in the "product" field. -* Continuing implementation of vector math library libmvec on PPC64/POWER8. +* Adding to implementation of vector math library libmvec on PPC64/POWER8. The following functions now have vector versions: - cos (double-precision cosine) - cosf (single-precision cosine) - sin (double-precision sine) + - sinf (single-precision sine) GCC support for auto-vectorization of functions on PPC64 is not yet available. 
Until that is done, the new vector math functions are diff --git a/sysdeps/powerpc/bits/math-vector.h b/sysdeps/powerpc/bits/math-vector.h index 0327f204a9..b4929e40cb 100644 --- a/sysdeps/powerpc/bits/math-vector.h +++ b/sysdeps/powerpc/bits/math-vector.h @@ -40,6 +40,8 @@ # define __DECL_SIMD_cosf __DECL_SIMD_PPC64 # undef __DECL_SIMD_sin # define __DECL_SIMD_sin __DECL_SIMD_PPC64 +# undef __DECL_SIMD_sinf +# define __DECL_SIMD_sinf __DECL_SIMD_PPC64 # endif #endif diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps index e72452e757..32a7a8483c 100644 --- a/sysdeps/powerpc/fpu/libm-test-ulps +++ b/sysdeps/powerpc/fpu/libm-test-ulps @@ -2573,6 +2573,9 @@ ldouble: 5 Function: "sin_vlen2": double: 2 +Function: "sin_vlen4": +float: 1 + Function: "sincos": double: 1 float: 1 diff --git a/sysdeps/powerpc/powerpc64/fpu/Versions b/sysdeps/powerpc/powerpc64/fpu/Versions index 4852a2791f..f7c8fd886b 100644 --- a/sysdeps/powerpc/powerpc64/fpu/Versions +++ b/sysdeps/powerpc/powerpc64/fpu/Versions @@ -1,5 +1,5 @@ libmvec { GLIBC_2.30 { - _ZGVbN2v_cos; _ZGVbN4v_cosf; _ZGVbN2v_sin; + _ZGVbN2v_cos; _ZGVbN4v_cosf; _ZGVbN2v_sin; _ZGVbN4v_sinf; } } diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile index 20a3721854..0fbe4c7077 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile @@ -45,10 +45,11 @@ endif ifeq ($(subdir),mathvec) libmvec-sysdep_routines += vec_d_cos2_vsx vec_s_cosf4_vsx \ - vec_d_sin2_vsx + vec_d_sin2_vsx vec_s_sinf4_vsx CFLAGS-vec_d_cos2_vsx.c += -mvsx CFLAGS-vec_s_cosf4_vsx.c += -mvsx CFLAGS-vec_d_sin2_vsx.c += -mvsx +CFLAGS-vec_s_sinf4_vsx.c += -mvsx endif # Variables for libmvec tests. 
@@ -57,7 +58,7 @@ ifeq ($(build-mathvec),yes) libmvec-tests += double-vlen2 float-vlen4 double-vlen2-funcs = cos sin -float-vlen4-funcs = cos +float-vlen4-funcs = cos sin double-vlen2-arch-ext-cflags = -mvsx -DREQUIRE_VSX float-vlen4-arch-ext-cflags = -mvsx -DREQUIRE_VSX diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c index f099990d4e..44f94d1c70 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c @@ -22,3 +22,4 @@ #define VEC_TYPE vector float VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVbN4v_cosf) +VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVbN4v_sinf) diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c new file mode 100644 index 0000000000..aada446301 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c @@ -0,0 +1,113 @@ +/* Function sinf vectorized with VSX SIMD. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
 */ + +#include <math.h> +#include "vec_s_trig_data.h" + +vector float +_ZGVbN4v_sinf (vector float x) +{ + + /* + ALGORITHM DESCRIPTION: + + 1) Range reduction to [-Pi/2; +Pi/2] interval + a) Grab sign from source argument and save it. + b) Remove sign using AND operation + c) Getting octant Y by 1/Pi multiplication + d) Add "Right Shifter" value + e) Treat obtained value as integer for destination sign setting. + Shift first bit of this value to the last (sign) position + f) Change destination sign if source sign is negative + using XOR operation. + g) Subtract "Right Shifter" value + h) Subtract Y*PI from X argument, where PI is divided into 3 parts: + X = X - Y*PI1 - Y*PI2 - Y*PI3; + 2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval) + a) Calculate X^2 = X * X + b) Calculate polynomial: + R = X + X * X^2 * (A3 + x^2 * (A5 + ...... + 3) Destination sign setting + a) Set shifted destination sign using XOR operation: + R = XOR( R, S ). */ + + + /* Remove sign of input argument: X'=|X|. */ + vector float abs_x = vec_abs (x); + + /* Getting octant Y by 1/Pi multiplication. Add "Right Shifter" value. + Y = X'*InvPi + RS. */ + vector float y = (abs_x * __s_inv_pi) + __s_rshifter; + + /* N = Y - RS : right shifter sub. */ + vector float n = y - __s_rshifter; + + /* SignRes = Y<<31 : shift LSB to MSB place for result sign. */ + vector float sign_res = (vector float) + vec_sl ((vector signed int) y, (vector unsigned int) vec_splats (31)); + + /* Subtract N*PI from X argument, where PI divided into 3 parts. */ + /* R = X - N*PI1 - N*PI2 - N*PI3. */ + vector float r = abs_x - (n * __s_pi1_fma); + + /* R = R - N*Pi2. */ + r = r - (n * __s_pi2_fma); + + /* R = R - N*Pi3. */ + r = r - (n * __s_pi3_fma); + + /* Check for large arguments path. */ + vector bool int large_in = vec_cmpgt (abs_x, __s_rangeval); + + /* Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval). */ + + /* R2 = R*R. 
*/ + vector float r2 = r * r; + + /* Change destination sign if source sign is -ve using XOR operation. */ + vector float neg_sign = vec_andc (x, __s_abs_mask); + + vector float res = (vector float) + ((vector signed int) r ^ (vector signed int) sign_res); + + /* Poly = R + R * R2*(A3+R2*(A5+R2*(A7+R2*A9))). */ + vector float poly = r2 * __s_a9_fma + __s_a7_fma; + poly = poly * r2 + __s_a5_fma; + poly = poly * r2 + __s_a3; + poly = poly * r2 * res + res; + +/* Destination sign setting. + Set shifted destination sign using XOR operation: R = XOR( R, S ). */ + vector float out + = (vector float) ((vector int) poly ^ (vector int) neg_sign); + + if (large_in[0]) + out[0] = sinf (x[0]); + + if (large_in[1]) + out[1] = sinf (x[1]); + + if (large_in[2]) + out[2] = sinf (x[2]); + + if (large_in[3]) + out[3] = sinf (x[3]); + + return out; + +} /* Function _ZGVbN4v_sinf. */ diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h index 55c28563e7..5e1667afa0 100644 --- a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h +++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h @@ -23,50 +23,54 @@ #include <altivec.h> /* PI/2. */ -const vector float __s_half_pi = +static const vector float __s_half_pi = { 0x1.921fb6p+0, 0x1.921fb6p+0, 0x1.921fb6p+0, 0x1.921fb6p+0 }; /* Inverse PI. */ -const vector float __s_inv_pi = +static const vector float __s_inv_pi = { 0x1.45f306p-2, 0x1.45f306p-2, 0x1.45f306p-2, 0x1.45f306p-2 }; /* Right-shifter constant. */ -const vector float __s_rshifter = +static const vector float __s_rshifter = { 0x1.8p+23, 0x1.8p+23, 0x1.8p+23, 0x1.8p+23 }; /* One-half. */ -const vector float __s_one_half = +static const vector float __s_one_half = { 0x1p-1, 0x1p-1, 0x1p-1, 0x1p-1 }; /* Threshold for out-of-range values. 
*/ -const vector float __s_rangeval = +static const vector float __s_rangeval = { 0x1.388p+13, 0x1.388p+13, 0x1.388p+13, 0x1.388p+13 }; /* PI1, PI2, and PI3 when FMA is available PI high part (when FMA available). */ -const vector float __s_pi1_fma = +static const vector float __s_pi1_fma = { 0x1.921fb6p+1, 0x1.921fb6p+1, 0x1.921fb6p+1, 0x1.921fb6p+1 }; /* PI mid part (when FMA available). */ -const vector float __s_pi2_fma = +static const vector float __s_pi2_fma = { -0x1.777a5cp-24, -0x1.777a5cp-24, -0x1.777a5cp-24, -0x1.777a5cp-24 }; /* PI low part (when FMA available). */ -const vector float __s_pi3_fma = +static const vector float __s_pi3_fma = { -0x1.ee59dap-49, -0x1.ee59dap-49, -0x1.ee59dap-49, -0x1.ee59dap-49 }; /* Polynomial constants for work w/o FMA, relative error ~ 2^(-26.625). */ -const vector float __s_a3 = +static const vector float __s_a3 = { -0x1.55554cp-3, -0x1.55554cp-3, -0x1.55554cp-3, -0x1.55554cp-3 }; /* Polynomial constants, work with FMA, relative error ~ 2^(-26.417). */ -const vector float __s_a5_fma = +static const vector float __s_a5_fma = { 0x1.110edp-7, 0x1.110edp-7, 0x1.110edp-7, 0x1.110edp-7 }; -const vector float __s_a7_fma = +static const vector float __s_a7_fma = { -0x1.9f6d9ep-13, -0x1.9f6d9ep-13, -0x1.9f6d9ep-13, -0x1.9f6d9ep-13 }; -const vector float __s_a9_fma = +static const vector float __s_a9_fma = { 0x1.5d866ap-19, 0x1.5d866ap-19, 0x1.5d866ap-19, 0x1.5d866ap-19 }; +/* Absolute value mask. */ +static const vector bool int __s_abs_mask = +{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; + #endif /* S_TRIG_DATA_H. 
*/ diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist index a1a7f69d4c..48a742c3ef 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist @@ -1,3 +1,4 @@ GLIBC_2.30 _ZGVbN2v_cos F GLIBC_2.30 _ZGVbN2v_sin F GLIBC_2.30 _ZGVbN4v_cosf F +GLIBC_2.30 _ZGVbN4v_sinf F -- 2.20.1

PPC64: Adds SIMD single-prec. sine function (sinf) for POWER8.

Commit Message

Comments

Patch