Patchwork PPC64: Adds SIMD single-prec. sine function (sinf) for POWER8.

login
register
mail settings
Submitter GT
Date March 5, 2019, 7:16 p.m.
Message ID <aS44h8dkbz2wijTmW917sZTr_UQQf0ukld74m6X3UjMYe_lPxGwaNZMhBjtXhEv1GA4WLAJHtx7Vjt04Hd3jWZXE3uAoiIzQJYDglExUqsM=@protonmail.com>
Download mbox | patch
Permalink /patch/31725/
State New
Headers show

Comments

GT - March 5, 2019, 7:16 p.m.
This adds the single-precision vector sine function to PPC64 libmvec.

It's been tested on POWER8 Little Endian and Big Endian.
Joseph Myers - March 6, 2019, 9:21 p.m.
On Tue, 5 Mar 2019, GT wrote:

> +  /* Change destination sign if source sign is -ve using XOR operation.  */
> + vector float neg_sign = vec_andc (x, __s_abs_mask);

This one line has too little indentation (should be two columns not one).

Patch

From 3cdf7cd68433b373bb4ed1a2f6ace8083b0a66ed Mon Sep 17 00:00:00 2001
From: Bert Tenjy <bert.tenjy@gmail.com>
Date: Tue, 5 Mar 2019 18:59:26 +0000
Subject: [PATCH] PPC64: Adds SIMD single-prec. sine function (sinf) for
 POWER8.

[BZ #24206]

Implements single-precision vector sine function. The polynomial
sine-approximating algorithm is adapted for PPC64 from x86_64 [commit #2a8c2c7b33].

The patch has been tested on PPC64/POWER8 Little Endian and Big Endian.
Testing uses the framework created for libmvec on x86_64, which runs the tests
when 'make check' is issued. All tests of the new vector single-precision sine
function pass.
---
 ChangeLog                                     |  19 ++-
 NEWS                                          |   3 +-
 sysdeps/powerpc/bits/math-vector.h            |   2 +
 sysdeps/powerpc/fpu/libm-test-ulps            |   3 +
 sysdeps/powerpc/powerpc64/fpu/Versions        |   2 +-
 .../powerpc/powerpc64/fpu/multiarch/Makefile  |   5 +-
 .../fpu/multiarch/test-float-vlen4-wrappers.c |   1 +
 .../powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c | 113 ++++++++++++++++++
 .../powerpc64/fpu/multiarch/vec_s_trig_data.h |  28 +++--
 .../linux/powerpc/powerpc64/libmvec.abilist   |   1 +
 10 files changed, 160 insertions(+), 17 deletions(-)
 create mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c

diff --git a/ChangeLog b/ChangeLog
index 75b3d2978f..21e3f4c494 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@ 
+2019-03-05    <bert.tenjy@gmail.com>
+
+	[BZ #24206]
+
+	* NEWS: Note the addition of PPC64 vector sinf.
+	* sysdeps/powerpc/bits/math-vector.h: Added sinf SIMD declaration.
+	* sysdeps/powerpc/fpu/libm-test-ulps: Regenerated.
+	* sysdeps/powerpc/powerpc64/fpu/Versions: Added sinf entry.
+	* sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile (libmvec-sysdep_routines)
+	(CFLAGS-vec_s_sinf4_vsx.c, libmvec-tests, float-vlen4-funcs)
+	(float-vlen4-arch-ext-cflags): Added build of VSX SIMD sinf function
+	and its tests.
+	* sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c: Added sinf entry.
+	* sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c: New file.
+	* sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h: Added constant used in
+	computing single-precision vector sine.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist: SIMD sinf function added.
+
 2019-03-04    <bert.tenjy@gmail.com>
 
 	[BZ #24206]
@@ -56,7 +74,6 @@ 
 	* sysdeps/powerpc/powerpc64/fpu/vec_finite_alias.c: Likewise.
 	* sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist: Likewise.
 
-
 2019-02-26  Joseph Myers  <joseph@codesourcery.com>
 
 	* sysdeps/arm/sysdep.h (#if condition): Break lines before rather
diff --git a/NEWS b/NEWS
index bbb86dfd41..4fa32ab15b 100644
--- a/NEWS
+++ b/NEWS
@@ -6,11 +6,12 @@  Please send GNU C library bug reports via <https://sourceware.org/bugzilla/>
 using `glibc' in the "product" field.
 
 
-* Continuing implementation of vector math library libmvec on PPC64/POWER8.
+* Adding to implementation of vector math library libmvec on PPC64/POWER8.
   The following functions now have vector versions:
   - cos (double-precision cosine)
   - cosf (single-precision cosine)
   - sin (double-precision sine)
+  - sinf (single-precision sine)
 
   GCC support for auto-vectorization of functions on PPC64 is not yet
   available. Until that is done, the new vector math functions are
diff --git a/sysdeps/powerpc/bits/math-vector.h b/sysdeps/powerpc/bits/math-vector.h
index 0327f204a9..b4929e40cb 100644
--- a/sysdeps/powerpc/bits/math-vector.h
+++ b/sysdeps/powerpc/bits/math-vector.h
@@ -40,6 +40,8 @@ 
 #  define __DECL_SIMD_cosf __DECL_SIMD_PPC64
 #  undef __DECL_SIMD_sin
 #  define __DECL_SIMD_sin __DECL_SIMD_PPC64
+#  undef __DECL_SIMD_sinf
+#  define __DECL_SIMD_sinf __DECL_SIMD_PPC64
 
 # endif
 #endif
diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps
index e72452e757..32a7a8483c 100644
--- a/sysdeps/powerpc/fpu/libm-test-ulps
+++ b/sysdeps/powerpc/fpu/libm-test-ulps
@@ -2573,6 +2573,9 @@  ldouble: 5
 Function: "sin_vlen2":
 double: 2
 
+Function: "sin_vlen4":
+float: 1
+
 Function: "sincos":
 double: 1
 float: 1
diff --git a/sysdeps/powerpc/powerpc64/fpu/Versions b/sysdeps/powerpc/powerpc64/fpu/Versions
index 4852a2791f..f7c8fd886b 100644
--- a/sysdeps/powerpc/powerpc64/fpu/Versions
+++ b/sysdeps/powerpc/powerpc64/fpu/Versions
@@ -1,5 +1,5 @@ 
 libmvec {
   GLIBC_2.30 {
-    _ZGVbN2v_cos; _ZGVbN4v_cosf; _ZGVbN2v_sin;
+    _ZGVbN2v_cos; _ZGVbN4v_cosf; _ZGVbN2v_sin; _ZGVbN4v_sinf;
   }
 }
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
index 20a3721854..0fbe4c7077 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
@@ -45,10 +45,11 @@  endif
 
 ifeq ($(subdir),mathvec)
 libmvec-sysdep_routines += vec_d_cos2_vsx vec_s_cosf4_vsx \
-			   vec_d_sin2_vsx
+			   vec_d_sin2_vsx vec_s_sinf4_vsx
 CFLAGS-vec_d_cos2_vsx.c += -mvsx
 CFLAGS-vec_s_cosf4_vsx.c += -mvsx
 CFLAGS-vec_d_sin2_vsx.c += -mvsx
+CFLAGS-vec_s_sinf4_vsx.c += -mvsx
 endif
 
 # Variables for libmvec tests.
@@ -57,7 +58,7 @@  ifeq ($(build-mathvec),yes)
 libmvec-tests += double-vlen2 float-vlen4
 
 double-vlen2-funcs = cos sin
-float-vlen4-funcs = cos
+float-vlen4-funcs = cos sin
 
 double-vlen2-arch-ext-cflags = -mvsx -DREQUIRE_VSX
 float-vlen4-arch-ext-cflags = -mvsx -DREQUIRE_VSX
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c
index f099990d4e..44f94d1c70 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-float-vlen4-wrappers.c
@@ -22,3 +22,4 @@ 
 #define VEC_TYPE vector float
 
 VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVbN4v_cosf)
+VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVbN4v_sinf)
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c
new file mode 100644
index 0000000000..aada446301
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_sinf4_vsx.c
@@ -0,0 +1,113 @@ 
+/* Function sinf vectorized with VSX SIMD.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include "vec_s_trig_data.h"
+
+/* Compute the sine of each of the four float lanes of X.  Lanes whose
+   magnitude exceeds __s_rangeval are recomputed with scalar sinf.  */
+vector float
+_ZGVbN4v_sinf (vector float x)
+{
+  /*
+    ALGORITHM DESCRIPTION:
+
+    1) Range reduction to [-Pi/2; +Pi/2] interval
+       a) Grab sign from source argument and save it.
+       b) Remove sign using AND operation
+       c) Getting octant Y by 1/Pi multiplication
+       d) Add "Right Shifter" value
+       e) Treat obtained value as integer for destination sign setting.
+          Shift first bit of this value to the last (sign) position
+       f) Change destination sign if source sign is negative
+          using XOR operation.
+       g) Subtract "Right Shifter" value
+       h) Subtract Y*PI from X argument, where PI is divided into 3 parts:
+          X = X - Y*PI1 - Y*PI2 - Y*PI3;
+    2) Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval)
+       a) Calculate X^2 = X * X
+       b) Calculate polynomial:
+          R = X + X * X^2 * (A3 + x^2 * (A5 + ......
+    3) Destination sign setting
+       a) Set shifted destination sign using XOR operation:
+          R = XOR( R, S ).  */
+
+  /* Remove sign of input argument: X'=|X|.  */
+  vector float abs_x = vec_abs (x);
+
+  /* Getting octant Y by 1/Pi multiplication.  Add "Right Shifter" value.
+     Y = X'*InvPi + RS.  */
+  vector float y = (abs_x * __s_inv_pi) + __s_rshifter;
+
+  /* N = Y - RS : right shifter sub.  */
+  vector float n = y - __s_rshifter;
+
+  /* SignRes = Y<<31 : shift LSB to MSB place for result sign.  */
+  vector float sign_res = (vector float)
+      vec_sl ((vector signed int) y, (vector unsigned int) vec_splats (31));
+
+  /* Subtract N*PI from X argument, where PI is divided into 3 parts.  */
+  /* R = X - N*PI1 - N*PI2 - N*PI3.  */
+  vector float r = abs_x - (n * __s_pi1_fma);
+
+  /* R = R - N*Pi2.  */
+  r = r - (n * __s_pi2_fma);
+
+  /* R = R - N*Pi3.  */
+  r = r - (n * __s_pi3_fma);
+
+  /* Check for large arguments path.  */
+  vector bool int large_in = vec_cmpgt (abs_x, __s_rangeval);
+
+  /* Polynomial (minimax for sin within [-Pi/2; +Pi/2] interval).  */
+
+  /* R2 = R*R.  */
+  vector float r2 = r * r;
+
+  /* Isolate the sign bit of the source argument: NegSign = X & ~AbsMask.  */
+  vector float neg_sign = vec_andc (x, __s_abs_mask);
+
+  /* Set shifted destination sign using XOR operation: R = XOR( R, S ).  */
+  vector float res = (vector float)
+      ((vector signed int) r ^ (vector signed int) sign_res);
+
+  /* Poly = R + R * R2*(A3+R2*(A5+R2*(A7+R2*A9))).  */
+  vector float poly = r2 * __s_a9_fma + __s_a7_fma;
+  poly = poly * r2 + __s_a5_fma;
+  poly = poly * r2 + __s_a3;
+  poly = poly * r2 * res + res;
+
+  /* Change destination sign if source sign is negative using XOR
+     operation: Out = XOR( Poly, NegSign ).  */
+  vector float out
+      = (vector float) ((vector int) poly ^ (vector int) neg_sign);
+
+  if (large_in[0])
+    out[0] = sinf (x[0]);
+
+  if (large_in[1])
+    out[1] = sinf (x[1]);
+
+  if (large_in[2])
+    out[2] = sinf (x[2]);
+
+  if (large_in[3])
+    out[3] = sinf (x[3]);
+
+  return out;
+} /* Function _ZGVbN4v_sinf.  */
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h
index 55c28563e7..5e1667afa0 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_s_trig_data.h
@@ -23,50 +23,54 @@ 
 #include <altivec.h>
 
 /* PI/2.  */
-const vector float __s_half_pi =
+static const vector float __s_half_pi =
 { 0x1.921fb6p+0, 0x1.921fb6p+0, 0x1.921fb6p+0, 0x1.921fb6p+0 };
 
 /* Inverse PI.  */
-const vector float __s_inv_pi =
+static const vector float __s_inv_pi =
 { 0x1.45f306p-2, 0x1.45f306p-2, 0x1.45f306p-2, 0x1.45f306p-2 };
 
 /* Right-shifter constant.  */
-const vector float __s_rshifter =
+static const vector float __s_rshifter =
 { 0x1.8p+23, 0x1.8p+23, 0x1.8p+23, 0x1.8p+23 };
 
 /* One-half.  */
-const vector float __s_one_half =
+static const vector float __s_one_half =
 { 0x1p-1, 0x1p-1, 0x1p-1, 0x1p-1 };
 
 /* Threshold for out-of-range values.  */
-const vector float __s_rangeval =
+static const vector float __s_rangeval =
 { 0x1.388p+13, 0x1.388p+13, 0x1.388p+13, 0x1.388p+13 };
 
 /* PI1, PI2, and PI3 when FMA is available
    PI high part (when FMA available).  */
-const vector float __s_pi1_fma =
+static const vector float __s_pi1_fma =
 { 0x1.921fb6p+1, 0x1.921fb6p+1, 0x1.921fb6p+1, 0x1.921fb6p+1 };
 
 /* PI mid part  (when FMA available).  */
-const vector float __s_pi2_fma =
+static const vector float __s_pi2_fma =
 { -0x1.777a5cp-24, -0x1.777a5cp-24, -0x1.777a5cp-24, -0x1.777a5cp-24 };
 
 /* PI low part  (when FMA available).  */
-const vector float __s_pi3_fma =
+static const vector float __s_pi3_fma =
 { -0x1.ee59dap-49, -0x1.ee59dap-49, -0x1.ee59dap-49, -0x1.ee59dap-49 };
 
 /* Polynomial constants for work w/o FMA, relative error ~ 2^(-26.625).  */
-const vector float __s_a3 =
+static const vector float __s_a3 =
 { -0x1.55554cp-3, -0x1.55554cp-3, -0x1.55554cp-3, -0x1.55554cp-3 };
 
 /* Polynomial constants, work with FMA, relative error ~ 2^(-26.417).  */
-const vector float __s_a5_fma =
+static const vector float __s_a5_fma =
 { 0x1.110edp-7, 0x1.110edp-7, 0x1.110edp-7, 0x1.110edp-7 };
 
-const vector float __s_a7_fma =
+static const vector float __s_a7_fma =
 { -0x1.9f6d9ep-13, -0x1.9f6d9ep-13, -0x1.9f6d9ep-13, -0x1.9f6d9ep-13 };
 
-const vector float __s_a9_fma =
+static const vector float __s_a9_fma =
 { 0x1.5d866ap-19, 0x1.5d866ap-19, 0x1.5d866ap-19, 0x1.5d866ap-19 };
 
+/* Absolute value mask.  */
+static const vector bool int __s_abs_mask =
+{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff };
+
 #endif /* S_TRIG_DATA_H.  */
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
index a1a7f69d4c..48a742c3ef 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
@@ -1,3 +1,4 @@ 
 GLIBC_2.30 _ZGVbN2v_cos F
 GLIBC_2.30 _ZGVbN2v_sin F
 GLIBC_2.30 _ZGVbN4v_cosf F
+GLIBC_2.30 _ZGVbN4v_sinf F
-- 
2.20.1