From patchwork Tue Dec  5 13:08:33 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Lu, Hongjiu" <hongjiu.lu@intel.com>
X-Patchwork-Id: 24735
Received: (qmail 125804 invoked by alias); 5 Dec 2017 13:08:40 -0000
Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm
Precedence: bulk
List-Id: <libc-alpha.sourceware.org>
List-Unsubscribe: <mailto:libc-alpha-unsubscribe-##L=##H@sourceware.org>
List-Subscribe: <mailto:libc-alpha-subscribe@sourceware.org>
List-Archive: <http://sourceware.org/ml/libc-alpha/>
List-Post: <mailto:libc-alpha@sourceware.org>
List-Help: <mailto:libc-alpha-help@sourceware.org>,
	<http://sourceware.org/ml/#faqs>
Sender: libc-alpha-owner@sourceware.org
Delivered-To: mailing list libc-alpha@sourceware.org
Received: (qmail 125792 invoked by uid 89); 5 Dec 2017 13:08:39 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-25.9 required=5.0 tests=BAYES_00, GIT_PATCH_0,
	GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3,
	KAM_LAZY_DOMAIN_SECURITY,
	NO_DNS_FOR_FROM autolearn=ham version=3.3.2 spammy=
X-HELO: mga09.intel.com
X-ExtLoop1: 1
Date: Tue, 5 Dec 2017 05:08:33 -0800
From: "H.J. Lu" <hongjiu.lu@intel.com>
To: GNU C Library <libc-alpha@sourceware.org>
Subject: [PATCH] s_sinf.c: Replace floor with
	FLOOR_DOUBLE_TO_INT/FLOOR_INT_TO_DOUBLE_HALF
Message-ID: <20171205130832.GA3696@intel.com>
Reply-To: "H.J. Lu" <hjl.tools@gmail.com>
MIME-Version: 1.0
Content-Disposition: inline
User-Agent: Mutt/1.9.1 (2017-09-22)

Since s_sinf.c either assigns the return value of floor to an integer
or passes a double converted from an integer to floor, this patch adds
FLOOR_DOUBLE_TO_INT and FLOOR_INT_TO_DOUBLE_HALF to replace floor.
By default they call floor.  A target can define FLOOR_DOUBLE_TO_INT
and FLOOR_INT_TO_DOUBLE_HALF as simple casts to avoid calling floor.

Also, since long == int on 32-bit targets, we can use int instead of
long to avoid 64-bit integers on 64-bit targets.

On Skylake, bench-sinf reports performance improvement:

           Before        After         Improvement
max        130.566       129.564           0.8%
min        7.704         7.706             0%
mean       21.8188       19.1363           14%

Any comments?

H.J.
---
	* sysdeps/generic/math_private.h (FLOOR_DOUBLE_TO_INT): New.
	(FLOOR_INT_TO_DOUBLE_HALF): Likewise.
	* sysdeps/ieee754/flt-32/s_sinf.c (reduced): Replace long with
	int.
	(SINF_FUNC): Likewise.  Replace floor with FLOOR_DOUBLE_TO_INT
	and FLOOR_INT_TO_DOUBLE_HALF.
	* sysdeps/x86_64/fpu/math_private.h (__floor_double_to_int):
	New.
	(FLOOR_DOUBLE_TO_INT): Likewise.
	(__floor_int_to_double_half): Likewise.
	(FLOOR_INT_TO_DOUBLE_HALF): Likewise.
---
 sysdeps/generic/math_private.h    |  8 ++++++++
 sysdeps/ieee754/flt-32/s_sinf.c   | 13 +++++++------
 sysdeps/x86_64/fpu/math_private.h | 17 +++++++++++++++++
 3 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/sysdeps/generic/math_private.h b/sysdeps/generic/math_private.h
index f29898c19c..a2cdce5b6a 100644
--- a/sysdeps/generic/math_private.h
+++ b/sysdeps/generic/math_private.h
@@ -184,6 +184,14 @@ do {								\
 } while (0)
 #endif
 
+#ifndef FLOOR_DOUBLE_TO_INT
+# define FLOOR_DOUBLE_TO_INT(x) ((int) __floor (x))
+#endif
+
+#ifndef FLOOR_INT_TO_DOUBLE_HALF
+# define FLOOR_INT_TO_DOUBLE_HALF(x) __floor ((x) / 2.0)
+#endif
+
 /* We need to guarantee an expansion of name when building
    ldbl-128 files as another type (e.g _Float128).  */
 #define mathx_hidden_def(name) hidden_def(name)
diff --git a/sysdeps/ieee754/flt-32/s_sinf.c b/sysdeps/ieee754/flt-32/s_sinf.c
index 40d3d197a8..13a49ceb1b 100644
--- a/sysdeps/ieee754/flt-32/s_sinf.c
+++ b/sysdeps/ieee754/flt-32/s_sinf.c
@@ -85,8 +85,8 @@ static const int ones[] = { +1, -1 };
    SIGNBIT is used to add the correct sign after the Chebyshev
    polynomial is computed.  */
 static inline float
-reduced (const double theta, const unsigned long int n,
-	 const unsigned long int signbit)
+reduced (const double theta, const unsigned int n,
+	 const unsigned int signbit)
 {
   double sx;
   const double theta2 = theta * theta;
@@ -162,14 +162,14 @@ SINF_FUNC (float x)
     }
   else                          /* |x| >= Pi/4.  */
     {
-      unsigned long int signbit = (x < 0);
+      unsigned int signbit = (x < 0);
       if (abstheta < 9 * M_PI_4)        /* |x| < 9*Pi/4.  */
 	{
 	  /* There are cases where FE_UPWARD rounding mode can
 	     produce a result of abstheta * inv_PI_4 == 9,
 	     where abstheta < 9pi/4, so the domain for
 	     pio2_table must go to 5 (9 / 2 + 1).  */
-	  unsigned long int n = (abstheta * inv_PI_4) + 1;
+	  unsigned int n = (abstheta * inv_PI_4) + 1;
 	  theta = abstheta - pio2_table[n / 2];
 	  return reduced (theta, n, signbit);
 	}
@@ -177,8 +177,9 @@ SINF_FUNC (float x)
 	{
 	  if (abstheta < 0x1p+23)     /* |x| < 2^23.  */
 	    {
-	      unsigned long int n = __floor (abstheta * inv_PI_4) + 1.0;
-	      double x = __floor (n / 2.0);
+	      unsigned int n
+		= FLOOR_DOUBLE_TO_INT (abstheta * inv_PI_4) + 1.0;
+	      double x = FLOOR_INT_TO_DOUBLE_HALF (n);
 	      theta = x * PI_2_lo + (x * PI_2_hi + abstheta);
 	      /* Argument reduction needed.  */
 	      return reduced (theta, n, signbit);
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 027a6a3a4d..738897c9c6 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -45,6 +45,23 @@
     f = f__;								      \
   } while (0)
 
+extern inline int
+__floor_double_to_int (double x)
+{
+  return x;
+}
+
+#define FLOOR_DOUBLE_TO_INT(x) __floor_double_to_int (x)
+
+extern inline double
+__floor_int_to_double_half (int x)
+{
+  x /= 2;
+  return x;
+}
+
+#define FLOOR_INT_TO_DOUBLE_HALF(x) __floor_int_to_double_half (x)
+
 #include <sysdeps/i386/fpu/fenv_private.h>
 #include_next <math_private.h>