diff mbox

[2/7,v2] New generic logf

Message ID 59C8E23D.9030206@arm.com
State New, archived
Headers show

Commit Message

Szabolcs Nagy Sept. 25, 2017, 11:02 a.m. UTC
v2:
- __logf_data is attribute_hidden

Comments

Joseph Myers Sept. 27, 2017, 5:54 p.m. UTC | #1
OK.
Joseph Myers Sept. 27, 2017, 8:03 p.m. UTC | #2
This patch should update the NEWS entry for these optimized functions (as 
should the log2f and powf patches).
diff mbox

Patch

From 953dde0ba3351d49a185e57cc1800a3ad7cc35b1 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Mon, 4 Sep 2017 17:48:46 +0100
Subject: [PATCH 2/7] New generic logf

without wrapper on aarch64:
logf reciprocal-throughput: 2.2x faster
logf latency: 1.9x faster
old worst case error: 0.89 ulp
new worst case error: 0.82 ulp
aarch64 .text size: -356 bytes
aarch64 .rodata size: +248 bytes

Uses double precision arithmetics and a lookup table to allow smaller
polynomial and avoid the use of division.

Data is in a separate translation unit with fixed layout to prevent the
compiler generating suboptimal literal access.

Errors are handled inline according to POSIX rules, but this patch
keeps the wrapper with SVID compatible error handling.

Needs libm-test-ulps adjustment for clogf in non-nearest rounding mode.

2017-09-19  Szabolcs Nagy  <szabolcs.nagy@arm.com>

	* math/Makefile (type-float-routines): Add e_logf_data.
	* sysdeps/ieee754/flt-32/e_logf.c: New implementation.
	* sysdeps/ieee754/flt-32/e_logf_data.c: New file.
	* sysdeps/ieee754/flt-32/math_config.h (__logf_data): Define.
	(LOGF_TABLE_BITS, LOGF_POLY_ORDER): Define.
	* sysdeps/i386/fpu/e_logf_data.c: New file.
	* sysdeps/ia64/fpu/e_logf_data.c: New file.
	* sysdeps/m68k/m680x0/fpu/e_logf_data.c: New file.
---
 math/Makefile                         |   2 +-
 sysdeps/i386/fpu/e_logf_data.c        |   1 +
 sysdeps/ia64/fpu/e_logf_data.c        |   1 +
 sysdeps/ieee754/flt-32/e_logf.c       | 148 +++++++++++++++++-----------------
 sysdeps/ieee754/flt-32/e_logf_data.c  |  44 ++++++++++
 sysdeps/ieee754/flt-32/math_config.h  |  12 +++
 sysdeps/m68k/m680x0/fpu/e_logf_data.c |   1 +
 7 files changed, 135 insertions(+), 74 deletions(-)
 create mode 100644 sysdeps/i386/fpu/e_logf_data.c
 create mode 100644 sysdeps/ia64/fpu/e_logf_data.c
 create mode 100644 sysdeps/ieee754/flt-32/e_logf_data.c
 create mode 100644 sysdeps/m68k/m680x0/fpu/e_logf_data.c

diff --git a/math/Makefile b/math/Makefile
index 04586156f8..919fec13ef 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -115,7 +115,7 @@  type-double-routines := branred doasin dosincos halfulp mpa mpatan2	\
 
 # float support
 type-float-suffix := f
-type-float-routines := k_rem_pio2f math_errf e_exp2f_data
+type-float-routines := k_rem_pio2f math_errf e_exp2f_data e_logf_data
 
 # _Float128 support
 type-float128-suffix := f128
diff --git a/sysdeps/i386/fpu/e_logf_data.c b/sysdeps/i386/fpu/e_logf_data.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/i386/fpu/e_logf_data.c
@@ -0,0 +1 @@ 
+/* Not needed.  */
diff --git a/sysdeps/ia64/fpu/e_logf_data.c b/sysdeps/ia64/fpu/e_logf_data.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_logf_data.c
@@ -0,0 +1 @@ 
+/* Not needed.  */
diff --git a/sysdeps/ieee754/flt-32/e_logf.c b/sysdeps/ieee754/flt-32/e_logf.c
index cf75e11781..b8d262441f 100644
--- a/sysdeps/ieee754/flt-32/e_logf.c
+++ b/sysdeps/ieee754/flt-32/e_logf.c
@@ -1,85 +1,87 @@ 
-/* e_logf.c -- float version of e_log.c.
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- */
+/* Single-precision log function.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
 
 #include <math.h>
-#include <math_private.h>
+#include <stdint.h>
+#include "math_config.h"
 
-static const float
-ln2_hi =   6.9313812256e-01,	/* 0x3f317180 */
-ln2_lo =   9.0580006145e-06,	/* 0x3717f7d1 */
-two25 =    3.355443200e+07,	/* 0x4c000000 */
-Lg1 = 6.6666668653e-01,	/* 3F2AAAAB */
-Lg2 = 4.0000000596e-01,	/* 3ECCCCCD */
-Lg3 = 2.8571429849e-01, /* 3E924925 */
-Lg4 = 2.2222198546e-01, /* 3E638E29 */
-Lg5 = 1.8183572590e-01, /* 3E3A3325 */
-Lg6 = 1.5313838422e-01, /* 3E1CD04F */
-Lg7 = 1.4798198640e-01; /* 3E178897 */
+/*
+LOGF_TABLE_BITS = 4
+LOGF_POLY_ORDER = 4
 
-static const float zero   =  0.0;
+ULP error: 0.818 (nearest rounding.)
+Relative error: 1.957 * 2^-26 (before rounding.)
+*/
+
+#define T __logf_data.tab
+#define A __logf_data.poly
+#define Ln2 __logf_data.ln2
+#define N (1 << LOGF_TABLE_BITS)
+#define OFF 0x3f330000
 
 float
-__ieee754_logf(float x)
+__ieee754_logf (float x)
 {
-	float hfsq,f,s,z,R,w,t1,t2,dk;
-	int32_t k,ix,i,j;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, r2, y, y0, invc, logc;
+  uint32_t ix, iz, tmp;
+  int k, i;
+
+  ix = asuint (x);
+#if WANT_ROUNDING
+  /* Fix sign of zero with downward rounding when x==1.  */
+  if (__glibc_unlikely (ix == 0x3f800000))
+    return 0;
+#endif
+  if (__glibc_unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
+    {
+      /* x < 0x1p-126 or inf or nan.  */
+      if (ix * 2 == 0)
+	return __math_divzerof (1);
+      if (ix == 0x7f800000) /* log(inf) == inf.  */
+	return x;
+      if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+	return __math_invalidf (x);
+      /* x is subnormal, normalize it.  */
+      ix = asuint (x * 0x1p23f);
+      ix -= 23 << 23;
+    }
+
+  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
+  k = (int32_t) tmp >> 23; /* arithmetic shift */
+  iz = ix - (tmp & 0x1ff << 23);
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = (double_t) asfloat (iz);
 
-	GET_FLOAT_WORD(ix,x);
+  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
+  r = z * invc - 1;
+  y0 = logc + (double_t) k * Ln2;
 
-	k=0;
-	if (ix < 0x00800000) {			/* x < 2**-126  */
-	    if (__builtin_expect((ix&0x7fffffff)==0, 0))
-		return -two25/zero;		/* log(+-0)=-inf */
-	    if (__builtin_expect(ix<0, 0))
-		return (x-x)/(x-x);	/* log(-#) = NaN */
-	    k -= 25; x *= two25; /* subnormal number, scale up x */
-	    GET_FLOAT_WORD(ix,x);
-	}
-	if (__builtin_expect(ix >= 0x7f800000, 0)) return x+x;
-	k += (ix>>23)-127;
-	ix &= 0x007fffff;
-	i = (ix+(0x95f64<<3))&0x800000;
-	SET_FLOAT_WORD(x,ix|(i^0x3f800000));	/* normalize x or x/2 */
-	k += (i>>23);
-	f = x-(float)1.0;
-	if((0x007fffff&(15+ix))<16) {	/* |f| < 2**-20 */
-	    if(f==zero) {
-	      if(k==0) return zero;  else {dk=(float)k;
-					   return dk*ln2_hi+dk*ln2_lo;}
-	    }
-	    R = f*f*((float)0.5-(float)0.33333333333333333*f);
-	    if(k==0) return f-R; else {dk=(float)k;
-		     return dk*ln2_hi-((R-dk*ln2_lo)-f);}
-	}
-	s = f/((float)2.0+f);
-	dk = (float)k;
-	z = s*s;
-	i = ix-(0x6147a<<3);
-	w = z*z;
-	j = (0x6b851<<3)-ix;
-	t1= w*(Lg2+w*(Lg4+w*Lg6));
-	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
-	i |= j;
-	R = t2+t1;
-	if(i>0) {
-	    hfsq=(float)0.5*f*f;
-	    if(k==0) return f-(hfsq-s*(hfsq+R)); else
-		     return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f);
-	} else {
-	    if(k==0) return f-s*(f-R); else
-		     return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f);
-	}
+  /* Pipelined polynomial evaluation to approximate log1p(r).  */
+  r2 = r * r;
+  y = A[1] * r + A[2];
+  y = A[0] * r2 + y;
+  y = y * r2 + (y0 + r);
+  return (float) y;
 }
 strong_alias (__ieee754_logf, __logf_finite)
diff --git a/sysdeps/ieee754/flt-32/e_logf_data.c b/sysdeps/ieee754/flt-32/e_logf_data.c
new file mode 100644
index 0000000000..ce2a0f8a5c
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/e_logf_data.c
@@ -0,0 +1,44 @@ 
+/* Data definition for logf.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "math_config.h"
+
+const struct logf_data __logf_data = {
+  .tab = {
+  { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
+  { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
+  { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
+  { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
+  { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
+  { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
+  { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
+  { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
+  { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
+  { 0x1p+0, 0x0p+0 },
+  { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
+  { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
+  { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
+  { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
+  { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
+  { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
+  },
+  .ln2 = 0x1.62e42fefa39efp-1,
+  .poly = {
+  -0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
+  }
+};
diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h
index 31f0470612..9dde0f98eb 100644
--- a/sysdeps/ieee754/flt-32/math_config.h
+++ b/sysdeps/ieee754/flt-32/math_config.h
@@ -111,4 +111,16 @@  extern const struct exp2f_data
   double poly_scaled[EXP2F_POLY_ORDER];
 } __exp2f_data attribute_hidden;
 
+#define LOGF_TABLE_BITS 4
+#define LOGF_POLY_ORDER 4
+extern const struct logf_data
+{
+  struct
+  {
+    double invc, logc;
+  } tab[1 << LOGF_TABLE_BITS];
+  double ln2;
+  double poly[LOGF_POLY_ORDER];
+} __logf_data attribute_hidden;
+
 #endif
diff --git a/sysdeps/m68k/m680x0/fpu/e_logf_data.c b/sysdeps/m68k/m680x0/fpu/e_logf_data.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/m68k/m680x0/fpu/e_logf_data.c
@@ -0,0 +1 @@ 
+/* Not needed.  */
-- 
2.11.0