[3/7,v2] New generic log2f

Message ID 59C8E292.6030505@arm.com
State New, archived
Headers

Commit Message

Szabolcs Nagy Sept. 25, 2017, 11:03 a.m. UTC
  v2:
- __log2f_data is attribute_hidden
  

Comments

Joseph Myers Sept. 27, 2017, 6:03 p.m. UTC | #1
In the logf patch, you define LOGF_POLY_ORDER to 4, but actually only have 
3 terms in the polynomial.  In the log2f patch, you define 
LOG2F_POLY_ORDER to 4, and actually have and use 4 terms.  Why should 
log2f need a larger polynomial than logf?
  
Szabolcs Nagy Sept. 28, 2017, 9:11 a.m. UTC | #2
On 27/09/17 19:03, Joseph Myers wrote:
> In the logf patch, you define LOGF_POLY_ORDER to 4, but actually only have 
> 3 terms in the polynomial.  In the log2f patch, you define 
> LOG2F_POLY_ORDER to 4, and actually have and use 4 terms.  Why should 
> log2f need a larger polynomial than logf?
> 

the order is meant to be the order of the polynomial
which is 4.

but in case of logf the first order coeff is fixed to
1.0 so we don't have to multiply with that.

i guess i should use

  double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */

in the __logf_data declaration.

(these macros had more relevance when i had several
implementations of the functions with different table
size and poly order.)
  
Joseph Myers Sept. 28, 2017, 1:04 p.m. UTC | #3
On Thu, 28 Sep 2017, Szabolcs Nagy wrote:

> i guess i should use
> 
>   double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1.  */
> 
> in the __logf_data declaration.

Yes.  Please include that in the commit of the logf patch (and update / 
repost the rest of the patch series with fixes for other issues raised 
such as missing NEWS updates).
  

Patch

From e931f4585585e39ccb5f1f3a29debe670eb53629 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Mon, 4 Sep 2017 17:53:47 +0100
Subject: [PATCH 3/7] New generic log2f

Similar to the new logf: double precision arithmetics and a small
lookup table is used. The argument reduction step is the same as in
the new logf.

without wrapper on aarch64:
log2f reciprocal-throughput: 2.3x faster
log2f latency: 2.1x faster
old worst case error: 1.72 ulp
new worst case error: 0.75 ulp
aarch64 .text size: -252 bytes
aarch64 .rodata size: +244 bytes

2017-09-19  Szabolcs Nagy  <szabolcs.nagy@arm.com>

	* math/Makefile (type-float-routines): Add e_log2f_data.
	* sysdeps/ieee754/flt-32/e_log2f.c: New implementation.
	* sysdeps/ieee754/flt-32/e_log2f_data.c: New file.
	* sysdeps/ieee754/flt-32/math_config.h (__log2f_data): Define.
	(LOG2F_TABLE_BITS, LOG2F_POLY_ORDER): Define.
	* sysdeps/i386/fpu/e_log2f_data.c: New file.
	* sysdeps/ia64/fpu/e_log2f_data.c: New file.
	* sysdeps/m68k/m680x0/fpu/e_log2f_data.c: New file.
---
 math/Makefile                          |   3 +-
 sysdeps/i386/fpu/e_log2f_data.c        |   1 +
 sysdeps/ia64/fpu/e_log2f_data.c        |   1 +
 sysdeps/ieee754/flt-32/e_log2f.c       | 148 +++++++++++++++++----------------
 sysdeps/ieee754/flt-32/e_log2f_data.c  |  44 ++++++++++
 sysdeps/ieee754/flt-32/math_config.h   |  11 +++
 sysdeps/m68k/m680x0/fpu/e_log2f_data.c |   1 +
 7 files changed, 135 insertions(+), 74 deletions(-)
 create mode 100644 sysdeps/i386/fpu/e_log2f_data.c
 create mode 100644 sysdeps/ia64/fpu/e_log2f_data.c
 create mode 100644 sysdeps/ieee754/flt-32/e_log2f_data.c
 create mode 100644 sysdeps/m68k/m680x0/fpu/e_log2f_data.c

diff --git a/math/Makefile b/math/Makefile
index 919fec13ef..b4b3101592 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -115,7 +115,8 @@  type-double-routines := branred doasin dosincos halfulp mpa mpatan2	\
 
 # float support
 type-float-suffix := f
-type-float-routines := k_rem_pio2f math_errf e_exp2f_data e_logf_data
+type-float-routines := k_rem_pio2f math_errf e_exp2f_data e_logf_data	\
+		       e_log2f_data
 
 # _Float128 support
 type-float128-suffix := f128
diff --git a/sysdeps/i386/fpu/e_log2f_data.c b/sysdeps/i386/fpu/e_log2f_data.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/i386/fpu/e_log2f_data.c
@@ -0,0 +1 @@ 
+/* Not needed.  */
diff --git a/sysdeps/ia64/fpu/e_log2f_data.c b/sysdeps/ia64/fpu/e_log2f_data.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log2f_data.c
@@ -0,0 +1 @@ 
+/* Not needed.  */
diff --git a/sysdeps/ieee754/flt-32/e_log2f.c b/sysdeps/ieee754/flt-32/e_log2f.c
index 782d901094..6c42f27843 100644
--- a/sysdeps/ieee754/flt-32/e_log2f.c
+++ b/sysdeps/ieee754/flt-32/e_log2f.c
@@ -1,86 +1,88 @@ 
-/* e_logf.c -- float version of e_log.c.
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- * adapted for log2 by Ulrich Drepper <drepper@cygnus.com>
- */
+/* Single-precision log2 function.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
 
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
 
 #include <math.h>
-#include <math_private.h>
-#include <fix-int-fp-convert-zero.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+LOG2F_TABLE_BITS = 4
+LOG2F_POLY_ORDER = 4
 
-static const float
-ln2 = 0.69314718055994530942,
-two25 =    3.355443200e+07,	/* 0x4c000000 */
-Lg1 = 6.6666668653e-01,	/* 3F2AAAAB */
-Lg2 = 4.0000000596e-01,	/* 3ECCCCCD */
-Lg3 = 2.8571429849e-01, /* 3E924925 */
-Lg4 = 2.2222198546e-01, /* 3E638E29 */
-Lg5 = 1.8183572590e-01, /* 3E3A3325 */
-Lg6 = 1.5313838422e-01, /* 3E1CD04F */
-Lg7 = 1.4798198640e-01; /* 3E178897 */
+ULP error: 0.752 (nearest rounding.)
+Relative error: 1.9 * 2^-26 (before rounding.)
+*/
 
-static const float zero   =  0.0;
+#define N (1 << LOG2F_TABLE_BITS)
+#define T __log2f_data.tab
+#define A __log2f_data.poly
+#define OFF 0x3f330000
 
 float
-__ieee754_log2f(float x)
+__ieee754_log2f (float x)
 {
-	float hfsq,f,s,z,R,w,t1,t2,dk;
-	int32_t k,ix,i,j;
+  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
+  double_t z, r, r2, p, y, y0, invc, logc;
+  uint32_t ix, iz, top, tmp;
+  int k, i;
+
+  ix = asuint (x);
+#if WANT_ROUNDING
+  /* Fix sign of zero with downward rounding when x==1.  */
+  if (__glibc_unlikely (ix == 0x3f800000))
+    return 0;
+#endif
+  if (__glibc_unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
+    {
+      /* x < 0x1p-126 or inf or nan.  */
+      if (ix * 2 == 0)
+	return __math_divzerof (1);
+      if (ix == 0x7f800000) /* log2(inf) == inf.  */
+	return x;
+      if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+	return __math_invalidf (x);
+      /* x is subnormal, normalize it.  */
+      ix = asuint (x * 0x1p23f);
+      ix -= 23 << 23;
+    }
+
+  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+     The range is split into N subintervals.
+     The ith subinterval contains z and c is near its center.  */
+  tmp = ix - OFF;
+  i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N;
+  top = tmp & 0xff800000;
+  iz = ix - top;
+  k = (int32_t) tmp >> 23; /* arithmetic shift */
+  invc = T[i].invc;
+  logc = T[i].logc;
+  z = (double_t) asfloat (iz);
 
-	GET_FLOAT_WORD(ix,x);
+  /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
+  r = z * invc - 1;
+  y0 = logc + (double_t) k;
 
-	k=0;
-	if (ix < 0x00800000) {			/* x < 2**-126  */
-	    if (__builtin_expect((ix&0x7fffffff)==0, 0))
-		return -two25/__fabsf (x);	/* log(+-0)=-inf  */
-	    if (__builtin_expect(ix<0, 0))
-		return (x-x)/(x-x);	/* log(-#) = NaN */
-	    k -= 25; x *= two25; /* subnormal number, scale up x */
-	    GET_FLOAT_WORD(ix,x);
-	}
-	if (__builtin_expect(ix >= 0x7f800000, 0)) return x+x;
-	k += (ix>>23)-127;
-	ix &= 0x007fffff;
-	i = (ix+(0x95f64<<3))&0x800000;
-	SET_FLOAT_WORD(x,ix|(i^0x3f800000));	/* normalize x or x/2 */
-	k += (i>>23);
-	dk = (float)k;
-	f = x-(float)1.0;
-	if((0x007fffff&(15+ix))<16) {	/* |f| < 2**-20 */
-	    if(f==zero)
-	      {
-		if (FIX_INT_FP_CONVERT_ZERO && dk == 0.0f)
-		  dk = 0.0f;
-		return dk;
-	      }
-	    R = f*f*((float)0.5-(float)0.33333333333333333*f);
-	    return dk-(R-f)/ln2;
-	}
-	s = f/((float)2.0+f);
-	z = s*s;
-	i = ix-(0x6147a<<3);
-	w = z*z;
-	j = (0x6b851<<3)-ix;
-	t1= w*(Lg2+w*(Lg4+w*Lg6));
-	t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
-	i |= j;
-	R = t2+t1;
-	if(i>0) {
-	    hfsq=(float)0.5*f*f;
-	    return dk-((hfsq-(s*(hfsq+R)))-f)/ln2;
-	} else {
-	    return dk-((s*(f-R))-f)/ln2;
-	}
+  /* Pipelined polynomial evaluation to approximate log1p(r)/ln2.  */
+  r2 = r * r;
+  y = A[1] * r + A[2];
+  y = A[0] * r2 + y;
+  p = A[3] * r + y0;
+  y = y * r2 + p;
+  return (float) y;
 }
 strong_alias (__ieee754_log2f, __log2f_finite)
diff --git a/sysdeps/ieee754/flt-32/e_log2f_data.c b/sysdeps/ieee754/flt-32/e_log2f_data.c
new file mode 100644
index 0000000000..e39de3ba56
--- /dev/null
+++ b/sysdeps/ieee754/flt-32/e_log2f_data.c
@@ -0,0 +1,44 @@ 
+/* Data definition for log2f.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "math_config.h"
+
+const struct log2f_data __log2f_data = {
+  .tab = {
+  { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
+  { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
+  { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
+  { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
+  { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
+  { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
+  { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
+  { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
+  { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
+  { 0x1p+0, 0x0p+0 },
+  { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
+  { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
+  { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
+  { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
+  { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
+  { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
+  },
+  .poly = {
+  -0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
+  0x1.715475f35c8b8p0,
+  }
+};
diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h
index 9dde0f98eb..cca0b53d36 100644
--- a/sysdeps/ieee754/flt-32/math_config.h
+++ b/sysdeps/ieee754/flt-32/math_config.h
@@ -123,4 +123,15 @@  extern const struct logf_data
   double poly[LOGF_POLY_ORDER];
 } __logf_data attribute_hidden;
 
+#define LOG2F_TABLE_BITS 4
+#define LOG2F_POLY_ORDER 4
+extern const struct log2f_data
+{
+  struct
+  {
+    double invc, logc;
+  } tab[1 << LOG2F_TABLE_BITS];
+  double poly[LOG2F_POLY_ORDER];
+} __log2f_data attribute_hidden;
+
 #endif
diff --git a/sysdeps/m68k/m680x0/fpu/e_log2f_data.c b/sysdeps/m68k/m680x0/fpu/e_log2f_data.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/m68k/m680x0/fpu/e_log2f_data.c
@@ -0,0 +1 @@ 
+/* Not needed.  */
-- 
2.11.0