[v4,04/12] math: Use an improved algorithm for hypotl (ldbl-96)

Message ID 20211203000103.737833-5-adhemerval.zanella@linaro.org
State Superseded
Headers
Series Improve hypot |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Adhemerval Zanella Netto Dec. 3, 2021, midnight UTC
  This implementation is based on 'An Improved Algorithm for hypot(a,b)'
by Carlos F. Borges [1] using the MyHypot3 with the following changes:

 - Handle qNaN and sNaN.
 - Tune the 'widely varying operands' to avoid spurious underflow
   due the multiplication and fix the return value for upwards
   rounding mode.
 - Handle required underflow exception for subnormal results.

The main advantage of the new algorithm is its precision.  With a
random 1e8 input pairs in the range of [LDBL_MIN, LDBL_MAX], glibc
current implementation shows around 0.02% results with an error of
1 ulp (23158 results) while the new implementation only shows
0.0001% of total (111).

[1] https://arxiv.org/pdf/1904.09481.pdf
---
 sysdeps/ieee754/ldbl-96/e_hypotl.c | 231 ++++++++++++-----------------
 1 file changed, 98 insertions(+), 133 deletions(-)
  

Comments

Wilco Dijkstra Dec. 6, 2021, noon UTC | #1
Hi Adhemerval,


+  /* If ay is tiny, scale both inputs up.  */
+  if (__glibc_unlikely (ay < TINY_VAL))
+    {
+      if (__glibc_unlikely (ax >= ay / EPS))
+       return ax;

Same here - return ax + ay.

Cheers,
Wilco
  
Adhemerval Zanella Netto Dec. 6, 2021, 12:21 p.m. UTC | #2
On 06/12/2021 09:00, Wilco Dijkstra wrote:
> Hi Adhemerval,
> 
> 
> +  /* If ay is tiny, scale both inputs up.  */
> +  if (__glibc_unlikely (ay < TINY_VAL))
> +    {
> +      if (__glibc_unlikely (ax >= ay / EPS))
> +       return ax;
> 
> Same here - return ax + ay.

Ack, I have fixed it.
  

Patch

diff --git a/sysdeps/ieee754/ldbl-96/e_hypotl.c b/sysdeps/ieee754/ldbl-96/e_hypotl.c
index 44e72353c0..b32fad757f 100644
--- a/sysdeps/ieee754/ldbl-96/e_hypotl.c
+++ b/sysdeps/ieee754/ldbl-96/e_hypotl.c
@@ -1,142 +1,107 @@ 
-/* e_hypotl.c -- long double version of e_hypot.c.
- */
-
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-/* __ieee754_hypotl(x,y)
- *
- * Method :
- *	If (assume round-to-nearest) z=x*x+y*y
- *	has error less than sqrt(2)/2 ulp, than
- *	sqrt(z) has error less than 1 ulp (exercise).
- *
- *	So, compute sqrt(x*x+y*y) with some care as
- *	follows to get the error below 1 ulp:
- *
- *	Assume x>y>0;
- *	(if possible, set rounding to round-to-nearest)
- *	1. if x > 2y  use
- *		x1*x1+(y*y+(x2*(x+x1))) for x*x+y*y
- *	where x1 = x with lower 32 bits cleared, x2 = x-x1; else
- *	2. if x <= 2y use
- *		t1*y1+((x-y)*(x-y)+(t1*y2+t2*y))
- *	where t1 = 2x with lower 32 bits cleared, t2 = 2x-t1,
- *	y1= y with lower 32 bits chopped, y2 = y-y1.
- *
- *	NOTE: scaling may be necessary if some argument is too
- *	      large or too tiny
- *
- * Special cases:
- *	hypot(x,y) is INF if x or y is +INF or -INF; else
- *	hypot(x,y) is NAN if x or y is NAN.
- *
- * Accuracy:
- *	hypot(x,y) returns sqrt(x^2+y^2) with error less
- *	than 1 ulps (units in the last place)
- */
+/* Euclidean distance function.  Long Double/Binary96 version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* This implementation is based on 'An Improved Algorithm for hypot(a,b)' by
+   Carlos F. Borges [1] using the MyHypot3 with the following changes:
+
+   - Handle qNaN and sNaN.
+   - Tune the 'widely varying operands' to avoid spurious underflow
+     due the multiplication and fix the return value for upwards
+     rounding mode.
+   - Handle required underflow exception for subnormal results.
+
+   [1] https://arxiv.org/pdf/1904.09481.pdf  */
 
 #include <math.h>
 #include <math_private.h>
 #include <math-underflow.h>
 #include <libm-alias-finite.h>
 
-long double __ieee754_hypotl(long double x, long double y)
+#define SCALE      0x8p-8257L
+#define LARGE_VAL  0xb.504f333f9de6484p+8188L
+#define TINY_VAL   0x8p-8194L
+#define EPS        0x8p-68L
+
+/* Hypot kernel. The inputs must be adjusted so that ax >= ay >= 0
+   and squaring ax, ay and (ax - ay) does not overflow or underflow.  */
+static inline long double
+kernel (long double ax, long double ay)
 {
-	long double a,b,t1,t2,y1,y2,w;
-	uint32_t j,k,ea,eb;
-
-	GET_LDOUBLE_EXP(ea,x);
-	ea &= 0x7fff;
-	GET_LDOUBLE_EXP(eb,y);
-	eb &= 0x7fff;
-	if(eb > ea) {a=y;b=x;j=ea; ea=eb;eb=j;} else {a=x;b=y;}
-	SET_LDOUBLE_EXP(a,ea);	/* a <- |a| */
-	SET_LDOUBLE_EXP(b,eb);	/* b <- |b| */
-	if((ea-eb)>0x46) {return a+b;} /* x/y > 2**70 */
-	k=0;
-	if(__builtin_expect(ea > 0x5f3f,0)) {	/* a>2**8000 */
-	   if(ea == 0x7fff) {	/* Inf or NaN */
-	       uint32_t exp __attribute__ ((unused));
-	       uint32_t high,low;
-	       w = a+b;			/* for sNaN */
-	       if (issignaling (a) || issignaling (b))
-		 return w;
-	       GET_LDOUBLE_WORDS(exp,high,low,a);
-	       if(((high&0x7fffffff)|low)==0) w = a;
-	       GET_LDOUBLE_WORDS(exp,high,low,b);
-	       if(((eb^0x7fff)|(high&0x7fffffff)|low)==0) w = b;
-	       return w;
-	   }
-	   /* scale a and b by 2**-9600 */
-	   ea -= 0x2580; eb -= 0x2580;	k += 9600;
-	   SET_LDOUBLE_EXP(a,ea);
-	   SET_LDOUBLE_EXP(b,eb);
-	}
-	if(__builtin_expect(eb < 0x20bf, 0)) {	/* b < 2**-8000 */
-	    if(eb == 0) {	/* subnormal b or 0 */
-		uint32_t exp __attribute__ ((unused));
-		uint32_t high,low;
-		GET_LDOUBLE_WORDS(exp,high,low,b);
-		if((high|low)==0) return a;
-		SET_LDOUBLE_WORDS(t1, 0x7ffd, 0x80000000, 0); /* t1=2^16382 */
-		b *= t1;
-		a *= t1;
-		k -= 16382;
-		GET_LDOUBLE_EXP (ea, a);
-		GET_LDOUBLE_EXP (eb, b);
-		if (eb > ea)
-		  {
-		    t1 = a;
-		    a = b;
-		    b = t1;
-		    j = ea;
-		    ea = eb;
-		    eb = j;
-		  }
-	    } else {		/* scale a and b by 2^9600 */
-		ea += 0x2580;	/* a *= 2^9600 */
-		eb += 0x2580;	/* b *= 2^9600 */
-		k -= 9600;
-		SET_LDOUBLE_EXP(a,ea);
-		SET_LDOUBLE_EXP(b,eb);
-	    }
-	}
-    /* medium size a and b */
-	w = a-b;
-	if (w>b) {
-	    uint32_t high;
-	    GET_LDOUBLE_MSW(high,a);
-	    SET_LDOUBLE_WORDS(t1,ea,high,0);
-	    t2 = a-t1;
-	    w  = sqrtl(t1*t1-(b*(-b)-t2*(a+t1)));
-	} else {
-	    uint32_t high;
-	    GET_LDOUBLE_MSW(high,b);
-	    a  = a+a;
-	    SET_LDOUBLE_WORDS(y1,eb,high,0);
-	    y2 = b - y1;
-	    GET_LDOUBLE_MSW(high,a);
-	    SET_LDOUBLE_WORDS(t1,ea+1,high,0);
-	    t2 = a - t1;
-	    w  = sqrtl(t1*y1-(w*(-w)-(t1*y2+t2*b)));
-	}
-	if(k!=0) {
-	    uint32_t exp;
-	    t1 = 1.0;
-	    GET_LDOUBLE_EXP(exp,t1);
-	    SET_LDOUBLE_EXP(t1,exp+k);
-	    w *= t1;
-	    math_check_force_underflow_nonneg (w);
-	    return w;
-	} else return w;
+  long double t1, t2;
+  long double h = sqrtl (ax * ax + ay * ay);
+  if (h <= 2.0L * ay)
+    {
+      long double delta = h - ay;
+      t1 = ax * (2.0L * delta - ax);
+      t2 = (delta - 2.0L * (ax - ay)) * delta;
+    }
+  else
+    {
+      long double delta = h - ax;
+      t1 = 2.0L * delta * (ax - 2.0L * ay);
+      t2 = (4.0L * delta - ay) * ay + delta * delta;
+    }
+
+  h -= (t1 + t2) / (2.0L * h);
+  return h;
+}
+
+long double
+__ieee754_hypotl (long double x, long double y)
+{
+  if (!isfinite(x) || !isfinite(y))
+    {
+      if ((isinf (x) || isinf (y))
+	  && !issignaling (x) && !issignaling (y))
+	return INFINITY;
+      return x + y;
+    }
+
+  x = fabsl (x);
+  y = fabsl (y);
+
+  long double ax = x < y ? y : x;
+  long double ay = x < y ? x : y;
+
+  /* If ax is huge, scale both inputs down.  */
+  if (__glibc_unlikely (ax > LARGE_VAL))
+    {
+      if (__glibc_unlikely (ay <= ax * EPS))
+	return ax + ay;
+
+      return kernel (ax * SCALE, ay * SCALE) / SCALE;
+    }
+
+  /* If ay is tiny, scale both inputs up.  */
+  if (__glibc_unlikely (ay < TINY_VAL))
+    {
+      if (__glibc_unlikely (ax >= ay / EPS))
+	return ax;
+
+      ax = kernel (ax / SCALE, ay / SCALE) * SCALE;
+      math_check_force_underflow_nonneg (ax);
+      return ax;
+    }
+
+  /* Common case: ax is not huge and ay is not tiny.  */
+  if (__glibc_unlikely (ay <= ax * EPS))
+    return ax + ay;
+
+  return kernel (ax, ay);
 }
 libm_alias_finite (__ieee754_hypotl, __hypotl)