diff mbox series

[v2,3/5] Remove slow paths from atan

Message ID VE1PR08MB5599BF41977CD302F0E7AA8A83AF0@VE1PR08MB5599.eurprd08.prod.outlook.com
State Committed
Commit e898cd1593cc530b0fb29d46a2854dbc552302c0
Headers show
Series [v2,1/5] Remove remaining slow paths from asin and acos | expand

Commit Message

Wilco Dijkstra Jan. 7, 2021, 7:26 p.m. UTC
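Remove the slow paths from atan: the second-stage refinement code in each
range and the multi-precision atanMp fallback are deleted, so every range now
returns its fast-path result directly.  The result is therefore no longer
documented as correctly rounded.  Add ULP annotations to each remaining path.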

Passes GLIBC testsuite.

---
diff mbox series

Patch

diff --git a/sysdeps/ieee754/dbl-64/atnat.h b/sysdeps/ieee754/dbl-64/atnat.h
index af4374019f1fce9b56b387b93a69d1330b9c8666..2b12481f0491ade9379c71ae98935ae8e67cb755 100644
--- a/sysdeps/ieee754/dbl-64/atnat.h
+++ b/sysdeps/ieee754/dbl-64/atnat.h
@@ -29,7 +29,7 @@ 
 #define M 4
 
 #ifdef BIG_ENDI
-  static const number
+  static const mynumber
   /* polynomial I */
 /**/ d3             = {{0xbfd55555, 0x55555555} }, /* -0.333... */
 /**/ d5             = {{0x3fc99999, 0x999997fd} }, /*  0.199... */
@@ -79,7 +79,7 @@ 
 
 #else
 #ifdef LITTLE_ENDI
-  static const number
+  static const mynumber
   /* polynomial I */
 /**/ d3             = {{0x55555555, 0xbfd55555} }, /* -0.333... */
 /**/ d5             = {{0x999997fd, 0x3fc99999} }, /*  0.199... */
diff --git a/sysdeps/ieee754/dbl-64/s_atan.c b/sysdeps/ieee754/dbl-64/s_atan.c
index 291b988318a67444b473044b030619d2f7851585..977183eb928f62d110e2c9e9ca6dd1f9286b1b97 100644
--- a/sysdeps/ieee754/dbl-64/s_atan.c
+++ b/sysdeps/ieee754/dbl-64/s_atan.c
@@ -20,25 +20,15 @@ 
 /*  MODULE_NAME: atnat.c                                                */
 /*                                                                      */
 /*  FUNCTIONS:  uatan                                                   */
-/*              atanMp                                                  */
 /*              signArctan                                              */
 /*                                                                      */
-/*                                                                      */
-/*  FILES NEEDED: dla.h endian.h mpa.h mydefs.h atnat.h                 */
-/*                mpatan.c mpatan2.c mpsqrt.c                           */
+/*  FILES NEEDED: dla.h endian.h mydefs.h atnat.h                       */
 /*                uatan.tbl                                             */
 /*                                                                      */
-/* An ultimate atan() routine. Given an IEEE double machine number x    */
-/* it computes the correctly rounded (to nearest) value of atan(x).     */
-/*                                                                      */
-/* Assumption: Machine arithmetic operations are performed in           */
-/* round to nearest mode of IEEE 754 standard.                          */
-/*                                                                      */
 /************************************************************************/
 
 #include <dla.h>
-#include "mpa.h"
-#include "MathLib.h"
+#include "mydefs.h"
 #include "uatan.tbl"
 #include "atnat.h"
 #include <fenv.h>
@@ -47,10 +37,8 @@ 
 #include <math.h>
 #include <fenv_private.h>
 #include <math-underflow.h>
-#include <stap-probe.h>
 
-void __mpatan (mp_no *, mp_no *, int);	/* see definition in mpatan.c */
-static double atanMp (double, const int[]);
+#define  TWO52     0x1.0p52
 
   /* Fix the sign of y and return */
 static double
@@ -59,17 +47,14 @@  __signArctan (double x, double y)
   return copysign (y, x);
 }
 
-
-/* An ultimate atan() routine. Given an IEEE double machine number x,    */
-/* routine computes the correctly rounded (to nearest) value of atan(x). */
+/* atan with max ULP of ~0.523 based on random sampling.  */
 double
 __atan (double x)
 {
-  double cor, s1, ss1, s2, ss2, t1, t2, t3, t4, u, u2, u3,
-	 v, vv, w, ww, y, yy, z, zz;
+  double cor, t1, t2, t3, u,
+	 v, w, ww, y, yy, z;
   int i, ux, dx;
-  static const int pr[M] = { 6, 8, 10, 32 };
-  number num;
+  mynumber num;
 
   num.d = x;
   ux = num.i[HIGH_HALF];
@@ -102,36 +87,14 @@  __atan (double x)
 	      yy = d3.d + v * yy;
 	      yy *= x * v;
 
-	      if ((y = x + (yy - U1 * x)) == x + (yy + U1 * x))
-		return y;
-
-	      EMULV (x, x, v, vv);	/* v+vv=x^2 */
-
-	      s1 = f17.d + v * f19.d;
-	      s1 = f15.d + v * s1;
-	      s1 = f13.d + v * s1;
-	      s1 = f11.d + v * s1;
-	      s1 *= v;
-
-	      ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      MUL2 (x, 0, s1, ss1, s2, ss2, t1, t2);
-	      ADD2 (x, 0, s2, ss2, s1, ss1, t1, t2);
-	      if ((y = s1 + (ss1 - U5 * s1)) == s1 + (ss1 + U5 * s1))
-		return y;
-
-	      return atanMp (x, pr);
+	      y = x + yy;
+	      /* Max ULP is 0.511.  */
+	      return y;
 	    }
 	}
       else
 	{			/* B <= u < C */
-	  i = (TWO52 + TWO8 * u) - TWO52;
+	  i = (TWO52 + 256 * u) - TWO52;
 	  i -= 16;
 	  z = u - cij[i][0].d;
 	  yy = cij[i][5].d + z * cij[i][6].d;
@@ -141,44 +104,9 @@  __atan (double x)
 	  yy *= z;
 
 	  t1 = cij[i][1].d;
-	  if (i < 112)
-	    {
-	      if (i < 48)
-		u2 = U21;	/* u < 1/4        */
-	      else
-		u2 = U22;
-	    }			/* 1/4 <= u < 1/2 */
-	  else
-	    {
-	      if (i < 176)
-		u2 = U23;	/* 1/2 <= u < 3/4 */
-	      else
-		u2 = U24;
-	    }			/* 3/4 <= u <= 1  */
-	  if ((y = t1 + (yy - u2 * t1)) == t1 + (yy + u2 * t1))
-	    return __signArctan (x, y);
-
-	  z = u - hij[i][0].d;
-
-	  s1 = hij[i][14].d + z * hij[i][15].d;
-	  s1 = hij[i][13].d + z * s1;
-	  s1 = hij[i][12].d + z * s1;
-	  s1 = hij[i][11].d + z * s1;
-	  s1 *= z;
-
-	  ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
-	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
-	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
-	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
-	  MUL2 (z, 0, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
-	  if ((y = s2 + (ss2 - U6 * s2)) == s2 + (ss2 + U6 * s2))
-	    return __signArctan (x, y);
-
-	  return atanMp (x, pr);
+	  y = t1 + yy;
+	  /* Max ULP is 0.56.  */
+	  return __signArctan (x, y);
 	}
     }
   else
@@ -188,7 +116,7 @@  __atan (double x)
 	  w = 1 / u;
 	  EMULV (w, u, t1, t2);
 	  ww = w * ((1 - t1) - t2);
-	  i = (TWO52 + TWO8 * w) - TWO52;
+	  i = (TWO52 + 256 * w) - TWO52;
 	  i -= 16;
 	  z = (w - cij[i][0].d) + ww;
 
@@ -199,37 +127,9 @@  __atan (double x)
 	  yy = HPI1 - z * yy;
 
 	  t1 = HPI - cij[i][1].d;
-	  if (i < 112)
-	    u3 = U31;           /* w <  1/2 */
-	  else
-	    u3 = U32;           /* w >= 1/2 */
-	  if ((y = t1 + (yy - u3)) == t1 + (yy + u3))
-	    return __signArctan (x, y);
-
-	  DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4);
-	  t1 = w - hij[i][0].d;
-	  EADD (t1, ww, z, zz);
-
-	  s1 = hij[i][14].d + z * hij[i][15].d;
-	  s1 = hij[i][13].d + z * s1;
-	  s1 = hij[i][12].d + z * s1;
-	  s1 = hij[i][11].d + z * s1;
-	  s1 *= z;
-
-	  ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
-	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
-	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][5].d, hij[i][6].d, s1, ss1, s2, ss2, t1, t2);
-	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][3].d, hij[i][4].d, s1, ss1, s2, ss2, t1, t2);
-	  MUL2 (z, zz, s2, ss2, s1, ss1, t1, t2);
-	  ADD2 (hij[i][1].d, hij[i][2].d, s1, ss1, s2, ss2, t1, t2);
-	  SUB2 (HPI, HPI1, s2, ss2, s1, ss1, t1, t2);
-	  if ((y = s1 + (ss1 - U7)) == s1 + (ss1 + U7))
-	    return __signArctan (x, y);
-
-	  return atanMp (x, pr);
+	  y = t1 + yy;
+	  /* Max ULP is 0.503.  */
+	  return __signArctan (x, y);
 	}
       else
 	{
@@ -249,34 +149,9 @@  __atan (double x)
 	      ww = w * ((1 - t1) - t2);
 	      ESUB (HPI, w, t3, cor);
 	      yy = ((HPI1 + cor) - ww) - yy;
-	      if ((y = t3 + (yy - U4)) == t3 + (yy + U4))
-		return __signArctan (x, y);
-
-	      DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4);
-	      MUL2 (w, ww, w, ww, v, vv, t1, t2);
-
-	      s1 = f17.d + v * f19.d;
-	      s1 = f15.d + v * s1;
-	      s1 = f13.d + v * s1;
-	      s1 = f11.d + v * s1;
-	      s1 *= v;
-
-	      ADD2 (f9.d, ff9.d, s1, 0, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      ADD2 (f7.d, ff7.d, s1, ss1, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      ADD2 (f5.d, ff5.d, s1, ss1, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      ADD2 (f3.d, ff3.d, s1, ss1, s2, ss2, t1, t2);
-	      MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2);
-	      MUL2 (w, ww, s1, ss1, s2, ss2, t1, t2);
-	      ADD2 (w, ww, s2, ss2, s1, ss1, t1, t2);
-	      SUB2 (HPI, HPI1, s1, ss1, s2, ss2, t1, t2);
-
-	      if ((y = s2 + (ss2 - U8)) == s2 + (ss2 + U8))
-		return __signArctan (x, y);
-
-	      return atanMp (x, pr);
+	      y = t3 + yy;
+	      /* Max ULP is 0.5003.  */
+	      return __signArctan (x, y);
 	    }
 	  else
 	    {
@@ -290,35 +165,6 @@  __atan (double x)
     }
 }
 
- /* Final stages. Compute atan(x) by multiple precision arithmetic */
-static double
-atanMp (double x, const int pr[])
-{
-  mp_no mpx, mpy, mpy2, mperr, mpt1, mpy1;
-  double y1, y2;
-  int i, p;
-
-  for (i = 0; i < M; i++)
-    {
-      p = pr[i];
-      __dbl_mp (x, &mpx, p);
-      __mpatan (&mpx, &mpy, p);
-      __dbl_mp (u9[i].d, &mpt1, p);
-      __mul (&mpy, &mpt1, &mperr, p);
-      __add (&mpy, &mperr, &mpy1, p);
-      __sub (&mpy, &mperr, &mpy2, p);
-      __mp_dbl (&mpy1, &y1, p);
-      __mp_dbl (&mpy2, &y2, p);
-      if (y1 == y2)
-	{
-	  LIBC_PROBE (slowatan, 3, &p, &x, &y1);
-	  return y1;
-	}
-    }
-  LIBC_PROBE (slowatan_inexact, 3, &p, &x, &y1);
-  return y1;			/*if impossible to do exact computing */
-}
-
 #ifndef __atan
 libm_alias_double (__atan, atan)
 #endif
diff --git a/sysdeps/ieee754/dbl-64/uatan.tbl b/sysdeps/ieee754/dbl-64/uatan.tbl
index 8a477caa385d3f447abacf8490a8b45278d86610..fdc3e53304112eb31863a3144d6acb2f65a77a2e 100644
--- a/sysdeps/ieee754/dbl-64/uatan.tbl
+++ b/sysdeps/ieee754/dbl-64/uatan.tbl
@@ -25,7 +25,7 @@ 
 
 #ifdef BIG_ENDI
 
-  static const number
+  static const mynumber
       cij[241][7] = {                             /* x0,cij for (1/16,1) */
 /**/                 {{{0X3FB04006, 0X65E0244E} },
 /**/                  {{0X3FB03A73, 0X7B53DD20} },
@@ -1716,7 +1716,7 @@ 
 /**/                  {{0XBF99B9A7, 0X18A3BA58} } },
   };
 
-  static const number
+  static const mynumber
      hij[241][16] = {                             /* x0,hij for (1/16,1) */
 /**/                 {{{0x3fb04000, 0x00000000} },
 /**/                  {{0x3fb03a6d, 0x1c06693d} },
@@ -5579,7 +5579,7 @@ 
 #else
 #ifdef LITTLE_ENDI
 
-  static const number
+  static const mynumber
       cij[241][7] = {                             /* x0,cij for (1/16,1) */
 /**/                 {{{0X65E0244E, 0X3FB04006} },
 /**/                  {{0X7B53DD20, 0X3FB03A73} },
@@ -7270,7 +7270,7 @@ 
 /**/                  {{0X18A3BA58, 0XBF99B9A7} } },
   };
 
-  static const number
+  static const mynumber
      hij[241][16] = {                             /* x0,hij for (1/16,1) */
 /**/                 {{{0x00000000, 0x3fb04000} },
 /**/                  {{0x1c06693d, 0x3fb03a6d} },