expf: Fixed the hi+lo approximation to log(2). The normal 17+24 bit decomposition

Message ID 20250128104911.1048-1-fabian.schriever@gtd-gmbh.de
State New
Headers
Series expf: Fixed the hi+lo approximation to log(2). The normal 17+24 bit decomposition |

Commit Message

Fabian Schriever Jan. 28, 2025, 10:49 a.m. UTC
  that was used doesn't work normally here, since we want to be able to
multiply `hi' by the exponent of x _exactly_, and the exponent of x has
more than 7 significant bits for most denormal x's, so the multiplication
was not always exact despite a cloned comment claiming that it was.  (The
comment is correct in the double precision case -- with the normal 33+53
bit decomposition the exponent can have 20 significant bits and the extra
bit for denormals is only the 11th.)

Fixing this had little or no effect for denormals (I think because
more precision is inherently lost for denormals than is lost by roundoff
errors in the multiplication).

The fix is to reduce the precision of the decomposition to 16+24 bits.
Due to 2 bugs in the old deomposition and numerical accidents, reducing
the precision actually increased the precision of hi+lo.  The old hi+lo
had about 39 bits instead of at least 41 like it should have had.
There were off-by-1-bit errors in each of hi and lo, apparently due
to mistranslation from the double precision hi and lo.  The correct
16 bit hi happens to give about 19 bits of precision, so the correct
hi+lo gives about 43 bits instead of at least 40.  The end result is
that expf() is now perfectly rounded (to nearest) except in 52561 cases
instead of except in 67027 cases, and the maximum error is 0.5013 ulps
instead of 0.5023 ulps.

Commit and message are from FreeBSD.
Reference: https://github.com/freebsd/freebsd-src/commit/06d803185574a0ba37434b1e98b322e54cc32a4e
Original Author: Bruce Evans

We have taken the liberty to also add the f suffix to all float
constants to ensure they are not doubles.
---
 newlib/libm/math/ef_exp.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)
  

Comments

Jeff Johnston Jan. 28, 2025, 7:02 p.m. UTC | #1
Hi Fabian,

Looking at the FreeBSD code, I see there are other changes that differ from
newlib's version starting from:

else if(hx < 0x39000000)  { /* when |x|<2**-14 */     <=== in newlib this
is 2** - 23
   if(huge+x>one) return one+x;/* trigger inexact */
}
else k = 0;

    /* x is now in primary range */
t  = x*x;
if(k >= -125)
   SET_FLOAT_WORD(twopk,((u_int32_t)(0x7f+k))<<23);
else
   SET_FLOAT_WORD(twopk,((u_int32_t)(0x7f+(k+100)))<<23);
c  = x - t*(P1+t*P2);
if(k==0) return one-((x*c)/(c-(float)2.0)-x);
else y = one-((lo-(x*c)/((float)2.0-c))-hi);
if(k >= -125) {
   if(k==128) return y*2.0F*0x1p127F;
   return y*twopk;
} else {
   return y*twopk*twom100;
}

Would you like to port over those changes as well?  Some of them are from
18 years ago and there are a few from 6 years ago (ones with 0x7f).

-- Jeff J.

On Tue, Jan 28, 2025 at 5:52 AM Fabian Schriever <
fabian.schriever@gtd-gmbh.de> wrote:

> that was used doesn't work normally here, since we want to be able to
> multiply `hi' by the exponent of x _exactly_, and the exponent of x has
> more than 7 significant bits for most denormal x's, so the multiplication
> was not always exact despite a cloned comment claiming that it was.  (The
> comment is correct in the double precision case -- with the normal 33+53
> bit decomposition the exponent can have 20 significant bits and the extra
> bit for denormals is only the 11th.)
>
> Fixing this had little or no effect for denormals (I think because
> more precision is inherently lost for denormals than is lost by roundoff
> errors in the multiplication).
>
> The fix is to reduce the precision of the decomposition to 16+24 bits.
> Due to 2 bugs in the old deomposition and numerical accidents, reducing
> the precision actually increased the precision of hi+lo.  The old hi+lo
> had about 39 bits instead of at least 41 like it should have had.
> There were off-by-1-bit errors in each of hi and lo, apparently due
> to mistranslation from the double precision hi and lo.  The correct
> 16 bit hi happens to give about 19 bits of precision, so the correct
> hi+lo gives about 43 bits instead of at least 40.  The end result is
> that expf() is now perfectly rounded (to nearest) except in 52561 cases
> instead of except in 67027 cases, and the maximum error is 0.5013 ulps
> instead of 0.5023 ulps.
>
> Commit and message are from FreeBSD.
> Reference:
> https://github.com/freebsd/freebsd-src/commit/06d803185574a0ba37434b1e98b322e54cc32a4e
> Original Author: Bruce Evans
>
> We have taken the liberty to also add the f suffix to all float
> constants to ensure they are not doubles.
> ---
>  newlib/libm/math/ef_exp.c | 34 +++++++++++++++++-----------------
>  1 file changed, 17 insertions(+), 17 deletions(-)
>
> diff --git a/newlib/libm/math/ef_exp.c b/newlib/libm/math/ef_exp.c
> index d6e25bfcd..85e1cf9d3 100644
> --- a/newlib/libm/math/ef_exp.c
> +++ b/newlib/libm/math/ef_exp.c
> @@ -26,20 +26,20 @@ static const float
>  #else
>  static float
>  #endif
> -one    = 1.0,
> -halF[2]        = {0.5,-0.5,},
> -huge   = 1.0e+30,
> -twom100 = 7.8886090522e-31,      /* 2**-100=0x0d800000 */
> -ln2HI[2]   ={ 6.9313812256e-01,                /* 0x3f317180 */
> -            -6.9313812256e-01,},       /* 0xbf317180 */
> -ln2LO[2]   ={ 9.0580006145e-06,        /* 0x3717f7d1 */
> -            -9.0580006145e-06,},       /* 0xb717f7d1 */
> -invln2 =  1.4426950216e+00,            /* 0x3fb8aa3b */
> -P1   =  1.6666667163e-01, /* 0x3e2aaaab */
> -P2   = -2.7777778450e-03, /* 0xbb360b61 */
> -P3   =  6.6137559770e-05, /* 0x388ab355 */
> -P4   = -1.6533901999e-06, /* 0xb5ddea0e */
> -P5   =  4.1381369442e-08; /* 0x3331bb4c */
> +one    = 1.0f,
> +halF[2]        = {0.5f,-0.5f,},
> +huge   = 1.0e+30f,
> +twom100 = 7.8886090522e-31f,      /* 2**-100=0x0d800000 */
> +ln2HI[2]   ={ 6.9314575195e-01f,               /* 0x3f317200 */
> +            -6.9314575195e-01f,},      /* 0xbf317200 */
> +ln2LO[2]   ={ 1.4286067653e-06f,       /* 0x35bfbe8e */
> +            -1.4286067653e-06f,},      /* 0xb5bfbe8e */
> +invln2 =  1.4426950216e+00f,           /* 0x3fb8aa3b */
> +P1   =  1.6666667163e-01f, /* 0x3e2aaaab */
> +P2   = -2.7777778450e-03f, /* 0xbb360b61 */
> +P3   =  6.6137559770e-05f, /* 0x388ab355 */
> +P4   = -1.6533901999e-06f, /* 0xb5ddea0e */
> +P5   =  4.1381369442e-08f; /* 0x3331bb4c */
>
>  #ifdef __STDC__
>         float __ieee754_expf(float x)   /* default IEEE double exp */
> @@ -60,7 +60,7 @@ P5   =  4.1381369442e-08; /* 0x3331bb4c */
>          if(FLT_UWORD_IS_NAN(hx))
>              return x+x;                /* NaN */
>          if(FLT_UWORD_IS_INFINITE(hx))
> -           return (xsb==0)? x:0.0;             /* exp(+-inf)={inf,0} */
> +           return (xsb==0)? x:0.0f;            /* exp(+-inf)={inf,0} */
>         if(sx > FLT_UWORD_LOG_MAX)
>             return __math_oflowf(0); /* overflow */
>         if(sx < 0 && hx > FLT_UWORD_LOG_MIN)
> @@ -85,8 +85,8 @@ P5   =  4.1381369442e-08; /* 0x3331bb4c */
>      /* x is now in primary range */
>         t  = x*x;
>         c  = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
> -       if(k==0)        return one-((x*c)/(c-(float)2.0)-x);
> -       else            y = one-((lo-(x*c)/((float)2.0-c))-hi);
> +       if(k==0)        return one-((x*c)/(c-2.0f)-x);
> +       else            y = one-((lo-(x*c)/(2.0f-c))-hi);
>         if(k >= -125) {
>             __uint32_t hy;
>             GET_FLOAT_WORD(hy,y);
> --
> 2.33.0.windows.1
>
>
>
  

Patch

diff --git a/newlib/libm/math/ef_exp.c b/newlib/libm/math/ef_exp.c
index d6e25bfcd..85e1cf9d3 100644
--- a/newlib/libm/math/ef_exp.c
+++ b/newlib/libm/math/ef_exp.c
@@ -26,20 +26,20 @@  static const float
 #else
 static float
 #endif
-one	= 1.0,
-halF[2]	= {0.5,-0.5,},
-huge	= 1.0e+30,
-twom100 = 7.8886090522e-31,      /* 2**-100=0x0d800000 */
-ln2HI[2]   ={ 6.9313812256e-01,		/* 0x3f317180 */
-	     -6.9313812256e-01,},	/* 0xbf317180 */
-ln2LO[2]   ={ 9.0580006145e-06,  	/* 0x3717f7d1 */
-	     -9.0580006145e-06,},	/* 0xb717f7d1 */
-invln2 =  1.4426950216e+00, 		/* 0x3fb8aa3b */
-P1   =  1.6666667163e-01, /* 0x3e2aaaab */
-P2   = -2.7777778450e-03, /* 0xbb360b61 */
-P3   =  6.6137559770e-05, /* 0x388ab355 */
-P4   = -1.6533901999e-06, /* 0xb5ddea0e */
-P5   =  4.1381369442e-08; /* 0x3331bb4c */
+one	= 1.0f,
+halF[2]	= {0.5f,-0.5f,},
+huge	= 1.0e+30f,
+twom100 = 7.8886090522e-31f,      /* 2**-100=0x0d800000 */
+ln2HI[2]   ={ 6.9314575195e-01f,		/* 0x3f317200 */
+	     -6.9314575195e-01f,},	/* 0xbf317200 */
+ln2LO[2]   ={ 1.4286067653e-06f,  	/* 0x35bfbe8e */
+	     -1.4286067653e-06f,},	/* 0xb5bfbe8e */
+invln2 =  1.4426950216e+00f, 		/* 0x3fb8aa3b */
+P1   =  1.6666667163e-01f, /* 0x3e2aaaab */
+P2   = -2.7777778450e-03f, /* 0xbb360b61 */
+P3   =  6.6137559770e-05f, /* 0x388ab355 */
+P4   = -1.6533901999e-06f, /* 0xb5ddea0e */
+P5   =  4.1381369442e-08f; /* 0x3331bb4c */
 
 #ifdef __STDC__
 	float __ieee754_expf(float x)	/* default IEEE double exp */
@@ -60,7 +60,7 @@  P5   =  4.1381369442e-08; /* 0x3331bb4c */
         if(FLT_UWORD_IS_NAN(hx))
             return x+x;	 	/* NaN */
         if(FLT_UWORD_IS_INFINITE(hx))
-	    return (xsb==0)? x:0.0;		/* exp(+-inf)={inf,0} */
+	    return (xsb==0)? x:0.0f;		/* exp(+-inf)={inf,0} */
 	if(sx > FLT_UWORD_LOG_MAX)
 	    return __math_oflowf(0); /* overflow */
 	if(sx < 0 && hx > FLT_UWORD_LOG_MIN)
@@ -85,8 +85,8 @@  P5   =  4.1381369442e-08; /* 0x3331bb4c */
     /* x is now in primary range */
 	t  = x*x;
 	c  = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
-	if(k==0) 	return one-((x*c)/(c-(float)2.0)-x); 
-	else 		y = one-((lo-(x*c)/((float)2.0-c))-hi);
+	if(k==0) 	return one-((x*c)/(c-2.0f)-x); 
+	else 		y = one-((lo-(x*c)/(2.0f-c))-hi);
 	if(k >= -125) {
 	    __uint32_t hy;
 	    GET_FLOAT_WORD(hy,y);