Fix f64xdivf128, f64xmulf128 spurious underflows (bug 28358) [committed]

Message ID alpine.DEB.2.22.394.2109212155370.2447301@digraph.polyomino.org.uk
State Superseded
Headers
Series Fix f64xdivf128, f64xmulf128 spurious underflows (bug 28358) [committed] |

Commit Message

Joseph Myers Sept. 21, 2021, 9:56 p.m. UTC
  As described in bug 28358, the round-to-odd computations used in the
libm functions that round their results to a narrower format can yield
spurious underflow exceptions in the following circumstances: the
narrowing only narrows the precision of the type and not the exponent
range (i.e., it's narrowing _Float128 to _Float64x on x86_64, x86 or
ia64), the architecture does after-rounding tininess detection (which
applies to all those architectures), the result is inexact, tiny
before rounding but not tiny after rounding (with the chosen rounding
mode) for _Float64x (which is possible for narrowing mul, div and fma,
not for narrowing add, sub or sqrt), so the underflow exception
resulting from the toward-zero computation in _Float128 is spurious
for _Float64x.

Fixed by making ROUND_TO_ODD call feclearexcept (FE_UNDERFLOW) in the
problem cases (as indicated by an extra argument to the macro); there
is never any need to preserve underflow exceptions from this part of
the computation, because the conversion of the round-to-odd value to
the narrower type will underflow in exactly the cases in which the
function should raise that exception, but it may be more efficient to
avoid the extra manipulation of the floating-point environment when
not needed.

Tested for x86_64 and x86, and with build-many-glibcs.py.

---

Committed.  Changes to auto-libm-test-out-* generated files omitted
from the posted patch.
  

Patch

diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in
index 2045a0f8d1..119e2ecb2a 100644
--- a/math/auto-libm-test-in
+++ b/math/auto-libm-test-in
@@ -4870,6 +4870,21 @@  div 0x1.0000000000008001000000000001p0 0x1.0000000000008p0
 # Similar, for double rounding to 64-bit of a division of 53-bit values.
 div 0x1ffe1p0 0xfffp0
 
+# Cases where there is underflow before rounding (for some format) but
+# might not be after rounding, depending on the rounding mode.
+div 0x1p-126 0x1.0000001p0
+div 0x1p-126 -0x1.0000001p0
+div -0x1p-126 0x1.0000001p0
+div -0x1p-126 -0x1.0000001p0
+div 0x1p-1022 0x1.00000000000001p0
+div 0x1p-1022 -0x1.00000000000001p0
+div -0x1p-1022 0x1.00000000000001p0
+div -0x1p-1022 -0x1.00000000000001p0
+div 0x1p-16382 0x1.00000000000000001p0
+div 0x1p-16382 -0x1.00000000000000001p0
+div -0x1p-16382 0x1.00000000000000001p0
+div -0x1p-16382 -0x1.00000000000000001p0
+
 erf 0
 erf -0
 erf 0.125
@@ -6645,6 +6660,21 @@  mul 0x50000000000000005p-64 0xcccccccccccccccccccccccccccdp-114
 # This product equals 2^64 + 2^11 + 1.
 mul 97689974585 188829449
 
+# Cases where there is underflow before rounding (for some format) but
+# might not be after rounding, depending on the rounding mode.
+mul 0x0.ffffff8p-126 0x1.0000001p0
+mul 0x0.ffffff8p-126 -0x1.0000001p0
+mul -0x0.ffffff8p-126 0x1.0000001p0
+mul -0x0.ffffff8p-126 -0x1.0000001p0
+mul 0x0.fffffffffffffcp-1022 0x1.00000000000001p0
+mul 0x0.fffffffffffffcp-1022 -0x1.00000000000001p0
+mul -0x0.fffffffffffffcp-1022 0x1.00000000000001p0
+mul -0x0.fffffffffffffcp-1022 -0x1.00000000000001p0
+mul 0x0.ffffffffffffffff8p-16382 0x1.00000000000000001p0
+mul 0x0.ffffffffffffffff8p-16382 -0x1.00000000000000001p0
+mul -0x0.ffffffffffffffff8p-16382 0x1.00000000000000001p0
+mul -0x0.ffffffffffffffff8p-16382 -0x1.00000000000000001p0
+
 pow 0 0
 pow 0 -0
 pow -0 0
diff --git a/math/math-narrow.h b/math/math-narrow.h
index 93d1b4c52a..0194f8c97e 100644
--- a/math/math-narrow.h
+++ b/math/math-narrow.h
@@ -28,6 +28,7 @@ 
 #include <math_private.h>
 #include <fenv_private.h>
 #include <math-narrow-alias.h>
+#include <stdbool.h>
 
 /* Carry out a computation using round-to-odd.  The computation is
    EXPR; the union type in which to store the result is UNION and the
@@ -37,11 +38,15 @@ 
    function rather than a C operator is used when argument and result
    types are the same) and the libc_fe* macros to ensure that the
    correct rounding mode is used, for platforms with multiple rounding
-   modes where those macros set only the relevant mode.  This macro
-   does not work correctly if the sign of an exact zero result depends
-   on the rounding mode, so that case must be checked for
-   separately.  */
-#define ROUND_TO_ODD(EXPR, UNION, SUFFIX, MANTISSA)			\
+   modes where those macros set only the relevant mode.
+   CLEAR_UNDERFLOW indicates whether underflow exceptions must be
+   cleared (in the case where a round-toward-zero underflow might not
+   indicate an underflow after narrowing, when that narrowing only
+   reduces precision not exponent range and the architecture uses
+   before-rounding tininess detection).  This macro does not work
+   correctly if the sign of an exact zero result depends on the
+   rounding mode, so that case must be checked for separately.  */
+#define ROUND_TO_ODD(EXPR, UNION, SUFFIX, MANTISSA, CLEAR_UNDERFLOW)	\
   ({									\
     fenv_t env;								\
     UNION u;								\
@@ -49,6 +54,8 @@ 
     libc_feholdexcept_setround ## SUFFIX (&env, FE_TOWARDZERO);		\
     u.d = (EXPR);							\
     math_force_eval (u.d);						\
+    if (CLEAR_UNDERFLOW)						\
+      feclearexcept (FE_UNDERFLOW);					\
     u.ieee.MANTISSA							\
       |= libc_feupdateenv_test ## SUFFIX (&env, FE_INEXACT) != 0;	\
 									\
@@ -91,7 +98,7 @@ 
 	ret = (TYPE) ((X) + (Y));					\
       else								\
 	ret = (TYPE) ROUND_TO_ODD (math_opt_barrier (X) + (Y),		\
-				   UNION, SUFFIX, MANTISSA);		\
+				   UNION, SUFFIX, MANTISSA, false);	\
 									\
       CHECK_NARROW_ADD (ret, (X), (Y));					\
       return ret;							\
@@ -149,7 +156,7 @@ 
 	ret = (TYPE) ((X) - (Y));					\
       else								\
 	ret = (TYPE) ROUND_TO_ODD (math_opt_barrier (X) - (Y),		\
-				   UNION, SUFFIX, MANTISSA);		\
+				   UNION, SUFFIX, MANTISSA, false);	\
 									\
       CHECK_NARROW_SUB (ret, (X), (Y));					\
       return ret;							\
@@ -194,15 +201,17 @@ 
   while (0)
 
 /* Implement narrowing multiply using round-to-odd.  The arguments are
-   X and Y, the return type is TYPE and UNION, MANTISSA and SUFFIX are
-   as for ROUND_TO_ODD.  */
-#define NARROW_MUL_ROUND_TO_ODD(X, Y, TYPE, UNION, SUFFIX, MANTISSA)	\
+   X and Y, the return type is TYPE and UNION, MANTISSA, SUFFIX and
+   CLEAR_UNDERFLOW are as for ROUND_TO_ODD.  */
+#define NARROW_MUL_ROUND_TO_ODD(X, Y, TYPE, UNION, SUFFIX, MANTISSA,	\
+				CLEAR_UNDERFLOW)			\
   do									\
     {									\
       TYPE ret;								\
 									\
       ret = (TYPE) ROUND_TO_ODD (math_opt_barrier (X) * (Y),		\
-				 UNION, SUFFIX, MANTISSA);		\
+				 UNION, SUFFIX, MANTISSA,		\
+				 CLEAR_UNDERFLOW);			\
 									\
       CHECK_NARROW_MUL (ret, (X), (Y));					\
       return ret;							\
@@ -246,16 +255,18 @@ 
     }							\
   while (0)
 
-/* Implement narrowing divide using round-to-odd.  The arguments are
-   X and Y, the return type is TYPE and UNION, MANTISSA and SUFFIX are
-   as for ROUND_TO_ODD.  */
-#define NARROW_DIV_ROUND_TO_ODD(X, Y, TYPE, UNION, SUFFIX, MANTISSA)	\
+/* Implement narrowing divide using round-to-odd.  The arguments are X
+   and Y, the return type is TYPE and UNION, MANTISSA, SUFFIX and
+   CLEAR_UNDERFLOW are as for ROUND_TO_ODD.  */
+#define NARROW_DIV_ROUND_TO_ODD(X, Y, TYPE, UNION, SUFFIX, MANTISSA,	\
+				CLEAR_UNDERFLOW)			\
   do									\
     {									\
       TYPE ret;								\
 									\
       ret = (TYPE) ROUND_TO_ODD (math_opt_barrier (X) / (Y),		\
-				 UNION, SUFFIX, MANTISSA);		\
+				 UNION, SUFFIX, MANTISSA,		\
+				 CLEAR_UNDERFLOW);			\
 									\
       CHECK_NARROW_DIV (ret, (X), (Y));					\
       return ret;							\
@@ -308,7 +319,7 @@ 
       TYPE ret;								\
 									\
       ret = (TYPE) ROUND_TO_ODD (sqrt ## SUFFIX (math_opt_barrier (X)),	\
-				 UNION, SUFFIX, MANTISSA);		\
+				 UNION, SUFFIX, MANTISSA, false);	\
 									\
       CHECK_NARROW_SQRT (ret, (X));					\
       return ret;							\
diff --git a/sysdeps/i386/fpu/s_f32xdivf64.c b/sysdeps/i386/fpu/s_f32xdivf64.c
index 114a49b720..be0adec4b3 100644
--- a/sysdeps/i386/fpu/s_f32xdivf64.c
+++ b/sysdeps/i386/fpu/s_f32xdivf64.c
@@ -24,6 +24,6 @@  __f32xdivf64 (_Float64 x, _Float64 y)
 {
   /* To avoid double rounding, use round-to-odd on long double.  */
   NARROW_DIV_ROUND_TO_ODD ((long double) x, (long double) y, double,
-			   union ieee854_long_double, l, mantissa1);
+			   union ieee854_long_double, l, mantissa1, false);
 }
 libm_alias_float32x_float64 (div)
diff --git a/sysdeps/i386/fpu/s_f32xmulf64.c b/sysdeps/i386/fpu/s_f32xmulf64.c
index 9636bf0985..82ffff751d 100644
--- a/sysdeps/i386/fpu/s_f32xmulf64.c
+++ b/sysdeps/i386/fpu/s_f32xmulf64.c
@@ -24,6 +24,6 @@  __f32xmulf64 (_Float64 x, _Float64 y)
 {
   /* To avoid double rounding, use round-to-odd on long double.  */
   NARROW_MUL_ROUND_TO_ODD ((long double) x, (long double) y, double,
-			   union ieee854_long_double, l, mantissa1);
+			   union ieee854_long_double, l, mantissa1, false);
 }
 libm_alias_float32x_float64 (mul)
diff --git a/sysdeps/ieee754/dbl-64/s_fdiv.c b/sysdeps/ieee754/dbl-64/s_fdiv.c
index e433aee309..98640a235c 100644
--- a/sysdeps/ieee754/dbl-64/s_fdiv.c
+++ b/sysdeps/ieee754/dbl-64/s_fdiv.c
@@ -29,6 +29,7 @@ 
 float
 __fdiv (double x, double y)
 {
-  NARROW_DIV_ROUND_TO_ODD (x, y, float, union ieee754_double, , mantissa1);
+  NARROW_DIV_ROUND_TO_ODD (x, y, float, union ieee754_double, , mantissa1,
+			   false);
 }
 libm_alias_float_double (div)
diff --git a/sysdeps/ieee754/dbl-64/s_fmul.c b/sysdeps/ieee754/dbl-64/s_fmul.c
index 892776f2d3..d97bbf60ed 100644
--- a/sysdeps/ieee754/dbl-64/s_fmul.c
+++ b/sysdeps/ieee754/dbl-64/s_fmul.c
@@ -29,6 +29,7 @@ 
 float
 __fmul (double x, double y)
 {
-  NARROW_MUL_ROUND_TO_ODD (x, y, float, union ieee754_double, , mantissa1);
+  NARROW_MUL_ROUND_TO_ODD (x, y, float, union ieee754_double, , mantissa1,
+			   false);
 }
 libm_alias_float_double (mul)
diff --git a/sysdeps/ieee754/ldbl-128/s_ddivl.c b/sysdeps/ieee754/ldbl-128/s_ddivl.c
index 80c58f07e9..88e242bccb 100644
--- a/sysdeps/ieee754/ldbl-128/s_ddivl.c
+++ b/sysdeps/ieee754/ldbl-128/s_ddivl.c
@@ -32,6 +32,6 @@  double
 __ddivl (_Float128 x, _Float128 y)
 {
   NARROW_DIV_ROUND_TO_ODD (x, y, double, union ieee854_long_double, l,
-			   mantissa3);
+			   mantissa3, false);
 }
 libm_alias_double_ldouble (div)
diff --git a/sysdeps/ieee754/ldbl-128/s_dmull.c b/sysdeps/ieee754/ldbl-128/s_dmull.c
index 9b71bbab1e..186ba172bb 100644
--- a/sysdeps/ieee754/ldbl-128/s_dmull.c
+++ b/sysdeps/ieee754/ldbl-128/s_dmull.c
@@ -32,6 +32,6 @@  double
 __dmull (_Float128 x, _Float128 y)
 {
   NARROW_MUL_ROUND_TO_ODD (x, y, double, union ieee854_long_double, l,
-			   mantissa3);
+			   mantissa3, false);
 }
 libm_alias_double_ldouble (mul)
diff --git a/sysdeps/ieee754/ldbl-128/s_f64xdivf128.c b/sysdeps/ieee754/ldbl-128/s_f64xdivf128.c
index a6f114c01c..03f2052adf 100644
--- a/sysdeps/ieee754/ldbl-128/s_f64xdivf128.c
+++ b/sysdeps/ieee754/ldbl-128/s_f64xdivf128.c
@@ -18,6 +18,7 @@ 
 
 #include <math.h>
 #include <math-narrow.h>
+#include <tininess.h>
 
 /* math_ldbl.h defines _Float128 to long double for this directory,
    but when they are different, this function must be defined with
@@ -30,7 +31,7 @@  __f64xdivf128 (_Float128 x, _Float128 y)
 {
 #if __HAVE_FLOAT64X_LONG_DOUBLE && __HAVE_DISTINCT_FLOAT128
   NARROW_DIV_ROUND_TO_ODD (x, y, _Float64x, union ieee854_long_double, l,
-			   mantissa3);
+			   mantissa3, TININESS_AFTER_ROUNDING);
 #else
   NARROW_DIV_TRIVIAL (x, y, _Float64x);
 #endif
diff --git a/sysdeps/ieee754/ldbl-128/s_f64xmulf128.c b/sysdeps/ieee754/ldbl-128/s_f64xmulf128.c
index ebb34ad4e0..031f8ef480 100644
--- a/sysdeps/ieee754/ldbl-128/s_f64xmulf128.c
+++ b/sysdeps/ieee754/ldbl-128/s_f64xmulf128.c
@@ -18,6 +18,7 @@ 
 
 #include <math.h>
 #include <math-narrow.h>
+#include <tininess.h>
 
 /* math_ldbl.h defines _Float128 to long double for this directory,
    but when they are different, this function must be defined with
@@ -30,7 +31,7 @@  __f64xmulf128 (_Float128 x, _Float128 y)
 {
 #if __HAVE_FLOAT64X_LONG_DOUBLE && __HAVE_DISTINCT_FLOAT128
   NARROW_MUL_ROUND_TO_ODD (x, y, _Float64x, union ieee854_long_double, l,
-			   mantissa3);
+			   mantissa3, TININESS_AFTER_ROUNDING);
 #else
   NARROW_MUL_TRIVIAL (x, y, _Float64x);
 #endif
diff --git a/sysdeps/ieee754/ldbl-128/s_fdivl.c b/sysdeps/ieee754/ldbl-128/s_fdivl.c
index dbad331909..7ceb4db4f8 100644
--- a/sysdeps/ieee754/ldbl-128/s_fdivl.c
+++ b/sysdeps/ieee754/ldbl-128/s_fdivl.c
@@ -28,6 +28,6 @@  float
 __fdivl (_Float128 x, _Float128 y)
 {
   NARROW_DIV_ROUND_TO_ODD (x, y, float, union ieee854_long_double, l,
-			   mantissa3);
+			   mantissa3, false);
 }
 libm_alias_float_ldouble (div)
diff --git a/sysdeps/ieee754/ldbl-128/s_fmull.c b/sysdeps/ieee754/ldbl-128/s_fmull.c
index 1b6cb25921..b2602fcd61 100644
--- a/sysdeps/ieee754/ldbl-128/s_fmull.c
+++ b/sysdeps/ieee754/ldbl-128/s_fmull.c
@@ -28,6 +28,6 @@  float
 __fmull (_Float128 x, _Float128 y)
 {
   NARROW_MUL_ROUND_TO_ODD (x, y, float, union ieee854_long_double, l,
-			   mantissa3);
+			   mantissa3, false);
 }
 libm_alias_float_ldouble (mul)
diff --git a/sysdeps/ieee754/ldbl-96/s_ddivl.c b/sysdeps/ieee754/ldbl-96/s_ddivl.c
index 5ec7a78d8a..32df44da24 100644
--- a/sysdeps/ieee754/ldbl-96/s_ddivl.c
+++ b/sysdeps/ieee754/ldbl-96/s_ddivl.c
@@ -28,6 +28,6 @@  double
 __ddivl (long double x, long double y)
 {
   NARROW_DIV_ROUND_TO_ODD (x, y, double, union ieee854_long_double, l,
-			   mantissa1);
+			   mantissa1, false);
 }
 libm_alias_double_ldouble (div)
diff --git a/sysdeps/ieee754/ldbl-96/s_dmull.c b/sysdeps/ieee754/ldbl-96/s_dmull.c
index b06bdfed52..b5bfb3f37b 100644
--- a/sysdeps/ieee754/ldbl-96/s_dmull.c
+++ b/sysdeps/ieee754/ldbl-96/s_dmull.c
@@ -28,6 +28,6 @@  double
 __dmull (long double x, long double y)
 {
   NARROW_MUL_ROUND_TO_ODD (x, y, double, union ieee854_long_double, l,
-			   mantissa1);
+			   mantissa1, false);
 }
 libm_alias_double_ldouble (mul)
diff --git a/sysdeps/ieee754/ldbl-96/s_fdivl.c b/sysdeps/ieee754/ldbl-96/s_fdivl.c
index 0b30cbb30a..9568b148bb 100644
--- a/sysdeps/ieee754/ldbl-96/s_fdivl.c
+++ b/sysdeps/ieee754/ldbl-96/s_fdivl.c
@@ -26,6 +26,6 @@  float
 __fdivl (long double x, long double y)
 {
   NARROW_DIV_ROUND_TO_ODD (x, y, float, union ieee854_long_double, l,
-			   mantissa1);
+			   mantissa1, false);
 }
 libm_alias_float_ldouble (div)
diff --git a/sysdeps/ieee754/ldbl-96/s_fmull.c b/sysdeps/ieee754/ldbl-96/s_fmull.c
index 6c9c9bc899..c3a5e1feb0 100644
--- a/sysdeps/ieee754/ldbl-96/s_fmull.c
+++ b/sysdeps/ieee754/ldbl-96/s_fmull.c
@@ -26,6 +26,6 @@  float
 __fmull (long double x, long double y)
 {
   NARROW_MUL_ROUND_TO_ODD (x, y, float, union ieee854_long_double, l,
-			   mantissa1);
+			   mantissa1, false);
 }
 libm_alias_float_ldouble (mul)