[1/6] ldbl-128: Simplify fma usage

Message ID fd33196585b537f7d80ddde8614700f7d69d79f1.1471365825.git.murphyp@linux.vnet.ibm.com
State Superseded
Delegated to: Joseph Myers
Headers

Commit Message

Paul E. Murphy Aug. 16, 2016, 5:02 p.m. UTC
  Several ldbl-128 files share identical code which does
not ammend itself to the macro renaming necessary to
support building with another type.

This moves the duplicated function mul_split into its
own header, and refactors the fma usage into a single
selection macro.

	* sysdeps/ieee754/ldbl-128/gamma_productl.c:
	(mul_split) Remove and and include via mul_splitl.h.
	* sysdeps/ieee754/ldbl-128/lgamma_productl.c: Likewise.
	* sysdeps/ieee754/ldbl-128/x2y2m1l.c: Likewise.

	* sysdeps/ieee754/ldbl-128/mul_splitl.h: New file.
---
 sysdeps/ieee754/ldbl-128/gamma_productl.c  | 30 +---------------
 sysdeps/ieee754/ldbl-128/lgamma_productl.c | 30 +---------------
 sysdeps/ieee754/ldbl-128/mul_splitl.h      | 55 ++++++++++++++++++++++++++++++
 sysdeps/ieee754/ldbl-128/x2y2m1l.c         | 32 ++---------------
 4 files changed, 59 insertions(+), 88 deletions(-)
 create mode 100644 sysdeps/ieee754/ldbl-128/mul_splitl.h
  

Comments

Joseph Myers Aug. 16, 2016, 6:03 p.m. UTC | #1
On Tue, 16 Aug 2016, Paul E. Murphy wrote:

> Several ldbl-128 files share identical code which does
> not ammend itself to the macro renaming necessary to
> support building with another type.
> 
> This moves the duplicated function mul_split into its
> own header, and refactors the fma usage into a single
> selection macro.

This seems like something that if done for one type, should be done for 
all types together (optionally also removing the "compiler before GCC 4.6" 
cases as no longer relevant to building glibc).
  

Patch

diff --git a/sysdeps/ieee754/ldbl-128/gamma_productl.c b/sysdeps/ieee754/ldbl-128/gamma_productl.c
index 849b57d..6f3f05e 100644
--- a/sysdeps/ieee754/ldbl-128/gamma_productl.c
+++ b/sysdeps/ieee754/ldbl-128/gamma_productl.c
@@ -20,35 +20,7 @@ 
 #include <math_private.h>
 #include <float.h>
 
-/* Calculate X * Y exactly and store the result in *HI + *LO.  It is
-   given that the values are small enough that no overflow occurs and
-   large enough (or zero) that no underflow occurs.  */
-
-static inline void
-mul_split (long double *hi, long double *lo, long double x, long double y)
-{
-#ifdef __FP_FAST_FMAL
-  /* Fast built-in fused multiply-add.  */
-  *hi = x * y;
-  *lo = __builtin_fmal (x, y, -*hi);
-#elif defined FP_FAST_FMAL
-  /* Fast library fused multiply-add, compiler before GCC 4.6.  */
-  *hi = x * y;
-  *lo = __fmal (x, y, -*hi);
-#else
-  /* Apply Dekker's algorithm.  */
-  *hi = x * y;
-# define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
-  long double x1 = x * C;
-  long double y1 = y * C;
-# undef C
-  x1 = (x - x1) + x1;
-  y1 = (y - y1) + y1;
-  long double x2 = x - x1;
-  long double y2 = y - y1;
-  *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2;
-#endif
-}
+#include "mul_splitl.h"
 
 /* Compute the product of X + X_EPS, X + X_EPS + 1, ..., X + X_EPS + N
    - 1, in the form R * (1 + *EPS) where the return value R is an
diff --git a/sysdeps/ieee754/ldbl-128/lgamma_productl.c b/sysdeps/ieee754/ldbl-128/lgamma_productl.c
index de67cbe..625e54c 100644
--- a/sysdeps/ieee754/ldbl-128/lgamma_productl.c
+++ b/sysdeps/ieee754/ldbl-128/lgamma_productl.c
@@ -20,35 +20,7 @@ 
 #include <math_private.h>
 #include <float.h>
 
-/* Calculate X * Y exactly and store the result in *HI + *LO.  It is
-   given that the values are small enough that no overflow occurs and
-   large enough (or zero) that no underflow occurs.  */
-
-static void
-mul_split (long double *hi, long double *lo, long double x, long double y)
-{
-#ifdef __FP_FAST_FMAL
-  /* Fast built-in fused multiply-add.  */
-  *hi = x * y;
-  *lo = __builtin_fmal (x, y, -*hi);
-#elif defined FP_FAST_FMAL
-  /* Fast library fused multiply-add, compiler before GCC 4.6.  */
-  *hi = x * y;
-  *lo = __fmal (x, y, -*hi);
-#else
-  /* Apply Dekker's algorithm.  */
-  *hi = x * y;
-# define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
-  long double x1 = x * C;
-  long double y1 = y * C;
-# undef C
-  x1 = (x - x1) + x1;
-  y1 = (y - y1) + y1;
-  long double x2 = x - x1;
-  long double y2 = y - y1;
-  *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2;
-#endif
-}
+#include "mul_splitl.h"
 
 /* Compute the product of 1 + (T / (X + X_EPS)), 1 + (T / (X + X_EPS +
    1)), ..., 1 + (T / (X + X_EPS + N - 1)), minus 1.  X is such that
diff --git a/sysdeps/ieee754/ldbl-128/mul_splitl.h b/sysdeps/ieee754/ldbl-128/mul_splitl.h
new file mode 100644
index 0000000..dee572d
--- /dev/null
+++ b/sysdeps/ieee754/ldbl-128/mul_splitl.h
@@ -0,0 +1,55 @@ 
+/* Compute full X * Y.
+   Copyright (C) 2013-2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _MUL_SPLIT_H
+#define _MUL_SPLIT_H
+
+/* Choose an appropriate fused multiply-add function, if any.  */
+#ifdef __FP_FAST_FMAL
+# define __FMAL __builtin_fmal
+#elif defined FP_FAST_FMAL
+# define __FMAL __fmal
+#endif
+
+/* Calculate X * Y exactly and store the result in *HI + *LO.  It is
+   given that the values are small enough that no overflow occurs and
+   large enough (or zero) that no underflow occurs.  */
+
+static inline void
+mul_split (long double *hi, long double *lo, long double x, long double y)
+{
+#ifdef __FMAL
+  /* Fast library fused multiply-add, if supported.  */
+  *hi = x * y;
+  *lo = __FMAL (x, y, -*hi);
+#else
+  /* Apply Dekker's algorithm.  */
+  *hi = x * y;
+# define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
+  long double x1 = x * C;
+  long double y1 = y * C;
+# undef C
+  x1 = (x - x1) + x1;
+  y1 = (y - y1) + y1;
+  long double x2 = x - x1;
+  long double y2 = y - y1;
+  *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2;
+#endif
+}
+
+#endif /* _MUL_SPLIT_H */
diff --git a/sysdeps/ieee754/ldbl-128/x2y2m1l.c b/sysdeps/ieee754/ldbl-128/x2y2m1l.c
index 733742d..f0fe68e 100644
--- a/sysdeps/ieee754/ldbl-128/x2y2m1l.c
+++ b/sysdeps/ieee754/ldbl-128/x2y2m1l.c
@@ -21,6 +21,8 @@ 
 #include <float.h>
 #include <stdlib.h>
 
+#include "mul_splitl.h"
+
 /* Calculate X + Y exactly and store the result in *HI + *LO.  It is
    given that |X| >= |Y| and the values are small enough that no
    overflow occurs.  */
@@ -33,36 +35,6 @@  add_split (long double *hi, long double *lo, long double x, long double y)
   *lo = (x - *hi) + y;
 }
 
-/* Calculate X * Y exactly and store the result in *HI + *LO.  It is
-   given that the values are small enough that no overflow occurs and
-   large enough (or zero) that no underflow occurs.  */
-
-static inline void
-mul_split (long double *hi, long double *lo, long double x, long double y)
-{
-#ifdef __FP_FAST_FMAL
-  /* Fast built-in fused multiply-add.  */
-  *hi = x * y;
-  *lo = __builtin_fmal (x, y, -*hi);
-#elif defined FP_FAST_FMAL
-  /* Fast library fused multiply-add, compiler before GCC 4.6.  */
-  *hi = x * y;
-  *lo = __fmal (x, y, -*hi);
-#else
-  /* Apply Dekker's algorithm.  */
-  *hi = x * y;
-# define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
-  long double x1 = x * C;
-  long double y1 = y * C;
-# undef C
-  x1 = (x - x1) + x1;
-  y1 = (y - y1) + y1;
-  long double x2 = x - x1;
-  long double y2 = y - y1;
-  *lo = (((x1 * y1 - *hi) + x1 * y2) + x2 * y1) + x2 * y2;
-#endif
-}
-
 /* Compare absolute values of floating-point values pointed to by P
    and Q for qsort.  */