Patchwork [24/28] powerpc: hypot refactor and optimization

login
register
mail settings
Submitter Adhemerval Zanella Netto
Date March 29, 2019, 1:35 p.m.
Message ID <20190329133529.22523-25-adhemerval.zanella@linaro.org>
Download mbox | patch
Permalink /patch/32085/
State New
Headers show

Comments

Adhemerval Zanella Netto - March 29, 2019, 1:35 p.m.
The powerpc hypot is slight optimized by:

  - Commit 8df4e219e43, both isnan and isinf are always inlined and thus
    the check TEST_INF_NAN does not make sense anymore.  The generic
    check for POWER7 should be faster on all powerpc configuration.

  - Two reduntant checks (for y == 0 and y > two60factor && (x / y) > two60)
    are removed.

Both changes leads to unrequired ifunc especialization for power7 and
thus they are removed.  Finally The code is also cleanup a bit by inlining
the constants floating points.

The performance changes using the hypot benchtests are:

  - POWER9 without patch:
    "hypot": {
     "overflow": {
      "duration": 4.98585e+09,
      "iterations": 4.84932e+08,
      "max": 46.551,
      "min": 10.229,
      "mean": 10.2815
     },
     "higher_two500": {
      "duration": 5.00192e+09,
      "iterations": 4.24843e+08,
      "max": 33.319,
      "min": 11.606,
      "mean": 11.7736
     },
     "subnormal": {
      "duration": 5.0075e+09,
      "iterations": 4.06792e+08,
      "max": 22.178,
      "min": 12.15,
      "mean": 12.3097
     },
     "less_two500": {
      "duration": 5.00685e+09,
      "iterations": 4.08772e+08,
      "max": 22.784,
      "min": 12.052,
      "mean": 12.2485
     },
     "default": {
      "duration": 5.06002e+09,
      "iterations": 4.09894e+08,
      "max": 20.648,
      "min": 11.874,
      "mean": 12.3447
     }
    }

  - POWER9 with patch:
    "hypot": {
     "overflow": {
      "duration": 4.91848e+09,
      "iterations": 7.28039e+08,
      "max": 47.958,
      "min": 6.436,
      "mean": 6.75579
     },
     "higher_two500": {
      "duration": 4.9359e+09,
      "iterations": 6.63376e+08,
      "max": 20.783,
      "min": 7.321,
      "mean": 7.44057
     },
     "subnormal": {
      "duration": 4.9479e+09,
      "iterations": 6.19772e+08,
      "max": 18.856,
      "min": 7.817,
      "mean": 7.98341
     },
     "less_two500": {
      "duration": 4.94275e+09,
      "iterations": 6.3889e+08,
      "max": 17.452,
      "min": 7.597,
      "mean": 7.73647
     },
     "default": {
      "duration": 5.03645e+09,
      "iterations": 5.70718e+08,
      "max": 18.904,
      "min": 8.55,
      "mean": 8.82476
     }
    }

  - POWER7 without patch
    "hypot": {
     "overflow": {
      "duration": 4.86637e+09,
      "iterations": 6.43196e+08,
      "max": 53.958,
      "min": 7.328,
      "mean": 7.56592
     },
     "higher_two500": {
      "duration": 4.99842e+09,
      "iterations": 3.11012e+08,
      "max": 78.227,
      "min": 15.696,
      "mean": 16.0715
     },
     "subnormal": {
      "duration": 4.99841e+09,
      "iterations": 3.08935e+08,
      "max": 51.392,
      "min": 15.983,
      "mean": 16.1795
     },
     "less_two500": {
      "duration": 5.00108e+09,
      "iterations": 2.99464e+08,
      "max": 73.247,
      "min": 16.416,
      "mean": 16.7001
     },
     "default": {
      "duration": 5.04645e+09,
      "iterations": 3.52608e+08,
      "max": 70.073,
      "min": 13.38,
      "mean": 14.3118
     }
    }

  - POWER7 with patch
    "hypot": {
     "overflow": {
      "duration": 4.80785e+09,
      "iterations": 8.00001e+08,
      "max": 66.262,
      "min": 5.888,
      "mean": 6.00981
     },
     "higher_two500": {
      "duration": 4.9859e+09,
      "iterations": 3.39449e+08,
      "max": 5148.44,
      "min": 14.539,
      "mean": 14.6882
     },
     "subnormal": {
      "duration": 4.9905e+09,
      "iterations": 3.28874e+08,
      "max": 64.905,
      "min": 14.971,
      "mean": 15.1745
     },
     "less_two500": {
      "duration": 4.99494e+09,
      "iterations": 3.19755e+08,
      "max": 103.696,
      "min": 14.972,
      "mean": 15.6211
     },
     "default": {
      "duration": 5.03951e+09,
      "iterations": 4.02502e+08,
      "max": 61.008,
      "min": 12.368,
      "mean": 12.5205
     }
    }

Checked on powerpc-linux-gnu (built without --with-cpu, with
--with-cpu=power4 and with --with-cpu=power5+ and --disable-multi-arch),
powerpc64-linux-gnu (built without --with-cp and with --with-cpu=power5+
and --disable-multi-arch).

	* sysdeps/powerpc/fpu/e_hypot.c (two60, two500, two600, two1022,
	twoM500, twoM600, two60factor, pdnum): Remove.
	(TEST_INFO_NAN): Remove macro.
	(__ieee754_hypot): Replace static variables with inline definition,
	remove ununsed branches.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
	(libm-sysdep_routines): Remove e_hypot-* objects.
	(CFLAGS-e_hypot-power7.c, CFLAGS-e_hypotf-power7.c): Remove rule.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c: Remove
	file.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c: Likewise.
	* sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c: Likewise.
---
 sysdeps/powerpc/fpu/e_hypot.c                 | 96 +++++--------------
 .../powerpc/powerpc64/fpu/multiarch/Makefile  |  5 +-
 .../powerpc64/fpu/multiarch/e_hypot-power7.c  | 19 ----
 .../powerpc64/fpu/multiarch/e_hypot-ppc64.c   | 26 -----
 .../powerpc/powerpc64/fpu/multiarch/e_hypot.c | 33 -------
 .../powerpc64/fpu/multiarch/e_hypotf-power7.c | 19 ----
 .../powerpc64/fpu/multiarch/e_hypotf-ppc64.c  | 26 -----
 .../powerpc64/fpu/multiarch/e_hypotf.c        | 33 -------
 8 files changed, 23 insertions(+), 234 deletions(-)
 delete mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c
 delete mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c
 delete mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c
 delete mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c
 delete mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c
 delete mode 100644 sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c
Joseph Myers - April 1, 2019, 8:14 p.m.
On Fri, 29 Mar 2019, Adhemerval Zanella wrote:

> +  if ((isinf(x) || isinf(y))
> +      && !issignaling (x) && !issignaling (y))
> +    return INFINITY;
> +  if (isnan(x) || isnan(y))
> +    return x + y;

Missign spaces before '('.
Adhemerval Zanella Netto - April 3, 2019, 1:08 a.m.
On 02/04/2019 03:14, Joseph Myers wrote:
> On Fri, 29 Mar 2019, Adhemerval Zanella wrote:
> 
>> +  if ((isinf(x) || isinf(y))
>> +      && !issignaling (x) && !issignaling (y))
>> +    return INFINITY;
>> +  if (isnan(x) || isnan(y))
>> +    return x + y;
> 
> Missign spaces before '('.
> 

Fixed locally.

Patch

diff --git a/sysdeps/powerpc/fpu/e_hypot.c b/sysdeps/powerpc/fpu/e_hypot.c
index 039e5be430..adbc7d28f6 100644
--- a/sysdeps/powerpc/fpu/e_hypot.c
+++ b/sysdeps/powerpc/fpu/e_hypot.c
@@ -22,15 +22,6 @@ 
 #include <math-underflow.h>
 #include <stdint.h>
 
-static const double two60   = 1.152921504606847e+18;
-static const double two500  = 3.2733906078961419e+150;
-static const double two600  = 4.149515568880993e+180;
-static const double two1022 = 4.49423283715579e+307;
-static const double twoM500 = 3.054936363499605e-151;
-static const double twoM600 = 2.4099198651028841e-181;
-static const double two60factor = 1.5592502418239997e+290;
-static const double pdnum   = 2.225073858507201e-308;
-
 /* __ieee754_hypot(x,y)
  *
  * This a FP only version without any FP->INT conversion.
@@ -39,95 +30,52 @@  static const double pdnum   = 2.225073858507201e-308;
  * is needed.
  */
 
-#ifdef _ARCH_PWR7
-/* POWER7 isinf and isnan optimization are fast. */
-# define TEST_INF_NAN(x, y)                                       \
-   if ((isinf(x) || isinf(y))					  \
-       && !issignaling (x) && !issignaling (y))			  \
-       return INFINITY;                                           \
-   if (isnan(x) || isnan(y))                                      \
-       return x + y;
-# else
-/* For POWER6 and below isinf/isnan triggers LHS and PLT calls are
- * costly (especially for POWER6). */
-# define GET_TW0_HIGH_WORD(d1,d2,i1,i2)                           \
- do {                                                             \
-   ieee_double_shape_type gh_u1;                                  \
-   ieee_double_shape_type gh_u2;                                  \
-   gh_u1.value = (d1);                                            \
-   gh_u2.value = (d2);                                            \
-   (i1) = gh_u1.parts.msw & 0x7fffffff;                           \
-   (i2) = gh_u2.parts.msw & 0x7fffffff;                           \
- } while (0)
-
-# define TEST_INF_NAN(x, y)                                      \
- do {                                                            \
-   uint32_t hx, hy;                                              \
-   GET_TW0_HIGH_WORD(x, y, hx, hy);                              \
-   if (hy > hx) {                                                \
-     uint32_t ht = hx; hx = hy; hy = ht;                         \
-   }                                                             \
-   if (hx >= 0x7ff00000) {                                       \
-     if ((hx == 0x7ff00000 || hy == 0x7ff00000)			 \
-	 && !issignaling (x) && !issignaling (y))		 \
-       return INFINITY;                                          \
-     return x + y;						 \
-   }                                                             \
- } while (0)
-
-#endif
-
-
 double
 __ieee754_hypot (double x, double y)
 {
+  if ((isinf(x) || isinf(y))
+      && !issignaling (x) && !issignaling (y))
+    return INFINITY;
+  if (isnan(x) || isnan(y))
+    return x + y;
+
   x = fabs (x);
   y = fabs (y);
 
-  TEST_INF_NAN (x, y);
-
   if (y > x)
     {
       double t = x;
       x = y;
       y = t;
     }
-  if (y == 0.0)
-    return x;
+
   /* if y is higher enough, y * 2^60 might overflow. The tests if
      y >= 1.7976931348623157e+308/2^60 (two60factor) and uses the
      appropriate check to avoid the overflow exception generation.  */
-  if (y > two60factor)
-    {
-      if ((x / y) > two60)
-	return x + y;
-    }
-  else
-    {
-      if (x > (y * two60))
-	return x + y;
-    }
-  if (x > two500)
+  if (y <= 0x1.fffffffffffffp+963 && x > (y * 0x1p+60))
+    return x + y;
+
+  if (x > 0x1p+500)
     {
-      x *= twoM600;
-      y *= twoM600;
-      return sqrt (x * x + y * y) / twoM600;
+      x *= 0x1p-600;
+      y *= 0x1p-600;
+      return sqrt (x * x + y * y) / 0x1p-600;
     }
-  if (y < twoM500)
+  if (y < 0x1p-500)
     {
-      if (y <= pdnum)
+      if (y <= 0x0.fffffffffffffp-1022)
 	{
-	  x *= two1022;
-	  y *= two1022;
-	  double ret = sqrt (x * x + y * y) / two1022;
+	  x *= 0x1p+1022;
+	  y *= 0x1p+1022;
+	  double ret = sqrt (x * x + y * y) / 0x1p+1022;
 	  math_check_force_underflow_nonneg (ret);
 	  return ret;
 	}
       else
 	{
-	  x *= two600;
-	  y *= two600;
-	  return sqrt (x * x + y * y) / two600;
+	  x *= 0x1p+600;
+	  y *= 0x1p+600;
+	  return sqrt (x * x + y * y) / 0x1p+600;
 	}
     }
   return sqrt (x * x + y * y);
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
index 118865a1a9..534d5a7133 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
@@ -8,8 +8,7 @@  sysdep_calls := s_modf-power5+ s_modf-ppc64 \
 sysdep_routines += $(sysdep_calls)
 libm-sysdep_routines += s_logb-power7 s_logbf-power7 \
 			s_logbl-power7 s_logb-ppc64 s_logbf-ppc64 \
-			s_logbl-ppc64 e_hypot-ppc64 \
-			e_hypot-power7 e_hypotf-ppc64 e_hypotf-power7 \
+			s_logbl-ppc64 \
 			$(sysdep_calls:s_%=m_%)
 
 CFLAGS-s_logbf-power7.c = -mcpu=power7
@@ -17,8 +16,6 @@  CFLAGS-s_logbl-power7.c = -mcpu=power7
 CFLAGS-s_logb-power7.c = -mcpu=power7
 CFLAGS-s_modf-power5+.c = -mcpu=power5+
 CFLAGS-s_modff-power5+.c = -mcpu=power5+
-CFLAGS-e_hypot-power7.c = -mcpu=power7
-CFLAGS-e_hypotf-power7.c = -mcpu=power7
 
 # These files quiet sNaNs in a way that is optimized away without
 # -fsignaling-nans.
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c
deleted file mode 100644
index 69818d8438..0000000000
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-power7.c
+++ /dev/null
@@ -1,19 +0,0 @@ 
-/* __ieee_hypot() POWER7 version.
-   Copyright (C) 2013-2019 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypot-power7.c>
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c
deleted file mode 100644
index da1e80f0d6..0000000000
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot-ppc64.c
+++ /dev/null
@@ -1,26 +0,0 @@ 
-/* __ieee_hypot() PowerPC64 version.
-   Copyright (C) 2013-2019 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <math.h>
-
-#undef strong_alias
-#define strong_alias(a, b)
-
-#define __ieee754_hypot __ieee754_hypot_ppc64
-
-#include <sysdeps/powerpc/fpu/e_hypot.c>
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c
deleted file mode 100644
index 3bd04e9517..0000000000
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypot.c
+++ /dev/null
@@ -1,33 +0,0 @@ 
-/* Multiple versions of ieee754_hypot.
-   Copyright (C) 2013-2019 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <math.h>
-#include <math_private.h>
-#include <math_ldbl_opt.h>
-#include <shlib-compat.h>
-#include "init-arch.h"
-
-extern __typeof (__ieee754_hypot) __ieee754_hypot_ppc64 attribute_hidden;
-extern __typeof (__ieee754_hypot) __ieee754_hypot_power7 attribute_hidden;
-
-libc_ifunc (__ieee754_hypot,
-	    (hwcap & PPC_FEATURE_ARCH_2_06)
-	    ? __ieee754_hypot_power7
-            : __ieee754_hypot_ppc64);
-
-strong_alias (__ieee754_hypot, __hypot_finite)
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c
deleted file mode 100644
index 223947a617..0000000000
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-power7.c
+++ /dev/null
@@ -1,19 +0,0 @@ 
-/* __ieee_hypotf() POWER7 version.
-   Copyright (C) 2013-2019 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdeps/powerpc/powerpc32/power4/fpu/multiarch/e_hypotf-power7.c>
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c
deleted file mode 100644
index 6d5d54bb79..0000000000
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf-ppc64.c
+++ /dev/null
@@ -1,26 +0,0 @@ 
-/* __ieee_hypot() PowerPC64 version.
-   Copyright (C) 2013-2019 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <math.h>
-
-#undef strong_alias
-#define strong_alias(a, b)
-
-#define __ieee754_hypotf __ieee754_hypotf_ppc64
-
-#include <sysdeps/powerpc/fpu/e_hypotf.c>
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c
deleted file mode 100644
index 02c0ab497f..0000000000
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/e_hypotf.c
+++ /dev/null
@@ -1,33 +0,0 @@ 
-/* Multiple versions of ieee754_hypot.
-   Copyright (C) 2013-2019 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <math.h>
-#include <math_private.h>
-#include <math_ldbl_opt.h>
-#include <shlib-compat.h>
-#include "init-arch.h"
-
-extern __typeof (__ieee754_hypotf) __ieee754_hypotf_ppc64 attribute_hidden;
-extern __typeof (__ieee754_hypotf) __ieee754_hypotf_power7 attribute_hidden;
-
-libc_ifunc (__ieee754_hypotf,
-	    (hwcap & PPC_FEATURE_ARCH_2_06)
-	    ? __ieee754_hypotf_power7
-            : __ieee754_hypotf_ppc64);
-
-strong_alias (__ieee754_hypotf, __hypotf_finite)