From patchwork Wed Jun 17 15:28:27 2015
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Wilco Dijkstra <wdijkstr@arm.com>
X-Patchwork-Id: 7215
Received: (qmail 93652 invoked by alias); 17 Jun 2015 15:28:37 -0000
Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm
Precedence: bulk
List-Id: <libc-alpha.sourceware.org>
List-Unsubscribe: <mailto:libc-alpha-unsubscribe-##L=##H@sourceware.org>
List-Subscribe: <mailto:libc-alpha-subscribe@sourceware.org>
List-Archive: <http://sourceware.org/ml/libc-alpha/>
List-Post: <mailto:libc-alpha@sourceware.org>
List-Help: <mailto:libc-alpha-help@sourceware.org>,
	<http://sourceware.org/ml/#faqs>
Sender: libc-alpha-owner@sourceware.org
Delivered-To: mailing list libc-alpha@sourceware.org
Received: (qmail 93640 invoked by uid 89); 17 Jun 2015 15:28:36 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-0.9 required=5.0 tests=AWL, BAYES_20,
	SPF_PASS autolearn=ham version=3.3.2
X-HELO: eu-smtp-delivery-143.mimecast.com
From: "Wilco Dijkstra" <wdijkstr@arm.com>
To: "GNU C Library" <libc-alpha@sourceware.org>
Subject: [PATCH] Add math-inline benchmark
Date: Wed, 17 Jun 2015 16:28:27 +0100
Message-ID: <001c01d0a912$42357710$c6a06530$@com>
MIME-Version: 1.0
X-MC-Unique: IHpp8OS2S3-mBGXu0dxWOA-1

Hi,

Due to popular demand, here is a new benchmark that tests isinf, isnan,
isnormal, isfinite and fpclassify. It uses 2 arrays with 1024 doubles,
one with 99% finite FP numbers (10% zeroes, 10% negative) and 1% inf/NaN,
the other with 50% inf and 50% NaN.

Results show that using the GCC built-ins in math.h will give huge speedups
by avoiding explicit calls and PLT indirection to execute a function with
3-4 instructions. The GCC built-ins have similar performance to the existing
math_private inlines for __isnan, __finite and __isinf_ns.

OK for commit?

ChangeLog:
2015-06-17  Wilco Dijkstra  <wdijkstr@arm.com>

	* benchtests/Makefile: Add bench-math-inlines.c.
	* benchtests/bench-math-inlines.c: New benchmark.
---
 benchtests/Makefile             |  14 +--
 benchtests/bench-math-inlines.c | 203 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 211 insertions(+), 6 deletions(-)
 create mode 100644 benchtests/bench-math-inlines.c

diff --git a/benchtests/Makefile b/benchtests/Makefile
index 8e615e5..3c20180 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -30,12 +30,13 @@ bench-pthread := pthread_once
 bench := $(bench-math) $(bench-pthread)
 
 # String function benchmarks.
-string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
-		mempcpy memset rawmemchr stpcpy stpncpy strcasecmp strcasestr \
-		strcat strchr strchrnul strcmp strcpy strcspn strlen \
-		strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
-		strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
-		strcoll
+string-bench := bcopy bzero math-inlines memccpy memchr memcmp memcpy memmem \
+		memmove mempcpy memset rawmemchr stpcpy stpncpy strcasecmp \
+		strcasestr strcat strchr strchrnul strcmp strcpy strcspn \
+		strlen strncasecmp strncat strncmp strncpy strnlen strpbrk \
+		strrchr strspn strstr strcpy_chk stpcpy_chk memrchr strsep \
+		strtok strcoll
+
 string-bench-all := $(string-bench)
 
 # We have to generate locales
@@ -58,6 +59,7 @@ CFLAGS-bench-ffsll.c += -fno-builtin
 bench-malloc := malloc-thread
 
 $(addprefix $(objpfx)bench-,$(bench-math)): $(libm)
+$(addprefix $(objpfx)bench-,math-inlines): $(libm)
 $(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread-library)
 $(objpfx)bench-malloc-thread: $(shared-thread-library)
 
diff --git a/benchtests/bench-math-inlines.c b/benchtests/bench-math-inlines.c
new file mode 100644
index 0000000..c21a3d3
--- /dev/null
+++ b/benchtests/bench-math-inlines.c
@@ -0,0 +1,203 @@
+/* Measure math inline functions.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SIZE 1024
+#define TEST_MAIN
+#define TEST_NAME "math-inlines"
+#include "bench-string.h"
+
+#include <stdlib.h>
+#include <math.h>
+#include <stdint.h>
+
+#define BOOLTEST(func)	/* Emit func_t: count nonzero results.  */ \
+int							  \
+func ## _t (volatile double *p, size_t n, size_t iters)   \
+{							  \
+  size_t i, j;	/* Same type as the n and iters bounds.  */ \
+  int res = 0;						  \
+  for (j = 0; j < iters; j++)				  \
+    for (i = 0; i < n; i++)				  \
+      if (func (p[i] * 2.0)) res++;			  \
+  return res;						  \
+}
+
+#define VALUETEST(func)	/* Emit func_t: sum the results.  */ \
+int							  \
+func ## _t (volatile double *p, size_t n, size_t iters)	  \
+{							  \
+  size_t i, j;	/* Same type as the n and iters bounds.  */ \
+  int res = 0;						  \
+  for (j = 0; j < iters; j++)				  \
+    for (i = 0; i < n; i++)				  \
+      res += func (p[i] * 2.0);				  \
+  return res;						  \
+}
+
+typedef union
+{
+  double value;
+  uint64_t word;
+} ieee_double_shape_type;
+
+#define EXTRACT_WORDS64(i,d)                              \
+do {                                                      \
+  ieee_double_shape_type gh_u;                            \
+  gh_u.value = (d);                                       \
+  (i) = gh_u.word;                                        \
+} while (0)
+
+/* Explicit inlines similar to math_private.h versions.  */
+
+extern __always_inline int
+__isnan_inl (double d)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, d);  /* NaN: sign-cleared bits exceed +Inf's.  */
+  return (di & 0x7fffffffffffffffull) > 0x7ff0000000000000ull;
+}
+
+extern __always_inline int
+__isinf_inl (double x)
+{
+  uint64_t ix;
+  EXTRACT_WORDS64 (ix,x);
+  if ((ix << 1) != 0xffe0000000000000ull)  /* Shift drops the sign bit.  */
+    return 0;
+  return (int)(ix >> 32);  /* Nonzero: high word of the infinity.  */
+}
+
+extern __always_inline int
+__finite_inl (double d)
+{
+  uint64_t di;
+  EXTRACT_WORDS64 (di, d);  /* Finite: sign-cleared bits below +Inf's.  */
+  return (di & 0x7fffffffffffffffull) < 0x7ff0000000000000ull;
+}
+
+/* Explicit inline similar to existing math.h implementation.  */
+
+#define __isnormal_inl(X) (__fpclassify (X) == FP_NORMAL)
+#define __isnormal_inl2(X) (fpclassify (X) == FP_NORMAL)
+
+/* Test fpclassify with use of only 2 of the 5 results.  */
+
+extern __always_inline int
+__fpclassify_test1 (double d)
+{
+  int cl = fpclassify (d);
+  return cl == FP_NAN || cl == FP_INFINITE;
+}
+
+extern __always_inline int
+__fpclassify_test2 (double d)
+{
+  return isnan (d) || isinf (d);
+}
+
+/* Create test functions for each possibility.  */
+
+BOOLTEST (__isnan)
+BOOLTEST (isnan)
+BOOLTEST (__isnan_inl)
+
+BOOLTEST (__isinf)
+BOOLTEST (isinf)
+BOOLTEST (__isinf_inl)
+
+BOOLTEST (__finite)
+BOOLTEST (isfinite)
+BOOLTEST (__finite_inl)
+
+BOOLTEST (isnormal)
+BOOLTEST (__isnormal_inl)
+BOOLTEST (__isnormal_inl2)
+
+VALUETEST (fpclassify)
+VALUETEST (__fpclassify)
+BOOLTEST (__fpclassify_test1)
+BOOLTEST (__fpclassify_test2)
+
+IMPL (isnan_t, 0)
+IMPL (__isnan_t, 1)
+IMPL (__isnan_inl_t, 2)
+IMPL (isinf_t, 3)
+IMPL (__isinf_t, 4)
+IMPL (__isinf_inl_t, 5)
+IMPL (isfinite_t, 6)
+IMPL (__finite_t, 7)
+IMPL (__finite_inl_t, 8)
+IMPL (isnormal_t, 9)
+IMPL (__isnormal_inl_t, 10)
+IMPL (__isnormal_inl2_t, 11)
+IMPL (fpclassify_t, 12)
+IMPL (__fpclassify_t, 13)
+IMPL (__fpclassify_test1_t, 14)
+IMPL (__fpclassify_test2_t, 15)
+
+typedef int (*proto_t) (volatile double *p, size_t n, size_t iters);
+
+static void
+do_one_test (impl_t *impl, volatile double *arr, size_t len)
+{
+  size_t iters = INNER_LOOP_ITERS * 10;  /* Repeat count given to impl.  */
+  timing_t start, stop, cur;
+
+  TIMING_NOW (start);
+  CALL (impl, arr, len, iters);
+  TIMING_NOW (stop);
+  TIMING_DIFF (cur, start, stop);
+
+  TIMING_PRINT_MEAN ((double) cur, (double) iters);
+}
+
+static volatile double arr1[SIZE];
+static volatile double arr2[SIZE];
+
+int
+test_main (void)
+{
+  size_t i;
+
+  test_init ();
+
+  /* Create 2 test arrays: arr1 gets roughly 10% zeroes, 10% negative
+     values, 79% positive values and 1% infinity/NaN; arr2 gets about
+     50% inf and 50% NaN.  Both are chosen from the low 8 bits of rand.  */
+
+  for (i = 0; i < SIZE; i++)
+    {
+      int x = rand () & 255;
+      arr1[i] = (x < 25) ? 0.0 : ((x < 50) ? -1 : 100);
+      if (x == 255) arr1[i] = __builtin_inf ();
+      if (x == 254) arr1[i] = __builtin_nan ("0");
+      arr2[i] = (x < 128) ? __builtin_inf () : __builtin_nan ("0");
+    }
+
+  FOR_EACH_IMPL (impl, 0)
+    {
+      printf ("%20s: ", impl->name);
+      do_one_test (impl, arr1, SIZE);  /* Mostly finite inputs.  */
+      do_one_test (impl, arr2, SIZE);  /* Only inf/NaN inputs.  */
+      putchar ('\n');
+    }
+
+  return ret;
+}
+
+#include "../test-skeleton.c"