[v3] Add math-inline benchmark
Commit Message
Ping (correct version of patch attached)
> Wilco Dijkstra wrote:
> Add a benchmark for isinf/isnan/isnormal/isfinite/fpclassify. The test uses 2 arrays with 1024
> doubles, one with 99% finite FP numbers (10% zeroes, 10% negative) and 1% inf/NaN, the other
> with 50% inf, and 50% Nan.
>
> This version removes various tests that caused confusion and only leaves the existing GLIBC
> definitions and inlines for comparison with the GCC builtins. I changed the tests to not
> inline inside the loop and use a branch on the boolean result. The 64-bit immediates used by
> the GLIBC inlines seem very expensive on some microarchitectures, so this shows even more
> clearly that using the built-ins results in a significant performance gain (see x64 results
> below).
>
>
> ChangeLog:
> 2015-07-23 Wilco Dijkstra <wdijkstr@arm.com>
>
> * benchtests/Makefile: Add bench-math-inlines, link with libm.
> * benchtests/bench-math-inlines.c: New benchmark.
> * benchtests/bench-util.h: New file.
> * benchtests/bench-util.c: New file.
> * benchtests/bench-skeleton.c: Add include of bench-util.c/h.
>
>
> "__isnan_t": {
> "normal": {
> "duration": 5.32475e+06,
> "iterations": 500,
> "mean": 10649
> }
> },
> "__isnan_inl_t": {
> "normal": {
> "duration": 2.74296e+06,
> "iterations": 500,
> "mean": 5485
> }
> },
> "__isnan_builtin_t": {
> "normal": {
> "duration": 2.35589e+06,
> "iterations": 500,
> "mean": 4711
> }
> },
> "isnan_t": {
> "normal": {
> "duration": 2.25889e+06,
> "iterations": 500,
> "mean": 4517
> }
> },
> "__isinf_t": {
> "normal": {
> "duration": 5.7671e+06,
> "iterations": 500,
> "mean": 11534
> }
> },
> "__isinf_ns_t": {
> "normal": {
> "duration": 2.70963e+06,
> "iterations": 500,
> "mean": 5419
> }
> },
> "__isinf_ns_builtin_t": {
> "normal": {
> "duration": 2.70841e+06,
> "iterations": 500,
> "mean": 5416
> }
> },
> "__isinf_builtin_t": {
> "normal": {
> "duration": 2.26255e+06,
> "iterations": 500,
> "mean": 4525
> }
> },
> "isinf_t": {
> "normal": {
> "duration": 2.26428e+06,
> "iterations": 500,
> "mean": 4528
> }
> },
> "__finite_t": {
> "normal": {
> "duration": 5.82567e+06,
> "iterations": 500,
> "mean": 11651
> }
> },
> "__finite_inl_t": {
> "normal": {
> "duration": 3.2019e+06,
> "iterations": 500,
> "mean": 6403
> }
> },
> "__isfinite_builtin_t": {
> "normal": {
> "duration": 3.17892e+06,
> "iterations": 500,
> "mean": 6357
> }
> },
> "isfinite_t": {
> "normal": {
> "duration": 2.75324e+06,
> "iterations": 500,
> "mean": 5506
> }
> },
> "__isnormal_inl_t": {
> "normal": {
> "duration": 8.65908e+06,
> "iterations": 500,
> "mean": 17318
> }
> },
> "__isnormal_builtin_t": {
> "normal": {
> "duration": 3.68589e+06,
> "iterations": 500,
> "mean": 7371
> }
> },
> "isnormal_t": {
> "normal": {
> "duration": 3.76085e+06,
> "iterations": 500,
> "mean": 7521
> }
> },
> "__fpclassify_t": {
> "normal": {
> "duration": 6.63574e+06,
> "iterations": 500,
> "mean": 13271
> }
> },
> "__fpclassify_builtin_t": {
> "normal": {
> "duration": 4.48757e+06,
> "iterations": 500,
> "mean": 8975
> }
> },
> "fpclassify_t": {
> "normal": {
> "duration": 4.48033e+06,
> "iterations": 500,
> "mean": 8960
> }
> },
> "remainder_test1_t": {
> "normal": {
> "duration": 2.10051e+07,
> "iterations": 500,
> "mean": 42010
> }
> },
> "remainder_test2_t": {
> "normal": {
> "duration": 1.9933e+07,
> "iterations": 500,
> "mean": 39865
> }
> }
> }
---
benchtests/Makefile | 7 +-
benchtests/bench-math-inlines.c | 284 ++++++++++++++++++++++++++++++++++++++++
benchtests/bench-skeleton.c | 18 +--
benchtests/bench-util.c | 34 +++++
benchtests/bench-util.h | 28 ++++
5 files changed, 355 insertions(+), 16 deletions(-)
create mode 100644 benchtests/bench-math-inlines.c
create mode 100644 benchtests/bench-util.c
create mode 100644 benchtests/bench-util.h
Comments
Sorry, I assumed someone else (Carlos?) was reviewing this and then
forgot about it. I've put review comments inline. The major concern I
have is that you're duplicating contents of math_private.h to compare
it against the gcc builtins, which is fine as a first hack if you want
to avoid changing actual code for the tests, but may not be a good idea
in the long term. That is, there is a risk that the inlines may be
modified but this test doesn't realize that.
I don't mind this going in as a first revision (with other changes
recommended below) provided you're willing to remedy this situation in
the next iteration so that there is at least a failure in running the
benchtest if the definitions of the inlines change in any way.
Siddhesh
On Wed, 2015-08-19 at 14:41 +0100, Wilco Dijkstra wrote:
> diff --git a/benchtests/Makefile b/benchtests/Makefile
> index 8e615e5..91970f8 100644
> --- a/benchtests/Makefile
> +++ b/benchtests/Makefile
> @@ -36,6 +36,7 @@ string-bench := bcopy bzero memccpy memchr memcmp
> memcpy memmem memmove \
> strncasecmp strncat strncmp strncpy strnlen strpbrk
> strrchr \
> strspn strstr strcpy_chk stpcpy_chk memrchr strsep
> strtok \
> strcoll
> +
Unnecessary hunk.
> string-bench-all := $(string-bench)
>
> # We have to generate locales
> @@ -50,7 +51,10 @@ stdlib-bench := strtod
>
> stdio-common-bench := sprintf
>
> -benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common
> -bench)
> +math-benchset := math-inlines
> +
> +benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common
> -bench) \
> + $(math-benchset)
>
> CFLAGS-bench-ffs.c += -fno-builtin
> CFLAGS-bench-ffsll.c += -fno-builtin
> @@ -58,6 +62,7 @@ CFLAGS-bench-ffsll.c += -fno-builtin
> bench-malloc := malloc-thread
>
> $(addprefix $(objpfx)bench-,$(bench-math)): $(libm)
> +$(addprefix $(objpfx)bench-,$(math-benchset)): $(libm)
> $(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread
> -library)
> $(objpfx)bench-malloc-thread: $(shared-thread-library)
>
> diff --git a/benchtests/bench-math-inlines.c b/benchtests/bench-math
> -inlines.c
> new file mode 100644
> index 0000000..1251935
> --- /dev/null
> +++ b/benchtests/bench-math-inlines.c
> @@ -0,0 +1,284 @@
> +/* Measure math inline functions.
> + Copyright (C) 2015 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it
> and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later
> version.
> +
> + The GNU C Library is distributed in the hope that it will be
> useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#define SIZE 1024
> +#define TEST_MAIN
> +#define TEST_NAME "math-inlines"
> +#define TEST_FUNCTION test_main ()
> +#include "bench-timing.h"
> +#include "json-lib.h"
> +#include "bench-util.h"
> +
> +#include <stdlib.h>
> +#include <math.h>
> +#include <stdint.h>
> +
> +#define BOOLTEST(func) \
> +int __attribute__((noinline)) \
Make this function and others below static.
> +func ## _f (double d, int i) \
> +{ \
> + if (func (d)) \
> + return (int)d + i; \
Space after (int).
> + else \
> + return 5; \
> +} \
> +int \
> +func ## _t (volatile double *p, size_t n, size_t iters) \
> +{ \
> + int i, j; \
> + int res = 0; \
> + for (j = 0; j < iters; j++) \
> + for (i = 0; i < n; i++) \
> + if (func ## _f (p[i] * 2.0, i)) \
Avoid implicit boolean coercion.
> + res += 5; \
> + return res; \
> +}
> +
> +#define VALUETEST(func) \
> +int __attribute__((noinline)) \
> +func ## _f (double d) \
> +{ \
> + return func (d); \
> +} \
> +int \
> +func ## _t (volatile double *p, size_t n, size_t iters) \
> +{ \
> + int i, j; \
> + int res = 0; \
> + for (j = 0; j < iters; j++) \
> + for (i = 0; i < n; i++) \
> + res += func ## _f (p[i] * 2.0); \
> + return res; \
> +}
> +
> +typedef union
> +{
> + double value;
> + uint64_t word;
> +} ieee_double_shape_type;
> +
> +#define EXTRACT_WORDS64(i,d) \
> +do { \
> + ieee_double_shape_type gh_u; \
> + gh_u.value = (d); \
> + (i) = gh_u.word; \
> +} while (0)
> +
> +/* Inlines similar to existing math_private.h versions. */
If seems a bit ugly to duplicate code from sysdeps/ieee754/dbl
-64/wordsize-64/math_private.h to test them. Can you look at another
way to massage math_private.h so that you can do something as simple as
include the file and rewrite the function names using macros? If it's
too hard or you want to do it in a second pass then please enhance the
comment to reflect that.
> +
> +extern __always_inline int
static __always_inline for this and everything else below.
> +__isnan_inl (double d)
> +{
> + uint64_t di;
> + EXTRACT_WORDS64 (di, d);
> + return (di & 0x7fffffffffffffffull) > 0x7ff0000000000000ull;
> +}
> +
> +extern __always_inline int
> +__isinf_ns (double d)
> +{
> + uint64_t di;
> + EXTRACT_WORDS64 (di, d);
> + return (di & 0x7fffffffffffffffull) == 0x7ff0000000000000ull;
> +}
> +extern __always_inline int
> +__finite_inl (double d)
> +{
> + uint64_t di;
> + EXTRACT_WORDS64 (di, d);
> + return (di & 0x7fffffffffffffffull) < 0x7ff0000000000000ull;
> +}
> +
> +#define __isnormal_inl(X) (__fpclassify (X) == FP_NORMAL)
> +
> +/* Inlines for the builtin functions. */
> +
> +#define __isnan_builtin(X) __builtin_isnan (X)
> +#define __isinf_ns_builtin(X) __builtin_isinf (X)
> +#define __isinf_builtin(X) __builtin_isinf_sign (X)
> +#define __isfinite_builtin(X) __builtin_isfinite (X)
> +#define __isnormal_builtin(X) __builtin_isnormal (X)
> +#define __fpclassify_builtin(X) __builtin_fpclassify (FP_NAN,
> FP_INFINITE, \
> + FP_NORMAL, FP_SUBNORMAL, FP_ZERO,
> (X))
> +
> +double __attribute ((noinline))
> +kernel_standard (double x, double y, int z)
> +{
> + return x * y + z;
> +}
> +
> +volatile double rem1 = 2.5;
> +
> +extern __always_inline double
> +remainder_test1 (double x)
> +{
> + double y = rem1;
> + if (((__builtin_expect (y == 0.0, 0) && !__isnan_inl (x))
> + || (__builtin_expect (__isinf_ns (x), 0) && !__isnan_inl
> (y))))
> + return kernel_standard (x, y, 10);
> +
> + return remainder (x, y);
> +}
> +
> +extern __always_inline double
> +remainder_test2 (double x)
> +{
> + double y = rem1;
> + if (((__builtin_expect (y == 0.0, 0) && !__builtin_isnan (x))
> + || (__builtin_expect (__builtin_isinf (x), 0) &&
> !__builtin_isnan (y))))
> + return kernel_standard (x, y, 10);
> +
> + return remainder (x, y);
> +}
> +
> +/* Create test functions for each possibility. */
> +
> +BOOLTEST (__isnan)
> +BOOLTEST (__isnan_inl)
> +BOOLTEST (__isnan_builtin)
> +BOOLTEST (isnan)
> +
> +BOOLTEST (__isinf)
> +BOOLTEST (__isinf_builtin)
> +BOOLTEST (__isinf_ns)
> +BOOLTEST (__isinf_ns_builtin)
> +BOOLTEST (isinf)
> +
> +BOOLTEST (__finite)
> +BOOLTEST (__finite_inl)
> +BOOLTEST (__isfinite_builtin)
> +BOOLTEST (isfinite)
> +
> +BOOLTEST (__isnormal_inl)
> +BOOLTEST (__isnormal_builtin)
> +BOOLTEST (isnormal)
> +
> +VALUETEST (__fpclassify)
> +VALUETEST (__fpclassify_builtin)
> +VALUETEST (fpclassify)
> +
> +VALUETEST (remainder_test1)
> +VALUETEST (remainder_test2)
> +
> +typedef int (*proto_t) (volatile double *p, size_t n, size_t iters);
> +
> +typedef struct
> +{
> + const char *name;
> + proto_t fn;
> +} impl_t;
> +
> +#define IMPL(name) { #name, name ## _t }
> +
> +impl_t test_list[] =
static impl_t test_list.
> +{
> + IMPL (__isnan),
> + IMPL (__isnan_inl),
> + IMPL (__isnan_builtin),
> + IMPL (isnan),
> +
> + IMPL (__isinf),
> + IMPL (__isinf_ns),
> + IMPL (__isinf_ns_builtin),
> + IMPL (__isinf_builtin),
> + IMPL (isinf),
> +
> + IMPL (__finite),
> + IMPL (__finite_inl),
> + IMPL (__isfinite_builtin),
> + IMPL (isfinite),
> +
> + IMPL (__isnormal_inl),
> + IMPL (__isnormal_builtin),
> + IMPL (isnormal),
> +
> + IMPL (__fpclassify),
> + IMPL (__fpclassify_builtin),
> + IMPL (fpclassify),
> +
> + IMPL (remainder_test1),
> + IMPL (remainder_test2)
> +};
> +
> +static void
> +do_one_test (json_ctx_t *json_ctx, proto_t test_fn, volatile double
> *arr,
> + size_t len, const char *testname)
> +{
> + size_t iters = 500;
> + timing_t start, stop, cur;
> +
> + json_attr_object_begin (json_ctx, testname);
> +
> + TIMING_NOW (start);
> + test_fn (arr, len, iters);
> + TIMING_NOW (stop);
> + TIMING_DIFF (cur, start, stop);
> +
> + json_attr_double (json_ctx, "duration", cur);
> + json_attr_double (json_ctx, "iterations", iters);
> + json_attr_double (json_ctx, "mean", cur / iters);
> + json_attr_object_end (json_ctx);
> +}
> +
> +static volatile double arr1[SIZE];
> +static volatile double arr2[SIZE];
> +
> +int
> +test_main (void)
> +{
> + json_ctx_t json_ctx;
> + size_t i;
> +
> + bench_start ();
> +
> + json_init (&json_ctx, 2, stdout);
> + json_attr_object_begin (&json_ctx, "math-inlines");
Use the TEST_NAME macro.
> +
> + /* Create 2 test arrays, one with 10% zeroes, 10% negative values,
> + 79% positive values and 1% infinity/NaN. The other contains
> + 50% inf, 50% NaN. */
How do you know that this is the ratio of generated inputs? Please
enhance the comment to describe that.
> +
> + for (i = 0; i < SIZE; i++)
> + {
> + int x = rand () & 255;
> + arr1[i] = (x < 25) ? 0.0 : ((x < 50) ? -1 : 100);
> + if (x == 255) arr1[i] = __builtin_inf ();
> + if (x == 254) arr1[i] = __builtin_nan ("0");
> + arr2[i] = (x < 128) ? __builtin_inf () : __builtin_nan ("0");
> + }
> +
> + for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++)
> + {
> + json_attr_object_begin (&json_ctx, test_list[i].name);
> + do_one_test (&json_ctx, test_list[i].fn, arr2, SIZE,
> "inf/nan");
> + json_attr_object_end (&json_ctx);
> + }
> +
> + for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++)
> + {
> + json_attr_object_begin (&json_ctx, test_list[i].name);
> + do_one_test (&json_ctx, test_list[i].fn, arr1, SIZE,
> "normal");
> + json_attr_object_end (&json_ctx);
> + }
> +
> + json_attr_object_end (&json_ctx);
> + return 0;
> +}
> +
> +#include "bench-util.c"
> +#include "../test-skeleton.c"
> diff --git a/benchtests/bench-skeleton.c b/benchtests/bench
> -skeleton.c
> index e357f0c..bc820df 100644
> --- a/benchtests/bench-skeleton.c
> +++ b/benchtests/bench-skeleton.c
> @@ -24,21 +24,9 @@
> #include <inttypes.h>
> #include "bench-timing.h"
> #include "json-lib.h"
> +#include "bench-util.h"
>
> -volatile unsigned int dontoptimize = 0;
> -
> -void
> -startup (void)
> -{
> - /* This loop should cause CPU to switch to maximal freqency.
> - This makes subsequent measurement more accurate. We need a
> side effect
> - to prevent the loop being deleted by compiler.
> - This should be enough to cause CPU to speed up and it is
> simpler than
> - running loop for constant time. This is used when user does not
> have root
> - access to set a constant freqency. */
> - for (int k = 0; k < 10000000; k++)
> - dontoptimize += 23 * dontoptimize + 2;
> -}
> +#include "bench-util.c"
>
> #define TIMESPEC_AFTER(a, b) \
> (((a).tv_sec == (b).tv_sec) ?
> \
> @@ -56,7 +44,7 @@ main (int argc, char **argv)
> if (argc == 2 && !strcmp (argv[1], "-d"))
> detailed = true;
>
> - startup();
> + bench_start ();
>
> memset (&runtime, 0, sizeof (runtime));
>
> diff --git a/benchtests/bench-util.h b/benchtests/bench-util.h
> new file mode 100644
> index 0000000..930cecc
> --- /dev/null
> +++ b/benchtests/bench-util.h
> @@ -0,0 +1,28 @@
> +/* Benchmark utility functions.
> + Copyright (C) 2015 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it
> and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later
> version.
> +
> + The GNU C Library is distributed in the hope that it will be
> useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +
> +#ifndef START_ITER
> +# define START_ITER (100000000)
> +#endif
> +
> +/* bench_start reduces the random variations due to frequency
> scaling by
> + executing a small loop with many memory accesses. START_ITER
> controls
> + the number of iterations. */
> +
> +void bench_start (void);
@@ -36,6 +36,7 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
strcoll
+
string-bench-all := $(string-bench)
# We have to generate locales
@@ -50,7 +51,10 @@ stdlib-bench := strtod
stdio-common-bench := sprintf
-benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common-bench)
+math-benchset := math-inlines
+
+benchset := $(string-bench-all) $(stdlib-bench) $(stdio-common-bench) \
+ $(math-benchset)
CFLAGS-bench-ffs.c += -fno-builtin
CFLAGS-bench-ffsll.c += -fno-builtin
@@ -58,6 +62,7 @@ CFLAGS-bench-ffsll.c += -fno-builtin
bench-malloc := malloc-thread
$(addprefix $(objpfx)bench-,$(bench-math)): $(libm)
+$(addprefix $(objpfx)bench-,$(math-benchset)): $(libm)
$(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread-library)
$(objpfx)bench-malloc-thread: $(shared-thread-library)
new file mode 100644
@@ -0,0 +1,284 @@
+/* Measure math inline functions.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define SIZE 1024
+#define TEST_MAIN
+#define TEST_NAME "math-inlines"
+#define TEST_FUNCTION test_main ()
+#include "bench-timing.h"
+#include "json-lib.h"
+#include "bench-util.h"
+
+#include <stdlib.h>
+#include <math.h>
+#include <stdint.h>
+
+#define BOOLTEST(func) \
+int __attribute__((noinline)) \
+func ## _f (double d, int i) \
+{ \
+ if (func (d)) \
+ return (int)d + i; \
+ else \
+ return 5; \
+} \
+int \
+func ## _t (volatile double *p, size_t n, size_t iters) \
+{ \
+ int i, j; \
+ int res = 0; \
+ for (j = 0; j < iters; j++) \
+ for (i = 0; i < n; i++) \
+ if (func ## _f (p[i] * 2.0, i)) \
+ res += 5; \
+ return res; \
+}
+
+#define VALUETEST(func) \
+int __attribute__((noinline)) \
+func ## _f (double d) \
+{ \
+ return func (d); \
+} \
+int \
+func ## _t (volatile double *p, size_t n, size_t iters) \
+{ \
+ int i, j; \
+ int res = 0; \
+ for (j = 0; j < iters; j++) \
+ for (i = 0; i < n; i++) \
+ res += func ## _f (p[i] * 2.0); \
+ return res; \
+}
+
+typedef union
+{
+ double value;
+ uint64_t word;
+} ieee_double_shape_type;
+
+#define EXTRACT_WORDS64(i,d) \
+do { \
+ ieee_double_shape_type gh_u; \
+ gh_u.value = (d); \
+ (i) = gh_u.word; \
+} while (0)
+
+/* Inlines similar to existing math_private.h versions. */
+
+extern __always_inline int
+__isnan_inl (double d)
+{
+ uint64_t di;
+ EXTRACT_WORDS64 (di, d);
+ return (di & 0x7fffffffffffffffull) > 0x7ff0000000000000ull;
+}
+
+extern __always_inline int
+__isinf_ns (double d)
+{
+ uint64_t di;
+ EXTRACT_WORDS64 (di, d);
+ return (di & 0x7fffffffffffffffull) == 0x7ff0000000000000ull;
+}
+extern __always_inline int
+__finite_inl (double d)
+{
+ uint64_t di;
+ EXTRACT_WORDS64 (di, d);
+ return (di & 0x7fffffffffffffffull) < 0x7ff0000000000000ull;
+}
+
+#define __isnormal_inl(X) (__fpclassify (X) == FP_NORMAL)
+
+/* Inlines for the builtin functions. */
+
+#define __isnan_builtin(X) __builtin_isnan (X)
+#define __isinf_ns_builtin(X) __builtin_isinf (X)
+#define __isinf_builtin(X) __builtin_isinf_sign (X)
+#define __isfinite_builtin(X) __builtin_isfinite (X)
+#define __isnormal_builtin(X) __builtin_isnormal (X)
+#define __fpclassify_builtin(X) __builtin_fpclassify (FP_NAN, FP_INFINITE, \
+ FP_NORMAL, FP_SUBNORMAL, FP_ZERO, (X))
+
+double __attribute ((noinline))
+kernel_standard (double x, double y, int z)
+{
+ return x * y + z;
+}
+
+volatile double rem1 = 2.5;
+
+extern __always_inline double
+remainder_test1 (double x)
+{
+ double y = rem1;
+ if (((__builtin_expect (y == 0.0, 0) && !__isnan_inl (x))
+ || (__builtin_expect (__isinf_ns (x), 0) && !__isnan_inl (y))))
+ return kernel_standard (x, y, 10);
+
+ return remainder (x, y);
+}
+
+extern __always_inline double
+remainder_test2 (double x)
+{
+ double y = rem1;
+ if (((__builtin_expect (y == 0.0, 0) && !__builtin_isnan (x))
+ || (__builtin_expect (__builtin_isinf (x), 0) && !__builtin_isnan (y))))
+ return kernel_standard (x, y, 10);
+
+ return remainder (x, y);
+}
+
+/* Create test functions for each possibility. */
+
+BOOLTEST (__isnan)
+BOOLTEST (__isnan_inl)
+BOOLTEST (__isnan_builtin)
+BOOLTEST (isnan)
+
+BOOLTEST (__isinf)
+BOOLTEST (__isinf_builtin)
+BOOLTEST (__isinf_ns)
+BOOLTEST (__isinf_ns_builtin)
+BOOLTEST (isinf)
+
+BOOLTEST (__finite)
+BOOLTEST (__finite_inl)
+BOOLTEST (__isfinite_builtin)
+BOOLTEST (isfinite)
+
+BOOLTEST (__isnormal_inl)
+BOOLTEST (__isnormal_builtin)
+BOOLTEST (isnormal)
+
+VALUETEST (__fpclassify)
+VALUETEST (__fpclassify_builtin)
+VALUETEST (fpclassify)
+
+VALUETEST (remainder_test1)
+VALUETEST (remainder_test2)
+
+typedef int (*proto_t) (volatile double *p, size_t n, size_t iters);
+
+typedef struct
+{
+ const char *name;
+ proto_t fn;
+} impl_t;
+
+#define IMPL(name) { #name, name ## _t }
+
+impl_t test_list[] =
+{
+ IMPL (__isnan),
+ IMPL (__isnan_inl),
+ IMPL (__isnan_builtin),
+ IMPL (isnan),
+
+ IMPL (__isinf),
+ IMPL (__isinf_ns),
+ IMPL (__isinf_ns_builtin),
+ IMPL (__isinf_builtin),
+ IMPL (isinf),
+
+ IMPL (__finite),
+ IMPL (__finite_inl),
+ IMPL (__isfinite_builtin),
+ IMPL (isfinite),
+
+ IMPL (__isnormal_inl),
+ IMPL (__isnormal_builtin),
+ IMPL (isnormal),
+
+ IMPL (__fpclassify),
+ IMPL (__fpclassify_builtin),
+ IMPL (fpclassify),
+
+ IMPL (remainder_test1),
+ IMPL (remainder_test2)
+};
+
+static void
+do_one_test (json_ctx_t *json_ctx, proto_t test_fn, volatile double *arr,
+ size_t len, const char *testname)
+{
+ size_t iters = 500;
+ timing_t start, stop, cur;
+
+ json_attr_object_begin (json_ctx, testname);
+
+ TIMING_NOW (start);
+ test_fn (arr, len, iters);
+ TIMING_NOW (stop);
+ TIMING_DIFF (cur, start, stop);
+
+ json_attr_double (json_ctx, "duration", cur);
+ json_attr_double (json_ctx, "iterations", iters);
+ json_attr_double (json_ctx, "mean", cur / iters);
+ json_attr_object_end (json_ctx);
+}
+
+static volatile double arr1[SIZE];
+static volatile double arr2[SIZE];
+
+int
+test_main (void)
+{
+ json_ctx_t json_ctx;
+ size_t i;
+
+ bench_start ();
+
+ json_init (&json_ctx, 2, stdout);
+ json_attr_object_begin (&json_ctx, "math-inlines");
+
+ /* Create 2 test arrays, one with 10% zeroes, 10% negative values,
+ 79% positive values and 1% infinity/NaN. The other contains
+ 50% inf, 50% NaN. */
+
+ for (i = 0; i < SIZE; i++)
+ {
+ int x = rand () & 255;
+ arr1[i] = (x < 25) ? 0.0 : ((x < 50) ? -1 : 100);
+ if (x == 255) arr1[i] = __builtin_inf ();
+ if (x == 254) arr1[i] = __builtin_nan ("0");
+ arr2[i] = (x < 128) ? __builtin_inf () : __builtin_nan ("0");
+ }
+
+ for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++)
+ {
+ json_attr_object_begin (&json_ctx, test_list[i].name);
+ do_one_test (&json_ctx, test_list[i].fn, arr2, SIZE, "inf/nan");
+ json_attr_object_end (&json_ctx);
+ }
+
+ for (i = 0; i < sizeof (test_list) / sizeof (test_list[0]); i++)
+ {
+ json_attr_object_begin (&json_ctx, test_list[i].name);
+ do_one_test (&json_ctx, test_list[i].fn, arr1, SIZE, "normal");
+ json_attr_object_end (&json_ctx);
+ }
+
+ json_attr_object_end (&json_ctx);
+ return 0;
+}
+
+#include "bench-util.c"
+#include "../test-skeleton.c"
@@ -24,21 +24,9 @@
#include <inttypes.h>
#include "bench-timing.h"
#include "json-lib.h"
+#include "bench-util.h"
-volatile unsigned int dontoptimize = 0;
-
-void
-startup (void)
-{
- /* This loop should cause CPU to switch to maximal freqency.
- This makes subsequent measurement more accurate. We need a side effect
- to prevent the loop being deleted by compiler.
- This should be enough to cause CPU to speed up and it is simpler than
- running loop for constant time. This is used when user does not have root
- access to set a constant freqency. */
- for (int k = 0; k < 10000000; k++)
- dontoptimize += 23 * dontoptimize + 2;
-}
+#include "bench-util.c"
#define TIMESPEC_AFTER(a, b) \
(((a).tv_sec == (b).tv_sec) ? \
@@ -56,7 +44,7 @@ main (int argc, char **argv)
if (argc == 2 && !strcmp (argv[1], "-d"))
detailed = true;
- startup();
+ bench_start ();
memset (&runtime, 0, sizeof (runtime));
new file mode 100644
@@ -0,0 +1,28 @@
+/* Benchmark utility functions.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+#ifndef START_ITER
+# define START_ITER (100000000)
+#endif
+
+/* bench_start reduces the random variations due to frequency scaling by
+ executing a small loop with many memory accesses. START_ITER controls
+ the number of iterations. */
+
+void bench_start (void);
--
1.9.1
new file mode 100644
@@ -0,0 +1,34 @@
+/* Benchmark utility functions.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+static volatile unsigned int dontoptimize = 0;
+
+void
+bench_start (void)
+{
+ /* This loop should cause CPU to switch to maximal freqency.
+ This makes subsequent measurement more accurate. We need a side effect
+ to prevent the loop being deleted by compiler.
+ This should be enough to cause CPU to speed up and it is simpler than
+ running loop for constant time. This is used when user does not have root
+ access to set a constant freqency. */
+
+ for (int k = 0; k < START_ITER; k++)
+ dontoptimize += 23 * dontoptimize + 2;
+}