[v3] improve cexp performance for imaginary inputs
Commit Message
cexp(x) is computed as exp(x_r) * (cos(x_i) + i sin(x_i)); the call to exp
can be skipped when the real part of the input is zero, since exp(0) = 1.
This is a common enough input to be worth optimizing, e.g. twiddle factors in
fast Fourier transforms.
Even though the exp function has a fast path for the zero input case, it
still imposes significant overhead on the computation.
For x = 0 + 0.5I the overhead is about 50%; for the range over
[-pi,pi,10000] it's still 15%.
double cexp benchmark on an AMD Phenom II X4, with the change:
"cexp": {
"realandimag": {
"duration": 3.20798e+10,
"iterations": 1.47882e+08,
"max": 665.21,
"min": 215.071,
"mean": 216.929
},
"imagonly": {
"duration": 3.20139e+10,
"iterations": 3.01673e+08,
"max": 486.618,
"min": 105.573,
"mean": 106.121
}
without the change:
"cexp": {
"realandimag": {
"duration": 3.20785e+10,
"iterations": 1.51123e+08,
"max": 373.893,
"min": 212.079,
"mean": 212.268
},
"imagonly": {
"duration": 3.2056e+10,
"iterations": 2.03904e+08,
"max": 715.579,
"min": 155.042,
"mean": 157.211
}
---
benchtests/Makefile | 5 +++--
benchtests/cexp-inputs | 7 +++++++
benchtests/cexpf-inputs | 7 +++++++
benchtests/cexpl-inputs | 7 +++++++
math/s_cexp.c | 4 +++-
math/s_cexpf.c | 4 +++-
math/s_cexpl.c | 4 +++-
7 files changed, 33 insertions(+), 5 deletions(-)
create mode 100644 benchtests/cexp-inputs
create mode 100644 benchtests/cexpf-inputs
create mode 100644 benchtests/cexpl-inputs
Comments
On Sat, 7 Mar 2015, Julian Taylor wrote:
> diff --git a/math/s_cexp.c b/math/s_cexp.c
> index 9116e2b..ddafe6f 100644
> --- a/math/s_cexp.c
> +++ b/math/s_cexp.c
> @@ -70,7 +70,9 @@ __cexp (__complex__ double x)
> }
> else
> {
> - double exp_val = __ieee754_exp (__real__ x);
> + double exp_val = 1.;
> + if (__real__ x != 0.)
> + exp_val = __ieee754_exp (__real__ x);
The indentation seems to be off now the braces have been removed.
@@ -22,8 +22,9 @@
subdir := benchtests
include ../Makeconfig
-bench-math := acos acosh asin asinh atan atanh cos cosh exp exp2 ffs ffsll \
- log log2 modf pow rint sin sincos sinh sqrt tan tanh
+bench-math := acos acosh asin asinh atan atanh cexp cexpf cexpl cos cosh exp \
+ exp2 ffs ffsll log log2 modf pow rint sin sincos sinh sqrt \
+ tan tanh
bench-pthread := pthread_once
new file mode 100644
@@ -0,0 +1,7 @@
+## args: double
+## ret: double
+## includes: math.h,complex.h
+## name: imagonly
+I*0.5
+## name: realandimag
+0.5+I*0.5
new file mode 100644
@@ -0,0 +1,7 @@
+## args: float
+## ret: float
+## includes: math.h,complex.h
+## name: imagonly
+I*0.5
+## name: realandimag
+0.5+I*0.5
new file mode 100644
@@ -0,0 +1,7 @@
+## args: long double
+## ret: long double
+## includes: math.h,complex.h
+## name: imagonly
+I*0.5
+## name: realandimag
+0.5+I*0.5
@@ -70,7 +70,9 @@ __cexp (__complex__ double x)
}
else
{
- double exp_val = __ieee754_exp (__real__ x);
+ double exp_val = 1.;
+ if (__real__ x != 0.)
+ exp_val = __ieee754_exp (__real__ x);
__real__ retval = exp_val * cosix;
__imag__ retval = exp_val * sinix;
}
@@ -70,7 +70,9 @@ __cexpf (__complex__ float x)
}
else
{
- float exp_val = __ieee754_expf (__real__ x);
+ float exp_val = 1.;
+ if (__real__ x != 0.)
+ exp_val = __ieee754_expf (__real__ x);
__real__ retval = exp_val * cosix;
__imag__ retval = exp_val * sinix;
}
@@ -70,7 +70,9 @@ __cexpl (__complex__ long double x)
}
else
{
- long double exp_val = __ieee754_expl (__real__ x);
+ long double exp_val = 1.;
+ if (__real__ x != 0.)
+ exp_val = __ieee754_expl (__real__ x);
__real__ retval = exp_val * cosix;
__imag__ retval = exp_val * sinix;
}