[v3] improve cexp performance for imaginary inputs

Message ID 1425723465-4928-1-git-send-email-jtaylor.debian@googlemail.com
State Changes Requested, archived
Headers

Commit Message

Julian Taylor March 7, 2015, 10:17 a.m. UTC
  cexp(x) is computed as exp(x_r) * (cos(x_i) + i sin(x_i)); the exponential
can be skipped when the real part of the input is zero.
This is a common enough input to be worth optimizing, e.g. twiddle factors in
fast Fourier transforms.

Even though the exp function has a fast path for the zero input case, it
still imposes significant overhead on the computation.
For x = 0.5 + 0.5I the overhead is about 50%; for the range over
[-pi,pi,10000] it's still 15%.

double cexp benchmark on an AMD Phenom II X4, with the change:
  "cexp": {
   "realandimag": {
    "duration": 3.20798e+10,
    "iterations": 1.47882e+08,
    "max": 665.21,
    "min": 215.071,
    "mean": 216.929
   },
   "imagonly": {
    "duration": 3.20139e+10,
    "iterations": 3.01673e+08,
    "max": 486.618,
    "min": 105.573,
    "mean": 106.121
   }

without the change:

  "cexp": {
   "realandimag": {
    "duration": 3.20785e+10,
    "iterations": 1.51123e+08,
    "max": 373.893,
    "min": 212.079,
    "mean": 212.268
   },
   "imagonly": {
    "duration": 3.2056e+10,
    "iterations": 2.03904e+08,
    "max": 715.579,
    "min": 155.042,
    "mean": 157.211
   }
---
 benchtests/Makefile     | 5 +++--
 benchtests/cexp-inputs  | 7 +++++++
 benchtests/cexpf-inputs | 7 +++++++
 benchtests/cexpl-inputs | 7 +++++++
 math/s_cexp.c           | 4 +++-
 math/s_cexpf.c          | 4 +++-
 math/s_cexpl.c          | 4 +++-
 7 files changed, 33 insertions(+), 5 deletions(-)
 create mode 100644 benchtests/cexp-inputs
 create mode 100644 benchtests/cexpf-inputs
 create mode 100644 benchtests/cexpl-inputs
  

Comments

Joseph Myers March 9, 2015, 3:39 p.m. UTC | #1
On Sat, 7 Mar 2015, Julian Taylor wrote:

> diff --git a/math/s_cexp.c b/math/s_cexp.c
> index 9116e2b..ddafe6f 100644
> --- a/math/s_cexp.c
> +++ b/math/s_cexp.c
> @@ -70,7 +70,9 @@ __cexp (__complex__ double x)
>  	    }
>  	  else
>  	    {
> -	      double exp_val = __ieee754_exp (__real__ x);
> +	      double exp_val = 1.;
> +	      if (__real__ x != 0.)
> +	          exp_val = __ieee754_exp (__real__ x);

The indentation seems to be off now the braces have been removed.
  

Patch

diff --git a/benchtests/Makefile b/benchtests/Makefile
index 08603a2..d6f724f 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -22,8 +22,9 @@ 
 subdir := benchtests
 
 include ../Makeconfig
-bench-math := acos acosh asin asinh atan atanh cos cosh exp exp2 ffs ffsll \
-	      log log2 modf pow rint sin sincos sinh sqrt tan tanh
+bench-math := acos acosh asin asinh atan atanh cexp cexpf cexpl cos cosh exp
+	      exp2 ffs ffsll log log2 modf pow rint sin sincos sinh sqrt
+	      tan tanh
 
 bench-pthread := pthread_once
 
diff --git a/benchtests/cexp-inputs b/benchtests/cexp-inputs
new file mode 100644
index 0000000..0674327
--- /dev/null
+++ b/benchtests/cexp-inputs
@@ -0,0 +1,7 @@ 
+## args: double
+## ret: double
+## includes: math.h,complex.h
+## name: imagonly
+I*0.5
+## name: realandimag
+0.5+I*0.5
diff --git a/benchtests/cexpf-inputs b/benchtests/cexpf-inputs
new file mode 100644
index 0000000..0361f11
--- /dev/null
+++ b/benchtests/cexpf-inputs
@@ -0,0 +1,7 @@ 
+## args: float
+## ret: float
+## includes: math.h,complex.h
+## name: imagonly
+I*0.5
+## name: realandimag
+0.5+I*0.5
diff --git a/benchtests/cexpl-inputs b/benchtests/cexpl-inputs
new file mode 100644
index 0000000..e4a0167
--- /dev/null
+++ b/benchtests/cexpl-inputs
@@ -0,0 +1,7 @@ 
+## args: long double
+## ret: long double
+## includes: math.h,complex.h
+## name: imagonly
+I*0.5
+## name: realandimag
+0.5+I*0.5
diff --git a/math/s_cexp.c b/math/s_cexp.c
index 9116e2b..ddafe6f 100644
--- a/math/s_cexp.c
+++ b/math/s_cexp.c
@@ -70,7 +70,9 @@  __cexp (__complex__ double x)
 	    }
 	  else
 	    {
-	      double exp_val = __ieee754_exp (__real__ x);
+	      double exp_val = 1.;
+	      if (__real__ x != 0.)
+	          exp_val = __ieee754_exp (__real__ x);
 	      __real__ retval = exp_val * cosix;
 	      __imag__ retval = exp_val * sinix;
 	    }
diff --git a/math/s_cexpf.c b/math/s_cexpf.c
index fac1a17..6cfc3ad 100644
--- a/math/s_cexpf.c
+++ b/math/s_cexpf.c
@@ -70,7 +70,9 @@  __cexpf (__complex__ float x)
 	    }
 	  else
 	    {
-	      float exp_val = __ieee754_expf (__real__ x);
+	      float exp_val = 1.;
+	      if (__real__ x != 0.)
+	          exp_val = __ieee754_expf (__real__ x);
 	      __real__ retval = exp_val * cosix;
 	      __imag__ retval = exp_val * sinix;
 	    }
diff --git a/math/s_cexpl.c b/math/s_cexpl.c
index 9309b1f..ff2e2af 100644
--- a/math/s_cexpl.c
+++ b/math/s_cexpl.c
@@ -70,7 +70,9 @@  __cexpl (__complex__ long double x)
 	    }
 	  else
 	    {
-	      long double exp_val = __ieee754_expl (__real__ x);
+	      long double exp_val = 1.;
+	      if (__real__ x != 0.)
+	          exp_val = __ieee754_expl (__real__ x);
 	      __real__ retval = exp_val * cosix;
 	      __imag__ retval = exp_val * sinix;
 	    }