x86-64: Add sinf with FMA

Message ID CAMe9rOp7a9=WhZZ9X_GEZOTrX6xRcaQmTQ_mHQEof14Ehv48eA@mail.gmail.com
State New, archived
Headers

Commit Message

H.J. Lu Dec. 5, 2017, 7:03 p.m. UTC
  On Tue, Dec 5, 2017 at 9:09 AM, Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
>
> On 05/12/2017 14:56, H.J. Lu wrote:
>> On Tue, Dec 5, 2017 at 5:47 AM, Adhemerval Zanella
>> <adhemerval.zanella@linaro.org> wrote:
>>
>>> And with a simple modification to avoid int to fp conversion:
>>>
>>> ---
>>> diff --git a/sysdeps/ieee754/flt-32/s_sinf.c b/sysdeps/ieee754/flt-32/s_sinf.c
>>> index 40d3d19..a2fd3cf 100644
>>> --- a/sysdeps/ieee754/flt-32/s_sinf.c
>>> +++ b/sysdeps/ieee754/flt-32/s_sinf.c
>>> @@ -75,7 +75,7 @@ static const double invpio4_table[] = {
>>>    0x1.0e4107cp-169
>>>  };
>>>
>>> -static const int ones[] = { +1, -1 };
>>> +static const double ones[] = { 1.0, -1.0 };
>>>
>>>  /* Compute the sine value using Chebyshev polynomials where
>>>     THETA is the range reduced absolute value of the input
>>> @@ -92,7 +92,7 @@ reduced (const double theta, const unsigned long int n,
>>>    const double theta2 = theta * theta;
>>>    /* We are operating on |x|, so we need to add back the original
>>>       signbit for sinf.  */
>>> -  int sign;
>>> +  double sign;
>>>    /* Determine positive or negative primary interval.  */
>>>    sign = ones[((n >> 2) & 1) ^ signbit];
>>>    /* Are we in the primary interval of sin or cos?  */
>>> ---
>>>
>>> I get:
>>>
>>>   "sinf": {
>>>    "": {
>>>     "duration": 4.0015e+10,
>>>     "iterations": 1.4535e+09,
>>>     "max": 640.456,
>>>     "min": 11.437,
>>>     "mean": 27.5301
>>>    }
>>>
>>> Which is roughly 3% on mean and 11.5% on min. I think we can improve it
>>> even more by avoiding the int to fp conversion to get the sign right
>>> and try operate with sign as double argument.
>>
>> I tried it on Skylake with the current master.  Before:
>>
>>   "sinf": {
>>    "": {
>>     "duration": 3.4044e+10,
>>     "iterations": 1.9942e+09,
>>     "max": 141.106,
>>     "min": 7.704,
>>     "mean": 17.0715
>>    }
>>   }
>>
>> After:
>>
>>   "sinf": {
>>    "": {
>>     "duration": 3.40665e+10,
>>     "iterations": 2.03199e+09,
>>     "max": 95.994,
>>     "min": 7.704,
>>     "mean": 16.765
>>    }
>>   }
>>
>> Generic is faster than asm now:
>>
>>   "sinf": {
>>    "": {
>>     "duration": 3.40417e+10,
>>     "iterations": 1.87792e+09,
>>     "max": 138.868,
>>     "min": 8.546,
>>     "mean": 18.1273
>>    }
>>   }
>>
>> Can you submit your patch?
>
> I will do it, thanks for checking this out.

Here is the patch to add sinf with FMA using s_sinf.c for SSE2.
I posted a separate patch to remove sysdeps/x86_64/fpu/s_sinf.S:

https://sourceware.org/ml/libc-alpha/2017-12/msg00146.html

OK for master?
  

Comments

Adhemerval Zanella Netto Dec. 7, 2017, 12:20 p.m. UTC | #1
On 05/12/2017 17:03, H.J. Lu wrote:
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
> new file mode 100644
> index 0000000000..f91f866cdc
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
> @@ -0,0 +1,31 @@
> +/* Multiple versions of sinf.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <libm-alias-float.h>
> +
> +extern float __redirect_sinf (float);
> +
> +#define SYMBOL_NAME sinf
> +#include "ifunc-fma.h"
> +
> +libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ());
> +
> +libm_alias_float (__sin, sin)
> +
> +#define SINF __sinf_sse2
> +#include <sysdeps/ieee754/flt-32/s_sinf.c>
> -- 2.14.3

I would prefer if we move the default version to specific files instead
of incorporate them on ifunc resolver itself.  In this case add a
sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c file with:

#define SINF __sinf_sse2
#include <sysdeps/ieee754/flt-32/s_sinf.c>

LGTM with this change.
  

Patch

From 6ddbb7775f61b447c59bd8e9b2dc753681cfa95a Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 4 Dec 2017 09:27:50 -0800
Subject: [PATCH] x86-64: Add sinf with FMA

On Skylake, bench-sinf reports performance improvement:

            Before        After         Improvement
max        153.996       100.094           54%
min        8.546         6.852             25%
mean       18.1223       11.802            54%

	* sysdeps/x86_64/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add s_sinf-fma.
	(CFLAGS-s_sinf-fma.c): New.
	* sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c: New file.
	* sysdeps/x86_64/fpu/multiarch/s_sinf.c: Likewise.
---
 sysdeps/x86_64/fpu/multiarch/Makefile     |  3 ++-
 sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c |  2 ++
 sysdeps/x86_64/fpu/multiarch/s_sinf.c     | 31 +++++++++++++++++++++++++++++++
 3 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c
 create mode 100644 sysdeps/x86_64/fpu/multiarch/s_sinf.c

diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index c78624b47d..a9dc329ddb 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -38,13 +38,14 @@  CFLAGS-s_sin-fma.c = -mfma -mavx2
 CFLAGS-s_tan-fma.c = -mfma -mavx2
 
 libm-sysdep_routines += e_exp2f-fma e_expf-fma e_log2f-fma e_logf-fma \
-			e_powf-fma
+			e_powf-fma s_sinf-fma
 
 CFLAGS-e_exp2f-fma.c = -mfma -mavx2
 CFLAGS-e_expf-fma.c = -mfma -mavx2
 CFLAGS-e_log2f-fma.c = -mfma -mavx2
 CFLAGS-e_logf-fma.c = -mfma -mavx2
 CFLAGS-e_powf-fma.c = -mfma -mavx2
+CFLAGS-s_sinf-fma.c = -mfma -mavx2
 
 libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \
 			e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c
new file mode 100644
index 0000000000..34440ebf4a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c
@@ -0,0 +1,2 @@ 
+#define SINF __sinf_fma
+#include <sysdeps/ieee754/flt-32/s_sinf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
new file mode 100644
index 0000000000..f91f866cdc
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
@@ -0,0 +1,31 @@ 
+/* Multiple versions of sinf.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-float.h>
+
+extern float __redirect_sinf (float);
+
+#define SYMBOL_NAME sinf
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ());
+
+libm_alias_float (__sin, sin)
+
+#define SINF __sinf_sse2
+#include <sysdeps/ieee754/flt-32/s_sinf.c>
-- 
2.14.3