From patchwork Thu Oct 19 17:31:54 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "H.J. Lu" <hjl.tools@gmail.com>
X-Patchwork-Id: 23705
Received: (qmail 81254 invoked by alias); 19 Oct 2017 17:32:09 -0000
Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm
Precedence: bulk
List-Id: <libc-alpha.sourceware.org>
List-Unsubscribe: <mailto:libc-alpha-unsubscribe-##L=##H@sourceware.org>
List-Subscribe: <mailto:libc-alpha-subscribe@sourceware.org>
List-Archive: <http://sourceware.org/ml/libc-alpha/>
List-Post: <mailto:libc-alpha@sourceware.org>
List-Help: <mailto:libc-alpha-help@sourceware.org>,
	<http://sourceware.org/ml/#faqs>
Sender: libc-alpha-owner@sourceware.org
Delivered-To: mailing list libc-alpha@sourceware.org
Received: (qmail 81092 invoked by uid 89); 19 Oct 2017 17:32:09 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-26.1 required=5.0 tests=BAYES_00,
	FREEMAIL_FROM, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2,
	GIT_PATCH_3, KAM_STOCKGEN,
	SPF_SOFTFAIL autolearn=ham version=3.3.2 spammy=
X-HELO: mga09.intel.com
X-ExtLoop1: 1
From: "H.J. Lu" <hjl.tools@gmail.com>
To: libc-alpha@sourceware.org
Subject: [PATCH 05/10] i386: Replace assembly versions of e_logf with
	generic e_logf.c
Date: Thu, 19 Oct 2017 10:31:54 -0700
Message-Id: <20171019173159.21402-6-hjl.tools@gmail.com>
In-Reply-To: <20171019173159.21402-1-hjl.tools@gmail.com>
References: <20171019173159.21402-1-hjl.tools@gmail.com>

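This patch replaces the i386 assembly versions of e_logf with the generic
e_logf.c.  In the tables below, lower numbers are better and Improvement
is computed as Before / After - 1.  For workload-spec2017.wrf, on Nehalem,
it improves performance by: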

                           Before            After     Improvement
reciprocal-throughput      73.3865          40.0454       83%
latency                    90.0985          54.4479       65%

On Skylake, it improves performance by:

                           Before            After     Improvement
reciprocal-throughput      75.1384          22.1452       239%
latency                    91.9441          50.7925       81%

	* sysdeps/i386/fpu/e_logf.S: Removed.
	* sysdeps/i386/fpu/e_logf_data.c: Likewise.
	* sysdeps/i386/fpu/w_logf.c: Likewise.
	* sysdeps/i386/i686/fpu/e_logf.S: Likewise.
	* sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_logf.c.
	* sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise.
	* sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines):
	Add e_logf-sse2.
	(CFLAGS-e_logf-sse2.c): New.
	* sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c: New file.
	* sysdeps/i386/i686/fpu/multiarch/e_logf.c: Likewise.
---
 sysdeps/i386/fpu/e_logf.S                      | 93 --------------------------
 sysdeps/i386/fpu/e_logf_data.c                 |  1 -
 sysdeps/i386/fpu/libm-test-ulps                | 20 +++---
 sysdeps/i386/fpu/w_logf.c                      |  1 -
 sysdeps/i386/i686/fpu/e_logf.S                 | 30 ---------
 sysdeps/i386/i686/fpu/multiarch/Makefile       |  5 +-
 sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c  |  3 +
 sysdeps/i386/i686/fpu/multiarch/e_logf.c       | 40 +++++++++++
 sysdeps/i386/i686/fpu/multiarch/libm-test-ulps | 12 ++--
 9 files changed, 62 insertions(+), 143 deletions(-)
 delete mode 100644 sysdeps/i386/fpu/e_logf.S
 delete mode 100644 sysdeps/i386/fpu/e_logf_data.c
 delete mode 100644 sysdeps/i386/fpu/w_logf.c
 delete mode 100644 sysdeps/i386/i686/fpu/e_logf.S
 create mode 100644 sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c
 create mode 100644 sysdeps/i386/i686/fpu/multiarch/e_logf.c

diff --git a/sysdeps/i386/fpu/e_logf.S b/sysdeps/i386/fpu/e_logf.S
deleted file mode 100644
index de967a31f5..0000000000
--- a/sysdeps/i386/fpu/e_logf.S
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- * Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
- *
- * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
- */
-
-#include <machine/asm.h>
-
-	.section .rodata.cst8,"aM",@progbits,8
-
-	.p2align 3
-	.type one,@object
-one:	.double 1.0
-	ASM_SIZE_DIRECTIVE(one)
-	/* It is not important that this constant is precise.  It is only
-	   a value which is known to be on the safe side for using the
-	   fyl2xp1 instruction.  */
-	.type limit,@object
-limit:	.double 0.29
-	ASM_SIZE_DIRECTIVE(limit)
-
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%edx)
-#else
-# define MO(op) op
-#endif
-
-	.text
-ENTRY(__ieee754_logf)
-	fldln2			// log(2)
-	flds	4(%esp)		// x : log(2)
-	fxam
-	fnstsw
-#ifdef PIC
-	LOAD_PIC_REG (dx)
-#endif
-	fld	%st		// x : x : log(2)
-	sahf
-	jc	3f		// in case x is NaN or +-Inf
-4:	fsubl	MO(one)		// x-1 : x : log(2)
-	fld	%st		// x-1 : x-1 : x : log(2)
-	fabs			// |x-1| : x-1 : x : log(2)
-	fcompl	MO(limit)	// x-1 : x : log(2)
-	fnstsw			// x-1 : x : log(2)
-	andb	$0x45, %ah
-	jz	2f
-	fxam
-	fnstsw
-	andb	$0x45, %ah
-	cmpb	$0x40, %ah
-	jne	5f
-	fabs			// log(1) is +0 in all rounding modes.
-5:	fstp	%st(1)		// x-1 : log(2)
-	fyl2xp1			// log(x)
-	ret
-
-2:	fstp	%st(0)		// x : log(2)
-	fyl2x			// log(x)
-	ret
-
-3:	jp	4b		// in case x is +-Inf
-	fstp	%st(1)
-	fstp	%st(1)
-	ret
-END (__ieee754_logf)
-
-ENTRY(__logf_finite)
-	fldln2			// log(2)
-	flds	4(%esp)		// x : log(2)
-#ifdef PIC
-	LOAD_PIC_REG (dx)
-#endif
-	fld	%st		// x : x : log(2)
-	fsubl	MO(one)		// x-1 : x : log(2)
-	fld	%st		// x-1 : x-1 : x : log(2)
-	fabs			// |x-1| : x-1 : x : log(2)
-	fcompl	MO(limit)	// x-1 : x : log(2)
-	fnstsw			// x-1 : x : log(2)
-	andb	$0x45, %ah
-	jz	2b
-	fxam
-	fnstsw
-	andb	$0x45, %ah
-	cmpb	$0x40, %ah
-	jne	6f
-	fabs			// log(1) is +0 in all rounding modes.
-6:	fstp	%st(1)		// x-1 : log(2)
-	fyl2xp1			// log(x)
-	ret
-END(__logf_finite)
diff --git a/sysdeps/i386/fpu/e_logf_data.c b/sysdeps/i386/fpu/e_logf_data.c
deleted file mode 100644
index 1cc8931700..0000000000
--- a/sysdeps/i386/fpu/e_logf_data.c
+++ /dev/null
@@ -1 +0,0 @@
-/* Not needed.  */
diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps
index 5a319f147b..698d1b3a1d 100644
--- a/sysdeps/i386/fpu/libm-test-ulps
+++ b/sysdeps/i386/fpu/libm-test-ulps
@@ -2000,17 +2000,17 @@ ldouble: 4
 
 Function: "gamma_downward":
 double: 4
-float: 4
+float: 5
 idouble: 4
-ifloat: 4
+ifloat: 5
 ildouble: 7
 ldouble: 7
 
 Function: "gamma_towardzero":
 double: 4
-float: 2
+float: 3
 idouble: 4
-ifloat: 2
+ifloat: 3
 ildouble: 7
 ldouble: 7
 
@@ -2186,20 +2186,20 @@ ldouble: 4
 
 Function: "lgamma_downward":
 double: 4
-float: 4
+float: 5
 float128: 8
 idouble: 4
-ifloat: 4
+ifloat: 5
 ifloat128: 8
 ildouble: 7
 ldouble: 7
 
 Function: "lgamma_towardzero":
 double: 4
-float: 2
+float: 3
 float128: 5
 idouble: 4
-ifloat: 2
+ifloat: 3
 ifloat128: 5
 ildouble: 7
 ldouble: 7
@@ -2641,10 +2641,10 @@ ldouble: 5
 
 Function: "y0_towardzero":
 double: 2
-float: 2
+float: 3
 float128: 3
 idouble: 2
-ifloat: 2
+ifloat: 3
 ifloat128: 3
 ildouble: 5
 ldouble: 5
diff --git a/sysdeps/i386/fpu/w_logf.c b/sysdeps/i386/fpu/w_logf.c
deleted file mode 100644
index ea48d1356e..0000000000
--- a/sysdeps/i386/fpu/w_logf.c
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/../math/w_logf.c>
diff --git a/sysdeps/i386/i686/fpu/e_logf.S b/sysdeps/i386/i686/fpu/e_logf.S
deleted file mode 100644
index 6fd39d50d3..0000000000
--- a/sysdeps/i386/i686/fpu/e_logf.S
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- * Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
- *
- * Adapted for i686 instructions.
- */
-
-#include <machine/asm.h>
-
-
-	.text
-ENTRY(__ieee754_logf)
-	fldln2			// log(2)
-	flds	4(%esp)		// x : log(2)
-	fucomi	%st
-	jp	3f
-	fyl2x			// log(x)
-	ret
-
-3:	fstp	%st(1)
-	ret
-END (__ieee754_logf)
-
-ENTRY(__logf_finite)
-	fldln2			// log(2)
-	flds	4(%esp)		// x : log(2)
-	fyl2x			// log(x)
-	ret
-END(__logf_finite)
diff --git a/sysdeps/i386/i686/fpu/multiarch/Makefile b/sysdeps/i386/i686/fpu/multiarch/Makefile
index 2723f570af..ee4c3f39e7 100644
--- a/sysdeps/i386/i686/fpu/multiarch/Makefile
+++ b/sysdeps/i386/i686/fpu/multiarch/Makefile
@@ -1,7 +1,8 @@
 ifeq ($(subdir),math)
-libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 s_sinf-sse2 s_cosf-sse2 \
-                        s_sincosf-sse2
+libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 \
+			s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
 
 CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
 CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
+CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse
 endif
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c b/sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c
new file mode 100644
index 0000000000..11621fc122
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/multiarch/e_logf-sse2.c
@@ -0,0 +1,3 @@
+#define __logf __logf_sse2
+
+#include <sysdeps/ieee754/flt-32/e_logf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_logf.c b/sysdeps/i386/i686/fpu/multiarch/e_logf.c
new file mode 100644
index 0000000000..1414d17d25
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/multiarch/e_logf.c
@@ -0,0 +1,40 @@
+/* Multiple versions of logf.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+extern float __redirect_logf (float);
+
+#define SYMBOL_NAME logf
+#include "ifunc-sse2.h"
+
+libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (__logf_ia32, __GI___logf, __redirect_logf)
+  __attribute__ ((visibility ("hidden")));
+
+# include <shlib-compat.h>
+versioned_symbol (libm, __logf, logf, GLIBC_2_27);
+#else
+weak_alias (__logf, logf)
+#endif
+
+strong_alias (__logf, __ieee754_logf)
+strong_alias (__logf, __logf_finite)
+
+#define __logf __logf_ia32
+#include <sysdeps/ieee754/flt-32/e_logf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
index 2d056a9260..c86294b778 100644
--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
+++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
@@ -2000,9 +2000,9 @@ ldouble: 4
 
 Function: "gamma_downward":
 double: 4
-float: 4
+float: 5
 idouble: 4
-ifloat: 4
+ifloat: 5
 ildouble: 7
 ldouble: 7
 
@@ -2186,10 +2186,10 @@ ldouble: 4
 
 Function: "lgamma_downward":
 double: 4
-float: 4
+float: 5
 float128: 8
 idouble: 4
-ifloat: 4
+ifloat: 5
 ifloat128: 8
 ildouble: 7
 ldouble: 7
@@ -2625,10 +2625,10 @@ ldouble: 5
 
 Function: "y0_towardzero":
 double: 2
-float: 2
+float: 3
 float128: 3
 idouble: 2
-ifloat: 2
+ifloat: 3
 ifloat128: 3
 ildouble: 5
 ldouble: 5