[v4,07/12] i386: Move hypot implementation to C

Message ID 20211203000103.737833-8-adhemerval.zanella@linaro.org
State Superseded
Headers
Series Improve hypot |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Adhemerval Zanella Dec. 3, 2021, midnight UTC
  The generic hypotf is slight slower, mostly due the tricks the assembly
does to optimize the isinf/isnan/issignaling.  The generic hypot is way
slower, since the optimized implementation uses the i386 default
excessive precision to issue the operation directly.  A similar
implementation is provided instead of using the generic implementation:

Checked on i686-linux-gnu.
---
 sysdeps/i386/fpu/e_hypot.S  | 75 -------------------------------------
 sysdeps/i386/fpu/e_hypot.c  | 46 +++++++++++++++++++++++
 sysdeps/i386/fpu/e_hypotf.S | 64 -------------------------------
 3 files changed, 46 insertions(+), 139 deletions(-)
 delete mode 100644 sysdeps/i386/fpu/e_hypot.S
 create mode 100644 sysdeps/i386/fpu/e_hypot.c
 delete mode 100644 sysdeps/i386/fpu/e_hypotf.S
  

Comments

Wilco Dijkstra Dec. 3, 2021, 2:51 p.m. UTC | #1
Hi Adhemerval,

+  double r = math_narrow_eval (sqrtl (lx * lx + ly * ly));

Same problem here, this needs a cast to double.

Wilco
  
Adhemerval Zanella Dec. 6, 2021, 12:26 p.m. UTC | #2
On 03/12/2021 11:51, Wilco Dijkstra wrote:
> Hi Adhemerval,
> 
> +  double r = math_narrow_eval (sqrtl (lx * lx + ly * ly));
> 
> Same problem here, this needs a cast to double.
> 
> Wilco
> 

Ack.
  

Patch

diff --git a/sysdeps/i386/fpu/e_hypot.S b/sysdeps/i386/fpu/e_hypot.S
deleted file mode 100644
index f2c956b77a..0000000000
--- a/sysdeps/i386/fpu/e_hypot.S
+++ /dev/null
@@ -1,75 +0,0 @@ 
-/* Compute the hypothenuse of X and Y.
-   Copyright (C) 1998-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <i386-math-asm.h>
-#include <libm-alias-finite.h>
-
-DEFINE_DBL_MIN
-
-#ifdef PIC
-# define MO(op) op##@GOTOFF(%edx)
-#else
-# define MO(op) op
-#endif
-
-	.text
-ENTRY(__ieee754_hypot)
-#ifdef  PIC
-	LOAD_PIC_REG (dx)
-#endif
-	fldl	4(%esp)		// x
-	fxam
-	fnstsw
-	fldl	12(%esp)	// y : x
-	movb	%ah, %ch
-	fxam
-	fnstsw
-	movb	%ah, %al
-	orb	%ch, %ah
-	sahf
-	jc	1f
-	fmul	%st(0)		// y * y : x
-	fxch			// x : y * y
-	fmul	%st(0)		// x * x : y * y
-	faddp			// x * x + y * y
-	fsqrt
-	DBL_NARROW_EVAL_UFLOW_NONNEG
-2:	ret
-
-	// We have to test whether any of the parameters is Inf.
-	// In this case the result is infinity.
-1:	andb	$0x45, %al
-	cmpb	$5, %al
-	je	3f		// jump if y is Inf
-	andb	$0x45, %ch
-	cmpb	$5, %ch
-	jne	4f		// jump if x is not Inf
-	fxch
-3:	fstp	%st(1)
-	fabs
-	jmp	2b
-
-4:	testb	$1, %al
-	jnz	5f		// y is NaN
-	fxch
-5:	fstp	%st(1)
-	jmp	2b
-
-END(__ieee754_hypot)
-libm_alias_finite (__ieee754_hypot, __hypot)
diff --git a/sysdeps/i386/fpu/e_hypot.c b/sysdeps/i386/fpu/e_hypot.c
new file mode 100644
index 0000000000..b7c068e734
--- /dev/null
+++ b/sysdeps/i386/fpu/e_hypot.c
@@ -0,0 +1,46 @@ 
+/* Euclidean distance function.  Double/Binary64 i386 version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include <math_private.h>
+#include <math-underflow.h>
+#include <math-narrow-eval.h>
+#include <libm-alias-finite.h>
+#include <math_config.h>
+
+/* The i386 allows to use the default excess of precision to optimize the
+   hypot implementation, since internal multiplication and sqrt is carried
+   with 80-bit FP type.  */
+double
+__ieee754_hypot (double x, double y)
+{
+  if (!isfinite (x) || !isfinite (y))
+    {
+      if ((isinf (x) || isinf (y))
+	  && !issignaling (x) && !issignaling (y))
+	return INFINITY;
+      return x + y;
+    }
+
+  long double lx = x;
+  long double ly = y;
+  double r = math_narrow_eval (sqrtl (lx * lx + ly * ly));
+  math_check_force_underflow_nonneg (r);
+  return r;
+}
+libm_alias_finite (__ieee754_hypot, __hypot)
diff --git a/sysdeps/i386/fpu/e_hypotf.S b/sysdeps/i386/fpu/e_hypotf.S
deleted file mode 100644
index cec5d15403..0000000000
--- a/sysdeps/i386/fpu/e_hypotf.S
+++ /dev/null
@@ -1,64 +0,0 @@ 
-/* Compute the hypothenuse of X and Y.
-   Copyright (C) 1998-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <i386-math-asm.h>
-#include <libm-alias-finite.h>
-
-	.text
-ENTRY(__ieee754_hypotf)
-	flds	4(%esp)		// x
-	fxam
-	fnstsw
-	flds	8(%esp)		// y : x
-	movb	%ah, %ch
-	fxam
-	fnstsw
-	movb	%ah, %al
-	orb	%ch, %ah
-	sahf
-	jc	1f
-	fmul	%st(0)		// y * y : x
-	fxch			// x : y * y
-	fmul	%st(0)		// x * x : y * y
-	faddp			// x * x + y * y
-	fsqrt
-	FLT_NARROW_EVAL
-2:	ret
-
-	// We have to test whether any of the parameters is Inf.
-	// In this case the result is infinity.
-1:	andb	$0x45, %al
-	cmpb	$5, %al
-	je	3f		// jump if y is Inf
-	andb	$0x45, %ch
-	cmpb	$5, %ch
-	jne	4f		// jump if x is not Inf
-	fxch
-3:	fstp	%st(1)
-	fabs
-	jmp	2b
-
-4:	testb	$1, %al
-	jnz	5f		// y is NaN
-	fxch
-5:	fstp	%st(1)
-	jmp	2b
-
-END(__ieee754_hypotf)
-libm_alias_finite (__ieee754_hypotf, __hypotf)