Patchwork [3/4] Add ILP32 support to aarch64

login
register
mail settings
Submitter Steve Ellcey
Date Aug. 3, 2017, 3:36 p.m.
Message ID <1501774579.3962.54.camel@cavium.com>
Download mbox | patch
Permalink /patch/21908/
State New
Headers show

Comments

Steve Ellcey - Aug. 3, 2017, 3:36 p.m.
Here are some fixes to the floating point to integer conversion
routines for aarch64.

Steve Ellcey
sellcey@cavium.com

2017-08-03  Steve Ellcey  <sellcey@cavium.com>

	* sysdeps/aarch64/fpu/math_private.h (libc_feclearexcept_aarch64):
	New function.
	(libc_feclearexcept, libc_feclearexceptf, libc_feclearexceptl):
	New defines.
	* sysdeps/aarch64/fpu/s_llrint.c (OREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_llround.c (OREG_SIZE): Likewise.
	* sysdeps/aarch64/fpu/s_llrintf.c (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_llroundf.c: (OREGS, IREGS): Remove.
	(IREG_SIZE, OREG_SIZE): New macros.
	* sysdeps/aarch64/fpu/s_lrintf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lroundf.c (IREGS): Remove.
	(IREG_SIZE): New macro.
	* sysdeps/aarch64/fpu/s_lrint.c (math_private.h): New include.
	(IREG_SIZE, OREG_SIZE): Initialize if not already set.
	(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
	(__CONCATX): Clear INEXACT if INVALID is set.
	* sysdeps/aarch64/fpu/s_lround.c (IREG_SIZE, OREG_SIZE):
	Initialize if not already set.
        (OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
Joseph Myers - Aug. 3, 2017, 5:47 p.m.
On Thu, 3 Aug 2017, Steve Ellcey wrote:

> +#if OREG_SIZE == 32
> +  /* The rounding step may set FE_INEXEXACT and converting to a 32 bit
> +     value may set FE_INVALID.  We do not want FE_INEXACT set when
> +     FE_INVALID has been set.  */
> +  if (libc_fetestexcept_aarch64 (FE_INVALID))
> +    libc_feclearexcept_aarch64 (FE_INEXACT);
> +#endif

This sort of thing is never correct, because it would clear an "inexact" 
exception that was already set on entry to the function, and functions 
other than <fenv.h> specified to do so should never clear already-raised 
exceptions.

(Also, typo "FE_INEXEXACT".)
Steve Ellcey - Aug. 3, 2017, 6:22 p.m.
On Thu, 2017-08-03 at 17:47 +0000, Joseph Myers wrote:
> On Thu, 3 Aug 2017, Steve Ellcey wrote:
> 
> > 
> > +#if OREG_SIZE == 32
> > +  /* The rounding step may set FE_INEXEXACT and converting to a 32
> > bit
> > +     value may set FE_INVALID.  We do not want FE_INEXACT set when
> > +     FE_INVALID has been set.  */
> > +  if (libc_fetestexcept_aarch64 (FE_INVALID))
> > +    libc_feclearexcept_aarch64 (FE_INEXACT);
> > +#endif
> This sort of thing is never correct, because it would clear an
> "inexact" 
> exception that was already set on entry to the function, and
> functions 
> other than <fenv.h> specified to do so should never clear already-
> raised 
> exceptions.
> 
> (Also, typo "FE_INEXEXACT".)

I hadn't considered that.  So maybe I could save the environment
(feholdexcept), do the calculation and see which exceptions, if any,
got raised.  Then restore the original environment (fesetenv) and raise
one or the other exceptions if needed.  Does that sound like a workable
solution?

Steve Ellcey
sellcey@cavium.com
Joseph Myers - Aug. 3, 2017, 7:48 p.m.
On Thu, 3 Aug 2017, Steve Ellcey wrote:

> I hadn't considered that.  So maybe I could save the environment
> (feholdexcept), do the calculation and see which exceptions, if any,
> got raised.  Then restore the original environment (fesetenv) and raise
> one or the other exceptions if needed.  Does that sound like a workable
> solution?

Yes, you could do that (see x86/x86_64 nearbyintl for example; that needs 
to preserve "invalid" for signaling NaNs, avoid "inexact" from frndint, 
but not clear any "inexact" that was already raised).  Whether doing so is 
optimal, versus using another implementation of the function for ILP32, 
may depend on the AArch64 performance characteristics of saving and 
restoring the environment.

Patch

diff --git a/sysdeps/aarch64/fpu/math_private.h b/sysdeps/aarch64/fpu/math_private.h
index 807111e..d5a141d5 100644
--- a/sysdeps/aarch64/fpu/math_private.h
+++ b/sysdeps/aarch64/fpu/math_private.h
@@ -134,6 +134,20 @@  libc_fetestexcept_aarch64 (int ex)
 #define libc_fetestexceptl libc_fetestexcept_aarch64
 
 static __always_inline void
+libc_feclearexcept_aarch64 (int ex)
+{
+  fpu_fpsr_t fpsr;
+
+  _FPU_GETFPSR (fpsr);
+  fpsr &= ~((fpu_fpsr_t) ex);
+  _FPU_SETFPSR (fpsr);
+}
+
+#define libc_feclearexcept  libc_feclearexcept_aarch64
+#define libc_feclearexceptf libc_feclearexcept_aarch64
+#define libc_feclearexceptl libc_feclearexcept_aarch64
+
+static __always_inline void
 libc_fesetenv_aarch64 (const fenv_t *envp)
 {
   fpu_control_t fpcr;
diff --git a/sysdeps/aarch64/fpu/s_llrint.c b/sysdeps/aarch64/fpu/s_llrint.c
index c0d0d0e..57821c0 100644
--- a/sysdeps/aarch64/fpu/s_llrint.c
+++ b/sysdeps/aarch64/fpu/s_llrint.c
@@ -18,4 +18,5 @@ 
 
 #define FUNC llrint
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llrintf.c b/sysdeps/aarch64/fpu/s_llrintf.c
index 67724c6..98ed4f8 100644
--- a/sysdeps/aarch64/fpu/s_llrintf.c
+++ b/sysdeps/aarch64/fpu/s_llrintf.c
@@ -18,6 +18,7 @@ 
 
 #define FUNC llrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llround.c b/sysdeps/aarch64/fpu/s_llround.c
index ed4b192..ef7aedf 100644
--- a/sysdeps/aarch64/fpu/s_llround.c
+++ b/sysdeps/aarch64/fpu/s_llround.c
@@ -18,4 +18,5 @@ 
 
 #define FUNC llround
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_llroundf.c b/sysdeps/aarch64/fpu/s_llroundf.c
index 360ce8b..294f0f4 100644
--- a/sysdeps/aarch64/fpu/s_llroundf.c
+++ b/sysdeps/aarch64/fpu/s_llroundf.c
@@ -18,6 +18,7 @@ 
 
 #define FUNC llroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #define OTYPE long long int
+#define OREG_SIZE 64
 #include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c
index 8c61a03..9f90385 100644
--- a/sysdeps/aarch64/fpu/s_lrint.c
+++ b/sysdeps/aarch64/fpu/s_lrint.c
@@ -16,7 +16,9 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <math_private.h>
 #include <math.h>
+#include <fenv.h>
 
 #ifndef FUNC
 # define FUNC lrint
@@ -24,18 +26,37 @@ 
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
 #endif
 
-#define OREGS "x"
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
+#endif
+
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
@@ -47,6 +68,13 @@  __CONCATX(__,FUNC) (ITYPE x)
   asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
         "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
         : "=r" (result), "=w" (temp) : "w" (x) );
+#if OREG_SIZE == 32
+  /* The rounding step may set FE_INEXEXACT and converting to a 32 bit
+     value may set FE_INVALID.  We do not want FE_INEXACT set when
+     FE_INVALID has been set.  */
+  if (libc_fetestexcept_aarch64 (FE_INVALID))
+    libc_feclearexcept_aarch64 (FE_INEXACT);
+#endif
   return result;
 }
 
diff --git a/sysdeps/aarch64/fpu/s_lrintf.c b/sysdeps/aarch64/fpu/s_lrintf.c
index a995e4b..2e73271 100644
--- a/sysdeps/aarch64/fpu/s_lrintf.c
+++ b/sysdeps/aarch64/fpu/s_lrintf.c
@@ -18,5 +18,5 @@ 
 
 #define FUNC lrintf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_lround.c b/sysdeps/aarch64/fpu/s_lround.c
index 9be9e7f..1f77d82 100644
--- a/sysdeps/aarch64/fpu/s_lround.c
+++ b/sysdeps/aarch64/fpu/s_lround.c
@@ -24,18 +24,37 @@ 
 
 #ifndef ITYPE
 # define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
 #else
-# ifndef IREGS
-#  error IREGS not defined
+# ifndef IREG_SIZE
+#  error IREG_SIZE not defined
 # endif
 #endif
 
 #ifndef OTYPE
 # define OTYPE long int
+# ifdef __ILP32__
+#  define OREG_SIZE 32
+# else
+#  define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+#  error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
 #endif
 
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
 
 #define __CONCATX(a,b) __CONCAT(a,b)
 
diff --git a/sysdeps/aarch64/fpu/s_lroundf.c b/sysdeps/aarch64/fpu/s_lroundf.c
index 4a066d4..b30ddb6 100644
--- a/sysdeps/aarch64/fpu/s_lroundf.c
+++ b/sysdeps/aarch64/fpu/s_lroundf.c
@@ -18,5 +18,5 @@ 
 
 #define FUNC lroundf
 #define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
 #include <s_lround.c>