[3/4] Add ILP32 support to aarch64
Commit Message
Here are some fixes to the floating point to integer conversion
routines for aarch64.
Steve Ellcey
sellcey@cavium.com
2017-08-03 Steve Ellcey <sellcey@cavium.com>
* sysdeps/aarch64/fpu/math_private.h (libc_feclearexcept_aarch64):
New function.
(libc_feclearexcept, libc_feclearexceptf, libc_feclearexceptl):
New defines.
* sysdeps/aarch64/fpu/s_llrint.c (OREG_SIZE): New macro.
* sysdeps/aarch64/fpu/s_llround.c (OREG_SIZE): Likewise.
* sysdeps/aarch64/fpu/s_llrintf.c (OREGS, IREGS): Remove.
(IREG_SIZE, OREG_SIZE): New macros.
* sysdeps/aarch64/fpu/s_llroundf.c (OREGS, IREGS): Remove.
(IREG_SIZE, OREG_SIZE): New macros.
* sysdeps/aarch64/fpu/s_lrintf.c (IREGS): Remove.
(IREG_SIZE): New macro.
* sysdeps/aarch64/fpu/s_lroundf.c (IREGS): Remove.
(IREG_SIZE): New macro.
* sysdeps/aarch64/fpu/s_lrint.c (math_private.h, fenv.h): New includes.
(IREG_SIZE, OREG_SIZE): Initialize if not already set.
(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
(__CONCATX): Clear INEXACT if INVALID is set.
* sysdeps/aarch64/fpu/s_lround.c (IREG_SIZE, OREG_SIZE):
Initialize if not already set.
(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
Comments
On Thu, 3 Aug 2017, Steve Ellcey wrote:
> +#if OREG_SIZE == 32
> + /* The rounding step may set FE_INEXEXACT and converting to a 32 bit
> + value may set FE_INVALID. We do not want FE_INEXACT set when
> + FE_INVALID has been set. */
> + if (libc_fetestexcept_aarch64 (FE_INVALID))
> + libc_feclearexcept_aarch64 (FE_INEXACT);
> +#endif
This sort of thing is never correct, because it would clear an "inexact"
exception that was already set on entry to the function, and functions
other than <fenv.h> specified to do so should never clear already-raised
exceptions.
(Also, typo "FE_INEXEXACT".)
On Thu, 2017-08-03 at 17:47 +0000, Joseph Myers wrote:
> On Thu, 3 Aug 2017, Steve Ellcey wrote:
>
> >
> > +#if OREG_SIZE == 32
> > + /* The rounding step may set FE_INEXEXACT and converting to a 32 bit
> > + value may set FE_INVALID. We do not want FE_INEXACT set when
> > + FE_INVALID has been set. */
> > + if (libc_fetestexcept_aarch64 (FE_INVALID))
> > + libc_feclearexcept_aarch64 (FE_INEXACT);
> > +#endif
> This sort of thing is never correct, because it would clear an "inexact"
> exception that was already set on entry to the function, and functions
> other than <fenv.h> specified to do so should never clear already-raised
> exceptions.
>
> (Also, typo "FE_INEXEXACT".)
I hadn't considered that. So maybe I could save the environment
(feholdexcept), do the calculation and see which exceptions, if any,
got raised. Then restore the original environment (fesetenv) and raise
one or the other exceptions if needed. Does that sound like a workable
solution?
Steve Ellcey
sellcey@cavium.com
On Thu, 3 Aug 2017, Steve Ellcey wrote:
> I hadn't considered that. So maybe I could save the environment
> (feholdexcept), do the calculation and see which exceptions, if any,
> got raised. Then restore the original environment (fesetenv) and raise
> one or the other exceptions if needed. Does that sound like a workable
> solution?
Yes, you could do that (see x86/x86_64 nearbyintl for example; that needs
to preserve "invalid" for signaling NaNs, avoid "inexact" from frndint,
but not clear any "inexact" that was already raised). Whether doing so is
optimal, versus using another implementation of the function for ILP32,
may depend on the AArch64 performance characteristics of saving and
restoring the environment.
@@ -134,6 +134,20 @@ libc_fetestexcept_aarch64 (int ex)
#define libc_fetestexceptl libc_fetestexcept_aarch64
static __always_inline void
+libc_feclearexcept_aarch64 (int ex)
+{
+ fpu_fpsr_t fpsr;
+
+ _FPU_GETFPSR (fpsr);
+ fpsr &= ~((fpu_fpsr_t) ex);
+ _FPU_SETFPSR (fpsr);
+}
+
+#define libc_feclearexcept libc_feclearexcept_aarch64
+#define libc_feclearexceptf libc_feclearexcept_aarch64
+#define libc_feclearexceptl libc_feclearexcept_aarch64
+
+static __always_inline void
libc_fesetenv_aarch64 (const fenv_t *envp)
{
fpu_control_t fpcr;
@@ -18,4 +18,5 @@
#define FUNC llrint
#define OTYPE long long int
+#define OREG_SIZE 64
#include <s_lrint.c>
@@ -18,6 +18,7 @@
#define FUNC llrintf
#define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
#define OTYPE long long int
+#define OREG_SIZE 64
#include <s_lrint.c>
@@ -18,4 +18,5 @@
#define FUNC llround
#define OTYPE long long int
+#define OREG_SIZE 64
#include <s_lround.c>
@@ -18,6 +18,7 @@
#define FUNC llroundf
#define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
#define OTYPE long long int
+#define OREG_SIZE 64
#include <s_lround.c>
@@ -16,7 +16,9 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <math_private.h>
#include <math.h>
+#include <fenv.h>
#ifndef FUNC
# define FUNC lrint
@@ -24,18 +26,37 @@
#ifndef ITYPE
# define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
#else
-# ifndef IREGS
-# error IREGS not defined
+# ifndef IREG_SIZE
+# error IREG_SIZE not defined
# endif
#endif
#ifndef OTYPE
# define OTYPE long int
+# ifdef __ILP32__
+# define OREG_SIZE 32
+# else
+# define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+# error OREG_SIZE not defined
+# endif
#endif
-#define OREGS "x"
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
+#endif
+
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
#define __CONCATX(a,b) __CONCAT(a,b)
@@ -47,6 +68,13 @@ __CONCATX(__,FUNC) (ITYPE x)
asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
"fcvtzs" "\t%" OREGS "0, %" IREGS "1"
: "=r" (result), "=w" (temp) : "w" (x) );
+#if OREG_SIZE == 32
+ /* The rounding step may set FE_INEXEXACT and converting to a 32 bit
+ value may set FE_INVALID. We do not want FE_INEXACT set when
+ FE_INVALID has been set. */
+ if (libc_fetestexcept_aarch64 (FE_INVALID))
+ libc_feclearexcept_aarch64 (FE_INEXACT);
+#endif
return result;
}
@@ -18,5 +18,5 @@
#define FUNC lrintf
#define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
#include <s_lrint.c>
@@ -24,18 +24,37 @@
#ifndef ITYPE
# define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
#else
-# ifndef IREGS
-# error IREGS not defined
+# ifndef IREG_SIZE
+# error IREG_SIZE not defined
# endif
#endif
#ifndef OTYPE
# define OTYPE long int
+# ifdef __ILP32__
+# define OREG_SIZE 32
+# else
+# define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+# error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
#endif
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
#define __CONCATX(a,b) __CONCAT(a,b)
@@ -18,5 +18,5 @@
#define FUNC lroundf
#define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
#include <s_lround.c>