[3/4] Add ILP32 support to aarch64
Commit Message
On Tue, 2017-08-08 at 16:01 +0100, Szabolcs Nagy wrote:
>
> > +#if IREG_SIZE == 64 && OREG_SIZE == 32
> > + if (__builtin_fabs (x) > INT32_MAX - 2)
> i don't understand the -2 here.
I was confused and was trying to handle the fact that fabs(INT32_MIN) !=
INT32_MAX. I have removed the -2 and am now just comparing to INT32_MAX,
which seems to work fine. Since fabs(INT32_MIN) is greater than
INT32_MAX, we may unnecessarily enter this if statement for values
between INT32_MIN and INT32_MIN+1, but that should not cause any
failures, just a slowdown.
> > + {
> > + /* Converting large values to a 32 bit in may cause the
> > frintx/fcvtza
> s/in/int/
Fixed that.
> > + invalid_p = libc_fetestexcept (FE_INVALID);
> > + inexact_p = libc_fetestexcept (FE_INEXACT);
> multiple flags can be tested/raised in a single call.
Good point. I changed this to one call and saved the flags in an
integer variable for checking later.
> > + libc_fesetenv (&env);
> > +
> > + if (invalid_p)
> > + feraiseexcept (FE_INVALID);
> > + else if (inexact_p)
> > + feraiseexcept (FE_INEXACT);
> > +
> i think correct trapping is not guaranteed by glibc,
> only correct status flags when the function returns,
> so spurious inexact is not a problem if it is already
> raised, and then i expect better code gen for the
> inexact clearing approach:
>
> if (fabs (x) > INT32_MAX && fetestexcept (FE_INEXACT) == 0)
> {
> asm (...);
> if (fetestexcept (FE_INVALID|FE_INEXACT) ==
> (FE_INVALID|FE_INEXACT))
> feclearexcept (FE_INEXACT);
> }
> else
> asm (...);
As you mentioned in your followup email, we have to worry about
FE_INVALID being set on entry too. I have attached an updated
version of my patch.
Steve Ellcey
sellcey@cavium.com
2017-08-08 Steve Ellcey <sellcey@cavium.com>
* sysdeps/aarch64/fpu/s_llrint.c (OREG_SIZE): New macro.
* sysdeps/aarch64/fpu/s_llround.c (OREG_SIZE): Likewise.
* sysdeps/aarch64/fpu/s_llrintf.c (OREGS, IREGS): Remove.
(IREG_SIZE, OREG_SIZE): New macros.
* sysdeps/aarch64/fpu/s_llroundf.c (OREGS, IREGS): Remove.
(IREG_SIZE, OREG_SIZE): New macros.
* sysdeps/aarch64/fpu/s_lrintf.c (IREGS): Remove.
(IREG_SIZE): New macro.
* sysdeps/aarch64/fpu/s_lroundf.c (IREGS): Remove.
(IREG_SIZE): New macro.
* sysdeps/aarch64/fpu/s_lrint.c (math_private.h, fenv.h, stdint.h):
New includes.
(IREG_SIZE, OREG_SIZE): Initialize if not already set.
(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
(__CONCATX): Handle exceptions correctly on large values that may
set FE_INVALID.
* sysdeps/aarch64/fpu/s_lround.c (IREG_SIZE, OREG_SIZE):
Initialize if not already set.
(OREGS, IREGS): Set based on IREG_SIZE and OREG_SIZE.
@@ -18,4 +18,5 @@
#define FUNC llrint
#define OTYPE long long int
+#define OREG_SIZE 64
#include <s_lrint.c>
@@ -18,6 +18,7 @@
#define FUNC llrintf
#define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
#define OTYPE long long int
+#define OREG_SIZE 64
#include <s_lrint.c>
@@ -18,4 +18,5 @@
#define FUNC llround
#define OTYPE long long int
+#define OREG_SIZE 64
#include <s_lround.c>
@@ -18,6 +18,7 @@
#define FUNC llroundf
#define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
#define OTYPE long long int
+#define OREG_SIZE 64
#include <s_lround.c>
@@ -16,7 +16,10 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <math_private.h>
#include <math.h>
+#include <fenv.h>
+#include <stdint.h>
#ifndef FUNC
# define FUNC lrint
@@ -24,18 +27,37 @@
#ifndef ITYPE
# define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
#else
-# ifndef IREGS
-# error IREGS not defined
+# ifndef IREG_SIZE
+# error IREG_SIZE not defined
# endif
#endif
#ifndef OTYPE
# define OTYPE long int
+# ifdef __ILP32__
+# define OREG_SIZE 32
+# else
+# define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+# error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
#endif
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
#define __CONCATX(a,b) __CONCAT(a,b)
@@ -44,6 +66,32 @@ __CONCATX(__,FUNC) (ITYPE x)
{
OTYPE result;
ITYPE temp;
+
+#if IREG_SIZE == 64 && OREG_SIZE == 32
+ if (__builtin_fabs (x) > INT32_MAX)
+ {
+ /* Converting large values to a 32 bit int may cause the frintx/fcvtzs
+ sequence to set both FE_INVALID and FE_INEXACT. To avoid this we save
+ and restore the floating-point environment and raise only one of them. */
+
+ fenv_t env;
+ int feflags;
+
+ libc_feholdexcept (&env);
+ asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
+ "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
+ : "=r" (result), "=w" (temp) : "w" (x) );
+ feflags = libc_fetestexcept (FE_INVALID | FE_INEXACT);
+ libc_fesetenv (&env);
+
+ if (feflags & FE_INVALID)
+ feraiseexcept (FE_INVALID);
+ else if (feflags & FE_INEXACT)
+ feraiseexcept (FE_INEXACT);
+
+ return result;
+ }
+#endif
asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
"fcvtzs" "\t%" OREGS "0, %" IREGS "1"
: "=r" (result), "=w" (temp) : "w" (x) );
@@ -18,5 +18,5 @@
#define FUNC lrintf
#define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
#include <s_lrint.c>
@@ -24,18 +24,37 @@
#ifndef ITYPE
# define ITYPE double
-# define IREGS "d"
+# define IREG_SIZE 64
#else
-# ifndef IREGS
-# error IREGS not defined
+# ifndef IREG_SIZE
+# error IREG_SIZE not defined
# endif
#endif
#ifndef OTYPE
# define OTYPE long int
+# ifdef __ILP32__
+# define OREG_SIZE 32
+# else
+# define OREG_SIZE 64
+# endif
+#else
+# ifndef OREG_SIZE
+# error OREG_SIZE not defined
+# endif
+#endif
+
+#if IREG_SIZE == 32
+# define IREGS "s"
+#else
+# define IREGS "d"
#endif
-#define OREGS "x"
+#if OREG_SIZE == 32
+# define OREGS "w"
+#else
+# define OREGS "x"
+#endif
#define __CONCATX(a,b) __CONCAT(a,b)
@@ -18,5 +18,5 @@
#define FUNC lroundf
#define ITYPE float
-#define IREGS "s"
+#define IREG_SIZE 32
#include <s_lround.c>