[04/23] sim/erc32: Add FPU support on x86_64 hosts.

Message ID 1424159099-5148-5-git-send-email-jiri@gaisler.se
State Superseded
Headers

Commit Message

Jiri Gaisler Feb. 17, 2015, 7:44 a.m. UTC
  * float.c (get_accex): Access FPU control and status words on x86_64.
---
 sim/erc32/float.c | 80 +++++++++++++++++++++++--------------------------------
 1 file changed, 34 insertions(+), 46 deletions(-)
  

Comments

Mike Frysinger Feb. 17, 2015, 9:05 a.m. UTC | #1
On 17 Feb 2015 08:44, Jiri Gaisler wrote:
> 	* float.c (get_accex) access FPU control and status words on x64

shouldn't you gut this file and use <fenv.h> instead for a portable method ?
http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/fenv.h.html

at the very least, you could cut over to <fpu_control.h> ...
-mike
  
Jiri Gaisler Feb. 19, 2015, 8:45 p.m. UTC | #2
On 02/17/2015 10:05 AM, Mike Frysinger wrote:
> On 17 Feb 2015 08:44, Jiri Gaisler wrote:
>> 	* float.c (get_accex) access FPU control and status words on x64
> 
> shouldn't you gut this file and use <fenv.h> instead for a portable method ?
> http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/fenv.h.html
> 
> at the very least, you could cut over to <fpu_control.h> ...
> -mike
> 

I will switch to fenv.h, but this will produce less accurate results
on Intel hardware. A double on SPARC V8 has a 53-bit mantissa, while Intel
supports both 53- and 64-bit mantissas, with 64-bit as the default. To get
accurate SPARC V8 FPU operations, the Intel FPU should be switched to 53-bit,
but fenv.h does not have an API for this. I guess we will have to live with that ...

Jiri.
  
Mike Frysinger Feb. 22, 2015, 4:40 a.m. UTC | #3
On 19 Feb 2015 21:45, Jiri Gaisler wrote:
> On 02/17/2015 10:05 AM, Mike Frysinger wrote:
> > On 17 Feb 2015 08:44, Jiri Gaisler wrote:
> >> 	* float.c (get_accex) access FPU control and status words on x64
> > 
> > shouldn't you gut this file and use <fenv.h> instead for a portable method ?
> > http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/fenv.h.html
> > 
> > at the very least, you could cut over to <fpu_control.h> ...
> 
> I will switch to fenv.h, but this will produce less accurate results
> on Intel hardware. double on SPARC V8 has 53-bit mantissa while Intel has
> both 53- and 64-bit, and 64-bit is default. To get accurate SPARC V8 FPU
> operations, the Intel FPU should be switched to 53-bit, but the fenv.h
> does not have an API for this. I guess we will have to live with that ...

if you wanted to keep a little inline asm to just that particular bit twiddle, 
that should be fine.

normally the sims are functional ... but i guess this part of the simulator is 
relying on the inaccuracies of floating point to trade for speed ?
-mike
  
Jiri Gaisler Feb. 22, 2015, 9:43 p.m. UTC | #4
On 02/22/2015 05:40 AM, Mike Frysinger wrote:
> On 19 Feb 2015 21:45, Jiri Gaisler wrote:
>> On 02/17/2015 10:05 AM, Mike Frysinger wrote:
>>> On 17 Feb 2015 08:44, Jiri Gaisler wrote:
>>>> 	* float.c (get_accex) access FPU control and status words on x64
>>>
>>> shouldn't you gut this file and use <fenv.h> instead for a portable method ?
>>> http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/fenv.h.html
>>>
>>> at the very least, you could cut over to <fpu_control.h> ...
>>
>> I will switch to fenv.h, but this will produce less accurate results
>> on Intel hardware. double on SPARC V8 has 53-bit mantissa while Intel has
>> both 53- and 64-bit, and 64-bit is default. To get accurate SPARC V8 FPU
>> operations, the Intel FPU should be switched to 53-bit, but the fenv.h
>> does not have an API for this. I guess we will have to live with that ...
> 
> if you wanted to keep a little inline asm to just that particular bit twiddle, 
> that should be fine.

Can't be bothered, I like the idea of dropping the assembly fully.

> 
> normally the sims are functional ... but i guess this part of the simulator is 
> relying on the inaccuracies of floating point to trade for speed ?
> -mike

Yes, instead of emulating the FPU (soft-float libs) we use the host FPU directly.
Works fine when the host is standard IEEE-754 compatible, but can lead to minor
discrepancies when the host has extended precision (like Intel/AMD). If somebody
really cares, he can run sis on a non-x86 box (SPARC, ARM or PowerPC).

Jiri.
  

Patch

diff --git a/sim/erc32/float.c b/sim/erc32/float.c
index 598b7cc..ce92a39 100644
--- a/sim/erc32/float.c
+++ b/sim/erc32/float.c
@@ -38,7 +38,6 @@ 
 
 extern uint32	_get_sw (void);
 extern uint32	_get_cw (void);
-static void	__setfpucw (unsigned short fpu_control);
 
 /* This host dependent routine should return the accrued exceptions */
 int
@@ -46,7 +45,7 @@  get_accex()
 {
 #ifdef sparc
     return ((_get_fsr_raw() >> 5) & 0x1F);
-#elif i386
+#elif defined(i386) || defined(__x86_64__)
     uint32 accx;
 
     accx = _get_sw() & 0x3f;
@@ -66,7 +65,7 @@  clear_accex()
 {
 #ifdef sparc
     set_fsr((_get_fsr_raw() & ~0x3e0));
-#elif i386
+#elif defined(i386) || defined(__x86_64__)
     asm("\n"
 ".text\n"
 "	fnclex\n"
@@ -84,9 +83,8 @@  uint32 fsr;
 {
 #ifdef sparc
 	_set_fsr_raw(fsr & ~0x0f800000);
-#elif i386
-     void __setfpucw(unsigned short fpu_control);
-     uint32 rawfsr;
+#elif defined(i386) || defined(__x86_64__)
+     unsigned short rawfsr;
 
      fsr >>= 30;
      switch (fsr) {
@@ -102,9 +100,8 @@  uint32 fsr;
 	  fsr = 1;
 	  break;
      }
-     rawfsr = _get_cw();
-     rawfsr |= (fsr << 10) | 0x3ff;
-     __setfpucw(rawfsr);
+     rawfsr = (fsr << 10) | 0x2FF; /* double precision, all traps masked */
+    __asm__ volatile ("fldcw %0" :: "m" (rawfsr));
 #else
 #warning no fpu trap support for this target
 #endif
@@ -175,45 +172,36 @@  uint32 fsr;
 "\n"
 "    ");
 
+#elif defined(__x86_64__)
+     asm ("\n"
+"\n"
+".text\n"
+".align 8\n"
+".globl _get_sw, __get_sw\n"
+"__get_sw:\n"
+"_get_sw:\n"
+"	  pushq %rbp\n"
+"	  movq %rsp, %rbp\n"
+"	  movl $0, %eax\n"
+"	  fnstsw %ax\n"
+"	  movq %rbp, %rsp\n"
+"	  popq %rbp\n"
+"	  ret\n"
+".align 8\n"
+".globl _get_cw, __get_cw\n"
+"__get_cw:\n"
+"_get_cw:\n"
+"	  pushq %rbp\n"
+"	  movq %rsp, %rbp\n"
+"	  subq $2, %rsp\n"
+"	  fnstcw -2(%rbp)\n"
+"	  movw -2(%rbp), %ax\n"
+"	  movq %rbp, %rsp\n"
+"	  popq %rbp\n"
+"	  ret\n"
+"	  ");
 
 #else
 #warning no fpu trap support for this target
 #endif
 
-#if i386
-/* #if defined _WIN32 || defined __GO32__ */
-/* This is so floating exception handling works on NT
-   These definitions are from the linux fpu_control.h, which
-   doesn't exist on NT.
-
-   default to:
-     - extended precision
-     - rounding to nearest
-     - exceptions on overflow, zero divide and NaN
-*/
-#define _FPU_DEFAULT  0x1372 
-#define _FPU_RESERVED 0xF0C0  /* Reserved bits in cw */
-
-static void
-__setfpucw(unsigned short fpu_control)
-{
-  volatile unsigned short cw;
-
-  /* If user supplied _fpu_control, use it ! */
-  if (!fpu_control)
-  { 
-    /* use defaults */
-    fpu_control = _FPU_DEFAULT;
-  }
-  /* Get Control Word */
-  __asm__ volatile ("fnstcw %0" : "=m" (cw) : );
-  
-  /* mask in */
-  cw &= _FPU_RESERVED;
-  cw = cw | (fpu_control & ~_FPU_RESERVED);
-
-  /* set cw */
-  __asm__ volatile ("fldcw %0" :: "m" (cw));
-}
-/* #endif */
-#endif