sparc: Force calculation that raises exception

Message ID 20240112092628.2464455-3-cederman@gaisler.com
State Superseded
Headers
Series sparc: Force calculation that raises exception |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Testing passed

Commit Message

Daniel Cederman Jan. 12, 2024, 9:26 a.m. UTC
  Read out the FPU control word to force the calculation to complete and
raise the exception.

With this change the math/test-fenv test pass for LEON.

Signed-off-by: Daniel Cederman <cederman@gaisler.com>
---
 sysdeps/sparc/fpu/fraiseexcpt.c | 7 +++++++
 1 file changed, 7 insertions(+)
  

Comments

Adhemerval Zanella Netto Jan. 12, 2024, 4:45 p.m. UTC | #1
On 12/01/24 06:26, Daniel Cederman wrote:
> Read out the FPU control word to force the calculation to complete and
> raise the exception.
> 
> With this change the math/test-fenv test pass for LEON.

Is this to try mitigate 'GRFPU Floating-point controller: Missing 
FDIV/FSQRT Result' [1]?

If so, wouldn't be better to use '-mfix-ut699' when building against
leon3? There are potentially multiple usages of FDIV/FSQRT within 
libc/libm that might be subject to this issue.

At least recent gcc versions seems to add the required nops after fsqrt:

$ cat t.c
#include <math.h>
#include <float.h>

#define math_force_eval(x)                                              \
  ({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "m" (__x)); })

int foo (void)
{
  static const struct {
    double zero, one, max, min, pi;
  } c = {
    0.0, 1.0, DBL_MAX, DBL_MIN, M_PI
  };
  double d;
  asm ("" : "=e" (d) : "0" (c.zero));
  d /= c.zero;
  __asm __volatile ("" : : "e" (d));
}
$ sparc64-linux-gnu -m32 -mcpu=leon3 -O2 -mhard-float -mfix-ut699 t.c -S -o -
[...]
foo:
        sethi   %hi(.LC0), %g1
        ldd     [%g1+%lo(.LC0)], %f10
        fmovs   %f10, %f8
        fmovs   %f11, %f9
        fdivd   %f8, %f10, %f8
        std     %f8, [%sp-8]
        nop
        nop
        jmp     %o7+8
[...]

In any case this penalize non-leon3 chips with the extra 'stx fsr,...', so
it should be used only for leon (and with a proper explanation of why it is
required).

[1] https://www.gaisler.com/doc/antn/GRLIB-TN-0013.pdf

> 
> Signed-off-by: Daniel Cederman <cederman@gaisler.com>
> ---
>  sysdeps/sparc/fpu/fraiseexcpt.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/sysdeps/sparc/fpu/fraiseexcpt.c b/sysdeps/sparc/fpu/fraiseexcpt.c
> index 26a7720ec9..43176cf6a3 100644
> --- a/sysdeps/sparc/fpu/fraiseexcpt.c
> +++ b/sysdeps/sparc/fpu/fraiseexcpt.c
> @@ -20,6 +20,7 @@
>  #include <float.h>
>  #include <math.h>
>  #include <shlib-compat.h>
> +#include <fpu_control.h>
>  
>  int
>  __feraiseexcept (int excepts)
> @@ -30,6 +31,7 @@ __feraiseexcept (int excepts)
>      0.0, 1.0, DBL_MAX, DBL_MIN, M_PI
>    };
>    double d;
> +  fpu_control_t cw;
>  
>    /* Raise exceptions represented by EXPECTS.  But we must raise only
>       one signal at a time.  It is important the if the overflow/underflow
> @@ -43,6 +45,7 @@ __feraiseexcept (int excepts)
>        __asm ("" : "=e" (d) : "0" (c.zero));
>        d /= c.zero;
>        __asm __volatile ("" : : "e" (d));
> +      _FPU_GETCW(cw);
>      }
>  
>    /* Next: division by zero.  */
> @@ -51,6 +54,7 @@ __feraiseexcept (int excepts)
>        __asm ("" : "=e" (d) : "0" (c.one));
>        d /= c.zero;
>        __asm __volatile ("" : : "e" (d));
> +      _FPU_GETCW(cw);
>      }
>  
>    /* Next: overflow.  */
> @@ -59,6 +63,7 @@ __feraiseexcept (int excepts)
>        __asm ("" : "=e" (d) : "0" (c.max));
>        d *= d;
>        __asm __volatile ("" : : "e" (d));
> +      _FPU_GETCW(cw);
>      }
>  
>    /* Next: underflow.  */
> @@ -67,6 +72,7 @@ __feraiseexcept (int excepts)
>        __asm ("" : "=e" (d) : "0" (c.min));
>        d *= d;
>        __asm __volatile ("" : : "e" (d));
> +      _FPU_GETCW(cw);
>      }
>  
>    /* Last: inexact.  */
> @@ -75,6 +81,7 @@ __feraiseexcept (int excepts)
>        __asm ("" : "=e" (d) : "0" (c.one));
>        d /= c.pi;
>        __asm __volatile ("" : : "e" (d));
> +      _FPU_GETCW(cw);
>      }
>  
>    /* Success.  */
  
Daniel Cederman Jan. 15, 2024, 9:41 a.m. UTC | #2
On 2024-01-12 17:45, Adhemerval Zanella Netto wrote:
> 
> 
> On 12/01/24 06:26, Daniel Cederman wrote:
>> Read out the FPU control word to force the calculation to complete and
>> raise the exception.
>>
>> With this change the math/test-fenv test pass for LEON.
> 
> Is this to try mitigate 'GRFPU Floating-point controller: Missing
> FDIV/FSQRT Result' [1]?
> 

No, this change is not for any errata. The current implementation 
generates code that looks like this:

   ac:	91 a2 09 ca 	fdivd  %f8, %f10, %f8
   b0:	90 10 20 00 	clr  %o0
   b4:	81 c3 e0 08 	retl
   bc:	9c 03 a0 50 	add  %sp, 0x50, %sp

The division performed will raise an exception, but if the FPU is 
running asynchronous with the CPU the exception will not happen until 
the division is finished and another fp operation accesses the FPU. 
Reading out the control word forces the division to finish and the 
exception to trigger.

But looking at it now I think this is the more correct change:

- __asm __volatile ("" : : "e" (d));
+ __asm __volatile ("" : : "m" (d));

That would generate a store instruction with the same effect as the stfsr.

/Daniel
  
Adhemerval Zanella Netto Jan. 15, 2024, 11:47 a.m. UTC | #3
On 15/01/24 06:41, Daniel Cederman wrote:
> On 2024-01-12 17:45, Adhemerval Zanella Netto wrote:
>>
>>
>> On 12/01/24 06:26, Daniel Cederman wrote:
>>> Read out the FPU control word to force the calculation to complete and
>>> raise the exception.
>>>
>>> With this change the math/test-fenv test pass for LEON.
>>
>> Is this to try mitigate 'GRFPU Floating-point controller: Missing
>> FDIV/FSQRT Result' [1]?
>>
> 
> No, this change is not for any errata. The current implementation generates code that looks like this:
> 
>   ac:    91 a2 09 ca     fdivd  %f8, %f10, %f8
>   b0:    90 10 20 00     clr  %o0
>   b4:    81 c3 e0 08     retl
>   bc:    9c 03 a0 50     add  %sp, 0x50, %sp
> 
> The division performed will raise an exception, but if the FPU is running asynchronous with the CPU the exception will not happen until the division is finished and another fp operation accesses the FPU. Reading out the control word forces the division to finish and the exception to trigger.
> 
> But looking at it now I think this is the more correct change:
> 
> - __asm __volatile ("" : : "e" (d));
> + __asm __volatile ("" : : "m" (d));
> 
> That would generate a store instruction with the same effect as the stfsr.

We have the math_force_eval macro exactly for this [1].  It should be ok
to be used on the generic implementation (and not making it leon3 specific).

[1] https://godbolt.org/z/sPWKhhnsz
  

Patch

diff --git a/sysdeps/sparc/fpu/fraiseexcpt.c b/sysdeps/sparc/fpu/fraiseexcpt.c
index 26a7720ec9..43176cf6a3 100644
--- a/sysdeps/sparc/fpu/fraiseexcpt.c
+++ b/sysdeps/sparc/fpu/fraiseexcpt.c
@@ -20,6 +20,7 @@ 
 #include <float.h>
 #include <math.h>
 #include <shlib-compat.h>
+#include <fpu_control.h>
 
 int
 __feraiseexcept (int excepts)
@@ -30,6 +31,7 @@  __feraiseexcept (int excepts)
     0.0, 1.0, DBL_MAX, DBL_MIN, M_PI
   };
   double d;
+  fpu_control_t cw;
 
   /* Raise exceptions represented by EXPECTS.  But we must raise only
      one signal at a time.  It is important the if the overflow/underflow
@@ -43,6 +45,7 @@  __feraiseexcept (int excepts)
       __asm ("" : "=e" (d) : "0" (c.zero));
       d /= c.zero;
       __asm __volatile ("" : : "e" (d));
+      _FPU_GETCW(cw);
     }
 
   /* Next: division by zero.  */
@@ -51,6 +54,7 @@  __feraiseexcept (int excepts)
       __asm ("" : "=e" (d) : "0" (c.one));
       d /= c.zero;
       __asm __volatile ("" : : "e" (d));
+      _FPU_GETCW(cw);
     }
 
   /* Next: overflow.  */
@@ -59,6 +63,7 @@  __feraiseexcept (int excepts)
       __asm ("" : "=e" (d) : "0" (c.max));
       d *= d;
       __asm __volatile ("" : : "e" (d));
+      _FPU_GETCW(cw);
     }
 
   /* Next: underflow.  */
@@ -67,6 +72,7 @@  __feraiseexcept (int excepts)
       __asm ("" : "=e" (d) : "0" (c.min));
       d *= d;
       __asm __volatile ("" : : "e" (d));
+      _FPU_GETCW(cw);
     }
 
   /* Last: inexact.  */
@@ -75,6 +81,7 @@  __feraiseexcept (int excepts)
       __asm ("" : "=e" (d) : "0" (c.one));
       d /= c.pi;
       __asm __volatile ("" : : "e" (d));
+      _FPU_GETCW(cw);
     }
 
   /* Success.  */