sparc: Force calculation that raises exception
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Testing passed
|
Commit Message
Read out the FPU control word to force the calculation to complete and
raise the exception.
With this change the math/test-fenv test pass for LEON.
Signed-off-by: Daniel Cederman <cederman@gaisler.com>
---
sysdeps/sparc/fpu/fraiseexcpt.c | 7 +++++++
1 file changed, 7 insertions(+)
Comments
On 12/01/24 06:26, Daniel Cederman wrote:
> Read out the FPU control word to force the calculation to complete and
> raise the exception.
>
> With this change the math/test-fenv test pass for LEON.
Is this to try mitigate 'GRFPU Floating-point controller: Missing
FDIV/FSQRT Result' [1]?
If so, wouldn't be better to use '-mfix-ut699' when building against
leon3? There are potentially multiple usages of FDIV/FSQRT within
libc/libm that might be subject to this issue.
At least recent gcc versions seems to add the required nops after fsqrt:
$ cat t.c
#include <math.h>
#include <float.h>
#define math_force_eval(x) \
({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "m" (__x)); })
int foo (void)
{
static const struct {
double zero, one, max, min, pi;
} c = {
0.0, 1.0, DBL_MAX, DBL_MIN, M_PI
};
double d;
asm ("" : "=e" (d) : "0" (c.zero));
d /= c.zero;
__asm __volatile ("" : : "e" (d));
}
$ sparc64-linux-gnu -m32 -mcpu=leon3 -O2 -mhard-float -mfix-ut699 t.c -S -o -
[...]
foo:
sethi %hi(.LC0), %g1
ldd [%g1+%lo(.LC0)], %f10
fmovs %f10, %f8
fmovs %f11, %f9
fdivd %f8, %f10, %f8
std %f8, [%sp-8]
nop
nop
jmp %o7+8
[...]
In any case this penalize non-leon3 chips with the extra 'stx fsr,...', so
it should be used only for leon (and with a proper explanation of why it is
required).
[1] https://www.gaisler.com/doc/antn/GRLIB-TN-0013.pdf
>
> Signed-off-by: Daniel Cederman <cederman@gaisler.com>
> ---
> sysdeps/sparc/fpu/fraiseexcpt.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/sysdeps/sparc/fpu/fraiseexcpt.c b/sysdeps/sparc/fpu/fraiseexcpt.c
> index 26a7720ec9..43176cf6a3 100644
> --- a/sysdeps/sparc/fpu/fraiseexcpt.c
> +++ b/sysdeps/sparc/fpu/fraiseexcpt.c
> @@ -20,6 +20,7 @@
> #include <float.h>
> #include <math.h>
> #include <shlib-compat.h>
> +#include <fpu_control.h>
>
> int
> __feraiseexcept (int excepts)
> @@ -30,6 +31,7 @@ __feraiseexcept (int excepts)
> 0.0, 1.0, DBL_MAX, DBL_MIN, M_PI
> };
> double d;
> + fpu_control_t cw;
>
> /* Raise exceptions represented by EXPECTS. But we must raise only
> one signal at a time. It is important the if the overflow/underflow
> @@ -43,6 +45,7 @@ __feraiseexcept (int excepts)
> __asm ("" : "=e" (d) : "0" (c.zero));
> d /= c.zero;
> __asm __volatile ("" : : "e" (d));
> + _FPU_GETCW(cw);
> }
>
> /* Next: division by zero. */
> @@ -51,6 +54,7 @@ __feraiseexcept (int excepts)
> __asm ("" : "=e" (d) : "0" (c.one));
> d /= c.zero;
> __asm __volatile ("" : : "e" (d));
> + _FPU_GETCW(cw);
> }
>
> /* Next: overflow. */
> @@ -59,6 +63,7 @@ __feraiseexcept (int excepts)
> __asm ("" : "=e" (d) : "0" (c.max));
> d *= d;
> __asm __volatile ("" : : "e" (d));
> + _FPU_GETCW(cw);
> }
>
> /* Next: underflow. */
> @@ -67,6 +72,7 @@ __feraiseexcept (int excepts)
> __asm ("" : "=e" (d) : "0" (c.min));
> d *= d;
> __asm __volatile ("" : : "e" (d));
> + _FPU_GETCW(cw);
> }
>
> /* Last: inexact. */
> @@ -75,6 +81,7 @@ __feraiseexcept (int excepts)
> __asm ("" : "=e" (d) : "0" (c.one));
> d /= c.pi;
> __asm __volatile ("" : : "e" (d));
> + _FPU_GETCW(cw);
> }
>
> /* Success. */
On 2024-01-12 17:45, Adhemerval Zanella Netto wrote:
>
>
> On 12/01/24 06:26, Daniel Cederman wrote:
>> Read out the FPU control word to force the calculation to complete and
>> raise the exception.
>>
>> With this change the math/test-fenv test pass for LEON.
>
> Is this to try mitigate 'GRFPU Floating-point controller: Missing
> FDIV/FSQRT Result' [1]?
>
No, this change is not for any errata. The current implementation
generates code that looks like this:
ac: 91 a2 09 ca fdivd %f8, %f10, %f8
b0: 90 10 20 00 clr %o0
b4: 81 c3 e0 08 retl
bc: 9c 03 a0 50 add %sp, 0x50, %sp
The division performed will raise an exception, but if the FPU is
running asynchronous with the CPU the exception will not happen until
the division is finished and another fp operation accesses the FPU.
Reading out the control word forces the division to finish and the
exception to trigger.
But looking at it now I think this is the more correct change:
- __asm __volatile ("" : : "e" (d));
+ __asm __volatile ("" : : "m" (d));
That would generate a store instruction with the same effect as the stfsr.
/Daniel
On 15/01/24 06:41, Daniel Cederman wrote:
> On 2024-01-12 17:45, Adhemerval Zanella Netto wrote:
>>
>>
>> On 12/01/24 06:26, Daniel Cederman wrote:
>>> Read out the FPU control word to force the calculation to complete and
>>> raise the exception.
>>>
>>> With this change the math/test-fenv test pass for LEON.
>>
>> Is this to try mitigate 'GRFPU Floating-point controller: Missing
>> FDIV/FSQRT Result' [1]?
>>
>
> No, this change is not for any errata. The current implementation generates code that looks like this:
>
> ac: 91 a2 09 ca fdivd %f8, %f10, %f8
> b0: 90 10 20 00 clr %o0
> b4: 81 c3 e0 08 retl
> bc: 9c 03 a0 50 add %sp, 0x50, %sp
>
> The division performed will raise an exception, but if the FPU is running asynchronous with the CPU the exception will not happen until the division is finished and another fp operation accesses the FPU. Reading out the control word forces the division to finish and the exception to trigger.
>
> But looking at it now I think this is the more correct change:
>
> - __asm __volatile ("" : : "e" (d));
> + __asm __volatile ("" : : "m" (d));
>
> That would generate a store instruction with the same effect as the stfsr.
We have the math_force_eval macro exactly for this [1]. It should be ok
to be used on the generic implementation (and not making it leon3 specific).
[1] https://godbolt.org/z/sPWKhhnsz
@@ -20,6 +20,7 @@
#include <float.h>
#include <math.h>
#include <shlib-compat.h>
+#include <fpu_control.h>
int
__feraiseexcept (int excepts)
@@ -30,6 +31,7 @@ __feraiseexcept (int excepts)
0.0, 1.0, DBL_MAX, DBL_MIN, M_PI
};
double d;
+ fpu_control_t cw;
/* Raise exceptions represented by EXPECTS. But we must raise only
one signal at a time. It is important the if the overflow/underflow
@@ -43,6 +45,7 @@ __feraiseexcept (int excepts)
__asm ("" : "=e" (d) : "0" (c.zero));
d /= c.zero;
__asm __volatile ("" : : "e" (d));
+ _FPU_GETCW(cw);
}
/* Next: division by zero. */
@@ -51,6 +54,7 @@ __feraiseexcept (int excepts)
__asm ("" : "=e" (d) : "0" (c.one));
d /= c.zero;
__asm __volatile ("" : : "e" (d));
+ _FPU_GETCW(cw);
}
/* Next: overflow. */
@@ -59,6 +63,7 @@ __feraiseexcept (int excepts)
__asm ("" : "=e" (d) : "0" (c.max));
d *= d;
__asm __volatile ("" : : "e" (d));
+ _FPU_GETCW(cw);
}
/* Next: underflow. */
@@ -67,6 +72,7 @@ __feraiseexcept (int excepts)
__asm ("" : "=e" (d) : "0" (c.min));
d *= d;
__asm __volatile ("" : : "e" (d));
+ _FPU_GETCW(cw);
}
/* Last: inexact. */
@@ -75,6 +81,7 @@ __feraiseexcept (int excepts)
__asm ("" : "=e" (d) : "0" (c.one));
d /= c.pi;
__asm __volatile ("" : : "e" (d));
+ _FPU_GETCW(cw);
}
/* Success. */