Remove alpha specific fmax, fmin to fix sNaN handling [BZ #20947]

Message ID 20171231163825.9768-1-aurelien@aurel32.net
State New, archived
Headers

Commit Message

Aurelien Jarno Dec. 31, 2017, 4:38 p.m. UTC
  Various fmax and fmin function implementations mishandle sNaN
arguments:

(a) When both arguments are NaNs, the return value should be a qNaN,
but sometimes it is an sNaN if at least one argument is an sNaN.

(b) Under TS 18661-1 semantics, if either argument is an sNaN then the
result should be a qNaN (whereas if one argument is a qNaN and the
other is not a NaN, the result should be the non-NaN argument).
Various implementations treat sNaNs like qNaNs here.

One way to fix that is to detect the sNaN and add a special case. That
said, there is no FPU instruction to do that, so it requires transferring
the FP value to an integer register and testing bits. This becomes quite
complicated, so it's probably better to just use the generic versions of
these functions, which do that through issignaling.

Changelog:
	[BZ #20947]
	* sysdeps/alpha/fpu/s_fmax.S: Remove file.
	* sysdeps/alpha/fpu/s_fmaxf.S: Likewise.
	* sysdeps/alpha/fpu/s_fmin.S: Likewise.
	* sysdeps/alpha/fpu/s_fminf.S: Likewise.
---
 ChangeLog                   |  8 +++++++
 sysdeps/alpha/fpu/s_fmax.S  | 52 ---------------------------------------------
 sysdeps/alpha/fpu/s_fmaxf.S |  1 -
 sysdeps/alpha/fpu/s_fmin.S  | 52 ---------------------------------------------
 sysdeps/alpha/fpu/s_fminf.S |  1 -
 5 files changed, 8 insertions(+), 106 deletions(-)
 delete mode 100644 sysdeps/alpha/fpu/s_fmax.S
 delete mode 100644 sysdeps/alpha/fpu/s_fmaxf.S
 delete mode 100644 sysdeps/alpha/fpu/s_fmin.S
 delete mode 100644 sysdeps/alpha/fpu/s_fminf.S
  

Comments

Adhemerval Zanella Netto Dec. 31, 2017, 7:37 p.m. UTC | #1
On 31/12/2017 14:38, Aurelien Jarno wrote:
> Various fmax and fmin function implementations mishandle sNaN
> arguments:
> 
> (a) When both arguments are NaNs, the return value should be a qNaN,
> but sometimes it is an sNaN if at least one argument is an sNaN.
> 
> (b) Under TS 18661-1 semantics, if either argument is an sNaN then the
> result should be a qNaN (whereas if one argument is a qNaN and the
> other is not a NaN, the result should be the non-NaN argument).
> Various implementations treat sNaNs like qNaNs here.
> 
> One way to fix that is to detect the sNaN and add a special case. That
> said there is no FPU instruction to do that, so it requires transfering
> the FP value to an integer register and testing bits. This becomes quite
> complicated so it's probably better to just use the generic versions of
> these functions which just do that through issignaling.
> 
> Changelog:
> 	[BZ #20947]
> 	* sysdeps/alpha/fpu/s_fmax.S: Remove file.
> 	* sysdeps/alpha/fpu/s_fmaxf.S: Likewise.
> 	* sysdeps/alpha/fpu/s_fmin.S: Likewise.
> 	* sysdeps/alpha/fpu/s_fminf.S: Likewise.

LGTM. I think other alpha math functions suffer from similar issues (ceil and
floor at least).

Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>

> ---
>  ChangeLog                   |  8 +++++++
>  sysdeps/alpha/fpu/s_fmax.S  | 52 ---------------------------------------------
>  sysdeps/alpha/fpu/s_fmaxf.S |  1 -
>  sysdeps/alpha/fpu/s_fmin.S  | 52 ---------------------------------------------
>  sysdeps/alpha/fpu/s_fminf.S |  1 -
>  5 files changed, 8 insertions(+), 106 deletions(-)
>  delete mode 100644 sysdeps/alpha/fpu/s_fmax.S
>  delete mode 100644 sysdeps/alpha/fpu/s_fmaxf.S
>  delete mode 100644 sysdeps/alpha/fpu/s_fmin.S
>  delete mode 100644 sysdeps/alpha/fpu/s_fminf.S
> 
> diff --git a/ChangeLog b/ChangeLog
> index cd6fc15767..3f6002a175 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,11 @@
> +2017-12-31  Aurelien Jarno  <aurelien@aurel32.net>
> +
> +	[BZ #20947]
> +	* sysdeps/alpha/fpu/s_fmax.S: Remove file.
> +	* sysdeps/alpha/fpu/s_fmaxf.S: Likewise.
> +	* sysdeps/alpha/fpu/s_fmin.S: Likewise.
> +	* sysdeps/alpha/fpu/s_fminf.S: Likewise.
> +
>  2017-12-30  Aurelien Jarno  <aurelien@aurel32.net>
>  	    Dmitry V. Levin  <ldv@altlinux.org>
>  
> diff --git a/sysdeps/alpha/fpu/s_fmax.S b/sysdeps/alpha/fpu/s_fmax.S
> deleted file mode 100644
> index 5da9e0df11..0000000000
> --- a/sysdeps/alpha/fpu/s_fmax.S
> +++ /dev/null
> @@ -1,52 +0,0 @@
> -/* Copyright (C) 2007-2017 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -   Contributed by Richard Henderson.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library.  If not, see
> -   <http://www.gnu.org/licenses/>.  */
> -
> -#include <sysdep.h>
> -#include <math_ldbl_opt.h>
> -#include <libm-alias-float.h>
> -#include <libm-alias-double.h>
> -
> -        .set noat
> -	.set noreorder
> -
> -	.text
> -ENTRY (__fmax)
> -	.prologue 0
> -
> -	cmptun/su	$f16, $f16, $f10
> -	cmptun/su	$f17, $f17, $f11
> -	fmov		$f17, $f0
> -	unop
> -
> -	trapb
> -	fbne		$f10, $ret
> -	fmov		$f16, $f0
> -	fbne		$f11, $ret
> -
> -	cmptlt/su	$f16, $f17, $f11
> -	trapb
> -	fcmovne		$f11, $f17, $f0
> -$ret:	ret
> -
> -END (__fmax)
> -
> -/* Given the in-register format of single-precision, this works there too.  */
> -strong_alias (__fmax, __fmaxf)
> -libm_alias_float (__fmax, fmax)
> -
> -libm_alias_double (__fmax, fmax)
> diff --git a/sysdeps/alpha/fpu/s_fmaxf.S b/sysdeps/alpha/fpu/s_fmaxf.S
> deleted file mode 100644
> index 3c2d62bb81..0000000000
> --- a/sysdeps/alpha/fpu/s_fmaxf.S
> +++ /dev/null
> @@ -1 +0,0 @@
> -/* __fmaxf is in s_fmax.c  */
> diff --git a/sysdeps/alpha/fpu/s_fmin.S b/sysdeps/alpha/fpu/s_fmin.S
> deleted file mode 100644
> index d752223151..0000000000
> --- a/sysdeps/alpha/fpu/s_fmin.S
> +++ /dev/null
> @@ -1,52 +0,0 @@
> -/* Copyright (C) 2007-2017 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -   Contributed by Richard Henderson.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library.  If not, see
> -   <http://www.gnu.org/licenses/>.  */
> -
> -#include <sysdep.h>
> -#include <math_ldbl_opt.h>
> -#include <libm-alias-float.h>
> -#include <libm-alias-double.h>
> -
> -        .set noat
> -	.set noreorder
> -
> -	.text
> -ENTRY (__fmin)
> -	.prologue 0
> -
> -	cmptun/su	$f16, $f16, $f10
> -	cmptun/su	$f17, $f17, $f11
> -	fmov		$f17, $f0
> -	unop
> -
> -	trapb
> -	fbne		$f10, $ret
> -	fmov		$f16, $f0
> -	fbne		$f11, $ret
> -
> -	cmptlt/su	$f17, $f16, $f11
> -	trapb
> -	fcmovne		$f11, $f17, $f0
> -$ret:	ret
> -
> -END (__fmin)
> -
> -/* Given the in-register format of single-precision, this works there too.  */
> -strong_alias (__fmin, __fminf)
> -libm_alias_float (__fmin, fmin)
> -
> -libm_alias_double (__fmin, fmin)
> diff --git a/sysdeps/alpha/fpu/s_fminf.S b/sysdeps/alpha/fpu/s_fminf.S
> deleted file mode 100644
> index 10ab7fe53c..0000000000
> --- a/sysdeps/alpha/fpu/s_fminf.S
> +++ /dev/null
> @@ -1 +0,0 @@
> -/* __fminf is in s_fmin.c  */
>
  
Joseph Myers Jan. 1, 2018, 1:39 a.m. UTC | #2
Since bug 20947 was marked as fixed in 2.25, you need to file a new bug 
for the alpha-specific case, so that the generated list of fixed bugs in 
NEWS can accurately reflect the alpha-specific fix, and use that bug 
number accordingly in the ChangeLog entry.
  
Aurelien Jarno Jan. 1, 2018, 3 p.m. UTC | #3
On 2017-12-31 17:37, Adhemerval Zanella wrote:
> 
> 
> On 31/12/2017 14:38, Aurelien Jarno wrote:
> > Various fmax and fmin function implementations mishandle sNaN
> > arguments:
> > 
> > (a) When both arguments are NaNs, the return value should be a qNaN,
> > but sometimes it is an sNaN if at least one argument is an sNaN.
> > 
> > (b) Under TS 18661-1 semantics, if either argument is an sNaN then the
> > result should be a qNaN (whereas if one argument is a qNaN and the
> > other is not a NaN, the result should be the non-NaN argument).
> > Various implementations treat sNaNs like qNaNs here.
> > 
> > One way to fix that is to detect the sNaN and add a special case. That
> > said there is no FPU instruction to do that, so it requires transfering
> > the FP value to an integer register and testing bits. This becomes quite
> > complicated so it's probably better to just use the generic versions of
> > these functions which just do that through issignaling.
> > 
> > Changelog:
> > 	[BZ #20947]
> > 	* sysdeps/alpha/fpu/s_fmax.S: Remove file.
> > 	* sysdeps/alpha/fpu/s_fmaxf.S: Likewise.
> > 	* sysdeps/alpha/fpu/s_fmin.S: Likewise.
> > 	* sysdeps/alpha/fpu/s_fminf.S: Likewise.
> 
> LGTM. I think other alpha math functions suffers from similar issue (ceil and
> floor at lest).

Thanks for the review. ceil and floor have already been fixed in commits
062e53c195 and 65cc568cf5. What is left are many issues with the
exceptions, mostly "Inexact" ones, but they are more difficult to fix.
  
Aurelien Jarno Jan. 1, 2018, 3 p.m. UTC | #4
On 2018-01-01 01:39, Joseph Myers wrote:
> Since bug 20947 was marked as fixed in 2.25, you need to file a new bug 
> for the alpha-specific case, so that the generated list of fixed bugs in 
> NEWS can accurately reflect the alpha-specific fix, and use that bug 
> number accordingly in the ChangeLog entry.
> 

Ok, I'll open a new bug.
  
Joseph Myers Jan. 1, 2018, 3:06 p.m. UTC | #5
On Mon, 1 Jan 2018, Aurelien Jarno wrote:

> Thanks for the review. ceil and floor have already been fixed in commits
> 062e53c195 and 65cc568cf5. What is left are many issues with the
> exceptions, mostly "Inexact" ones, but they are more difficult to fix.

If you have missing "inexact" for functions that require it to be raised, 
building those particular functions with -mieee-with-inexact (which some 
functions and tests already use) would be appropriate.
  
Adhemerval Zanella Netto Jan. 1, 2018, 7:32 p.m. UTC | #6
On 01/01/2018 13:06, Joseph Myers wrote:
> On Mon, 1 Jan 2018, Aurelien Jarno wrote:
> 
>> Thanks for the review. ceil and floor have already been fixed in commits
>> 062e53c195 and 65cc568cf5. What is left are many issues with the
>> exceptions, mostly "Inexact" ones, but they are more difficult to fix.
> 
> If you have missing "inexact" for functions that require it to be raised, 
> building those particular functions with -mieee-with-inexact (which some 
> functions and tests already use) would be appropriate.
> 

Indeed, I was referring to Inexact exceptions being generated.  And
unfortunately adding -mieee-with-inexact on s_ceil{f} does not seem
to fix the issues in the environment I have access to (gcc version 6.3.0,
Alpha EV68CB). Also, the same compiler flag does not help on fma{f}.
  
Richard Henderson Jan. 1, 2018, 9:56 p.m. UTC | #7
On 12/31/2017 08:38 AM, Aurelien Jarno wrote:
> Various fmax and fmin function implementations mishandle sNaN
> arguments:
> 
> (a) When both arguments are NaNs, the return value should be a qNaN,
> but sometimes it is an sNaN if at least one argument is an sNaN.
> 
> (b) Under TS 18661-1 semantics, if either argument is an sNaN then the
> result should be a qNaN (whereas if one argument is a qNaN and the
> other is not a NaN, the result should be the non-NaN argument).
> Various implementations treat sNaNs like qNaNs here.
> 
> One way to fix that is to detect the sNaN and add a special case. That
> said there is no FPU instruction to do that, so it requires transfering
> the FP value to an integer register and testing bits. This becomes quite
> complicated so it's probably better to just use the generic versions of
> these functions which just do that through issignaling.
> 
> Changelog:
> 	[BZ #20947]
> 	* sysdeps/alpha/fpu/s_fmax.S: Remove file.
> 	* sysdeps/alpha/fpu/s_fmaxf.S: Likewise.
> 	* sysdeps/alpha/fpu/s_fmin.S: Likewise.
> 	* sysdeps/alpha/fpu/s_fminf.S: Likewise.
> ---

LGTM.


r~
  
Joseph Myers Jan. 1, 2018, 9:59 p.m. UTC | #8
On Mon, 1 Jan 2018, Adhemerval Zanella wrote:

> On 01/01/2018 13:06, Joseph Myers wrote:
> > On Mon, 1 Jan 2018, Aurelien Jarno wrote:
> > 
> >> Thanks for the review. ceil and floor have already been fixed in commits
> >> 062e53c195 and 65cc568cf5. What is left are many issues with the
> >> exceptions, mostly "Inexact" ones, but they are more difficult to fix.
> > 
> > If you have missing "inexact" for functions that require it to be raised, 
> > building those particular functions with -mieee-with-inexact (which some 
> > functions and tests already use) would be appropriate.
> > 
> 
> Indeed I was referring to Inexact exceptions being generated.  And
> unfortunately adding -mieee-with-inexact on s_ceil{f} does not seems
> to fix the issues on the environment I have access (gcc version 6.3.0,
> Alpha EV68CB). Also, the same compiler flag does not help on fma{f}.

In the case of ceil, inexact should never be generated.  Since the alpha 
ceil implementations work entirely with asm which does not use /i to 
enable inexact exceptions, I'm not sure why they should generate such 
exceptions spuriously.  What failures are you seeing exactly - every case 
of noninteger arguments to ceil / ceilf, or only some such cases, or even 
cases of integer arguments?

Possibly some cases of the instructions used in ceil / ceilf are trapping 
to the kernel?  It looks like the alpha floating-point emulation in the 
kernel (arch/alpha/math-emu/math.c) does not decode the TRP field at all, 
and so would wrongly set inexact even when the instruction semantics 
should not set it.  If so, the kernel bug would need to be fixed for code 
relying on inexact not being set to work.

That however does not explain issues for fma / fmaf.  What do you see 
there - spurious inexact, missing inexact, wrong results?  The use of 
-mieee-with-inexact ought to ensure instructions are generated that set 
"inexact" appropriately, and unless it's set appropriately, wrong results 
can occur because the round-to-odd implementation relies on correct 
setting of inexact.  fmaf in particular is very simple, so as long as the 
right instructions are used and nothing gets reordered past the libc_fe* 
calls, not much should be able to go wrong.
  
Adhemerval Zanella Netto Jan. 2, 2018, 1:34 a.m. UTC | #9
On 01/01/2018 19:59, Joseph Myers wrote:
> On Mon, 1 Jan 2018, Adhemerval Zanella wrote:
> 
>> On 01/01/2018 13:06, Joseph Myers wrote:
>>> On Mon, 1 Jan 2018, Aurelien Jarno wrote:
>>>
>>>> Thanks for the review. ceil and floor have already been fixed in commits
>>>> 062e53c195 and 65cc568cf5. What is left are many issues with the
>>>> exceptions, mostly "Inexact" ones, but they are more difficult to fix.
>>>
>>> If you have missing "inexact" for functions that require it to be raised, 
>>> building those particular functions with -mieee-with-inexact (which some 
>>> functions and tests already use) would be appropriate.
>>>
>>
>> Indeed I was referring to Inexact exceptions being generated.  And
>> unfortunately adding -mieee-with-inexact on s_ceil{f} does not seems
>> to fix the issues on the environment I have access (gcc version 6.3.0,
>> Alpha EV68CB). Also, the same compiler flag does not help on fma{f}.
> 
> In the case of ceil, inexact should never be generated.  Since the alpha 
> ceil implementations work entirely with asm which does not use /i to 
> enable inexact exceptions, I'm not sure why they should generate such 
> exceptions spuriously.  What failures are you seeing exactly - every case 
> of noninteger arguments to ceil / ceilf, or only some such cases, or even 
> cases of integer arguments?

The ceil/ceilf issues are in attachments (ran with s_ceil{f} built with
-mieee-with-inexact).

> 
> Possibly some cases of the instructions used in ceil / ceilf are trapping 
> to the kernel?  It looks like the alpha floating-point emulation in the 
> kernel (arch/alpha/math-emu/math.c) does not decode the TRP field at all, 
> and so would wrongly set inexact even when the instruction semantics 
> should not set it.  If so, the kernel bug would need to be fixed for code 
> relying on inexact not being set to work.

I am not sure if these are related to kernel emulation, although according
to the kernel config the math emulation is indeed added as a built-in
object.  I am trying to check if that is the case.

> 
> That however does not explain issues for fma / fmaf.  What do you see 
> there - spurious inexact, missing inexact, wrong results?  The use of 
> -mieee-with-inexact ought to ensure instructions are generated that set 
> "inexact" appropriately, and unless it's set appropriately, wrong results 
> can occur because the round-to-odd implementation relies on correct 
> setting of inexact.  fmaf in particular is very simple, so as long as the 
> right instructions are used and nothing gets reordered past the libc_fe* 
> calls, not much should be able to go wrong.

The issues I am seeing on alpha for fma/fmaf are also in attachments.
testing double (without inline functions)
Failure: ceil (lit_pi): Exception "Inexact" set
Failure: ceil (-lit_pi): Exception "Inexact" set
Failure: ceil (min_subnorm_value): Exception "Inexact" set
Failure: ceil (min_value): Exception "Inexact" set
Failure: ceil (0.1): Exception "Inexact" set
Failure: ceil (0.25): Exception "Inexact" set
Failure: ceil (0.625): Exception "Inexact" set
Failure: ceil (-min_subnorm_value): Exception "Inexact" set
Failure: ceil (-min_value): Exception "Inexact" set
Failure: ceil (-0.1): Exception "Inexact" set
Failure: ceil (-0.25): Exception "Inexact" set
Failure: ceil (-0.625): Exception "Inexact" set
Failure: ceil_downward (lit_pi): Exception "Inexact" set
Failure: ceil_downward (-lit_pi): Exception "Inexact" set
Failure: ceil_downward (min_subnorm_value): Exception "Inexact" set
Failure: ceil_downward (min_value): Exception "Inexact" set
Failure: ceil_downward (0.1): Exception "Inexact" set
Failure: ceil_downward (0.25): Exception "Inexact" set
Failure: ceil_downward (0.625): Exception "Inexact" set
Failure: ceil_downward (-min_subnorm_value): Exception "Inexact" set
Failure: ceil_downward (-min_value): Exception "Inexact" set
Failure: ceil_downward (-0.1): Exception "Inexact" set
Failure: ceil_downward (-0.25): Exception "Inexact" set
Failure: ceil_downward (-0.625): Exception "Inexact" set
Failure: ceil_towardzero (lit_pi): Exception "Inexact" set
Failure: ceil_towardzero (-lit_pi): Exception "Inexact" set
Failure: ceil_towardzero (min_subnorm_value): Exception "Inexact" set
Failure: ceil_towardzero (min_value): Exception "Inexact" set
Failure: ceil_towardzero (0.1): Exception "Inexact" set
Failure: ceil_towardzero (0.25): Exception "Inexact" set
Failure: ceil_towardzero (0.625): Exception "Inexact" set
Failure: ceil_towardzero (-min_subnorm_value): Exception "Inexact" set
Failure: ceil_towardzero (-min_value): Exception "Inexact" set
Failure: ceil_towardzero (-0.1): Exception "Inexact" set
Failure: ceil_towardzero (-0.25): Exception "Inexact" set
Failure: ceil_towardzero (-0.625): Exception "Inexact" set
Failure: ceil_upward (lit_pi): Exception "Inexact" set
Failure: ceil_upward (-lit_pi): Exception "Inexact" set
Failure: ceil_upward (min_subnorm_value): Exception "Inexact" set
Failure: ceil_upward (min_value): Exception "Inexact" set
Failure: ceil_upward (0.1): Exception "Inexact" set
Failure: ceil_upward (0.25): Exception "Inexact" set
Failure: ceil_upward (0.625): Exception "Inexact" set
Failure: ceil_upward (-min_subnorm_value): Exception "Inexact" set
Failure: ceil_upward (-min_value): Exception "Inexact" set
Failure: ceil_upward (-0.1): Exception "Inexact" set
Failure: ceil_upward (-0.25): Exception "Inexact" set
Failure: ceil_upward (-0.625): Exception "Inexact" set

Test suite completed:
  228 test cases plus 224 tests for exception flags and
    224 tests for errno executed.
  48 errors occurred.
testing float (without inline functions)
Failure: ceil (lit_pi): Exception "Inexact" set
Failure: ceil (-lit_pi): Exception "Inexact" set
Failure: ceil (min_subnorm_value): Exception "Inexact" set
Failure: ceil (min_value): Exception "Inexact" set
Failure: ceil (0.1): Exception "Inexact" set
Failure: ceil (0.25): Exception "Inexact" set
Failure: ceil (0.625): Exception "Inexact" set
Failure: ceil (-min_subnorm_value): Exception "Inexact" set
Failure: ceil (-min_value): Exception "Inexact" set
Failure: ceil (-0.1): Exception "Inexact" set
Failure: ceil (-0.25): Exception "Inexact" set
Failure: ceil (-0.625): Exception "Inexact" set
Failure: ceil_downward (lit_pi): Exception "Inexact" set
Failure: ceil_downward (-lit_pi): Exception "Inexact" set
Failure: ceil_downward (min_subnorm_value): Exception "Inexact" set
Failure: ceil_downward (min_value): Exception "Inexact" set
Failure: ceil_downward (0.1): Exception "Inexact" set
Failure: ceil_downward (0.25): Exception "Inexact" set
Failure: ceil_downward (0.625): Exception "Inexact" set
Failure: ceil_downward (-min_subnorm_value): Exception "Inexact" set
Failure: ceil_downward (-min_value): Exception "Inexact" set
Failure: ceil_downward (-0.1): Exception "Inexact" set
Failure: ceil_downward (-0.25): Exception "Inexact" set
Failure: ceil_downward (-0.625): Exception "Inexact" set
Failure: ceil_towardzero (lit_pi): Exception "Inexact" set
Failure: ceil_towardzero (-lit_pi): Exception "Inexact" set
Failure: ceil_towardzero (min_subnorm_value): Exception "Inexact" set
Failure: ceil_towardzero (min_value): Exception "Inexact" set
Failure: ceil_towardzero (0.1): Exception "Inexact" set
Failure: ceil_towardzero (0.25): Exception "Inexact" set
Failure: ceil_towardzero (0.625): Exception "Inexact" set
Failure: ceil_towardzero (-min_subnorm_value): Exception "Inexact" set
Failure: ceil_towardzero (-min_value): Exception "Inexact" set
Failure: ceil_towardzero (-0.1): Exception "Inexact" set
Failure: ceil_towardzero (-0.25): Exception "Inexact" set
Failure: ceil_towardzero (-0.625): Exception "Inexact" set
Failure: ceil_upward (lit_pi): Exception "Inexact" set
Failure: ceil_upward (-lit_pi): Exception "Inexact" set
Failure: ceil_upward (min_subnorm_value): Exception "Inexact" set
Failure: ceil_upward (min_value): Exception "Inexact" set
Failure: ceil_upward (0.1): Exception "Inexact" set
Failure: ceil_upward (0.25): Exception "Inexact" set
Failure: ceil_upward (0.625): Exception "Inexact" set
Failure: ceil_upward (-min_subnorm_value): Exception "Inexact" set
Failure: ceil_upward (-min_value): Exception "Inexact" set
Failure: ceil_upward (-0.1): Exception "Inexact" set
Failure: ceil_upward (-0.25): Exception "Inexact" set
Failure: ceil_upward (-0.625): Exception "Inexact" set

Test suite completed:
  228 test cases plus 224 tests for exception flags and
    224 tests for errno executed.
  48 errors occurred.
testing double (without inline functions)
Failure: fma (-0x4p-1076, 0x8p-4, -0x3.ffffffffffffcp-1024): Exception "Underflow" not set
Failure: fma (-0x7.ffffffffffffp-1024, 0x8.0000000000008p-4, -0x4p-1076): Exception "Underflow" set
Failure: fma (0x1.deadbeef2feedp+900, 0x3.7ab6fbbcbfbb4p-1024, -0x6.817e300692fecp-124): Exception "Underflow" set
Failure: fma (0x4.0000000000004p-1024, -0x2.0000000000002p-56, -0x4p+968): Exception "Underflow" set
Failure: fma (0x4.0000000000004p-1024, -0x2.0000000000002p-56, -0x8p+1020): Exception "Underflow" set
Failure: fma (0x4.0000000000004p-1024, -0x2.0000000000002p-56, 0x4p+968): Exception "Underflow" set
Failure: fma (0x4.0000000000004p-1024, -0x2.0000000000002p-56, 0x8p+1020): Exception "Underflow" set
Failure: fma (0x4.0000000000004p-1024, 0x2.0000000000002p-56, -0x4p+968): Exception "Underflow" set
Failure: fma (0x4.0000000000004p-1024, 0x2.0000000000002p-56, -0x8p+1020): Exception "Underflow" set
Failure: fma (0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x4p+968): Exception "Underflow" set
Failure: fma (0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x8p+1020): Exception "Underflow" set
Failure: fma (0x4p-1076, 0x8p-4, 0x3.ffffffffffffcp-1024): Exception "Underflow" not set
Failure: fma (0x7.ffffffffffffcp-1024, -0x7.ffffffffffffcp+52, -0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma (0x7.ffffffffffffcp-1024, -0x7.ffffffffffffcp+52, 0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma (0x7.ffffffffffffcp-1024, 0x7.ffffffffffffcp+52, -0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma (0x7.ffffffffffffcp-1024, 0x7.ffffffffffffcp+52, 0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma (0x7.ffffffffffffp-1024, 0x8.0000000000008p-4, 0x4p-1076): Exception "Underflow" set
Failure: fma_downward (-0x4p-1076, 0x8p-4, -0x3.ffffffffffffcp-1024): Exception "Underflow" not set
Failure: fma_downward (-0x7.ffffffffffffp-1024, 0x8.0000000000008p-4, -0x4p-1076): Exception "Underflow" set
Failure: fma_downward (0x1.deadbeef2feedp+900, 0x3.7ab6fbbcbfbb4p-1024, -0x6.817e300692fecp-124): Exception "Underflow" set
Failure: fma_downward (0x4.0000000000004p-1024, -0x2.0000000000002p-56, -0x4p+968): Exception "Underflow" set
Failure: fma_downward (0x4.0000000000004p-1024, -0x2.0000000000002p-56, -0x8p+1020): Exception "Underflow" set
Failure: fma_downward (0x4.0000000000004p-1024, -0x2.0000000000002p-56, 0x4p+968): Exception "Underflow" set
Failure: fma_downward (0x4.0000000000004p-1024, -0x2.0000000000002p-56, 0x8p+1020): Exception "Underflow" set
Failure: fma_downward (0x4.0000000000004p-1024, 0x2.0000000000002p-56, -0x4p+968): Exception "Underflow" set
Failure: fma_downward (0x4.0000000000004p-1024, 0x2.0000000000002p-56, -0x8p+1020): Exception "Underflow" set
Failure: fma_downward (0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x4p+968): Exception "Underflow" set
Failure: fma_downward (0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x8p+1020): Exception "Underflow" set
Failure: fma_downward (0x7.ffffffffffffcp-1024, -0x7.ffffffffffffcp+52, -0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma_downward (0x7.ffffffffffffcp-1024, -0x7.ffffffffffffcp+52, 0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma_downward (0x7.ffffffffffffcp-1024, 0x7.ffffffffffffcp+52, -0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma_downward (0x7.ffffffffffffcp-1024, 0x7.ffffffffffffcp+52, 0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma_towardzero (0x1.deadbeef2feedp+900, 0x3.7ab6fbbcbfbb4p-1024, -0x6.817e300692fecp-124): Exception "Underflow" set
Failure: fma_towardzero (0x4.0000000000004p-1024, -0x2.0000000000002p-56, -0x4p+968): Exception "Underflow" set
Failure: fma_towardzero (0x4.0000000000004p-1024, -0x2.0000000000002p-56, -0x8p+1020): Exception "Underflow" set
Failure: fma_towardzero (0x4.0000000000004p-1024, -0x2.0000000000002p-56, 0x4p+968): Exception "Underflow" set
Failure: fma_towardzero (0x4.0000000000004p-1024, -0x2.0000000000002p-56, 0x8p+1020): Exception "Underflow" set
Failure: fma_towardzero (0x4.0000000000004p-1024, 0x2.0000000000002p-56, -0x4p+968): Exception "Underflow" set
Failure: fma_towardzero (0x4.0000000000004p-1024, 0x2.0000000000002p-56, -0x8p+1020): Exception "Underflow" set
Failure: fma_towardzero (0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x4p+968): Exception "Underflow" set
Failure: fma_towardzero (0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x8p+1020): Exception "Underflow" set
Failure: fma_towardzero (0x7.ffffffffffffcp-1024, -0x7.ffffffffffffcp+52, -0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma_towardzero (0x7.ffffffffffffcp-1024, -0x7.ffffffffffffcp+52, 0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma_towardzero (0x7.ffffffffffffcp-1024, 0x7.ffffffffffffcp+52, -0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma_towardzero (0x7.ffffffffffffcp-1024, 0x7.ffffffffffffcp+52, 0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma_upward (0x1.deadbeef2feedp+900, 0x3.7ab6fbbcbfbb4p-1024, -0x6.817e300692fecp-124): Exception "Underflow" set
Failure: fma_upward (0x4.0000000000004p-1024, -0x2.0000000000002p-56, -0x4p+968): Exception "Underflow" set
Failure: fma_upward (0x4.0000000000004p-1024, -0x2.0000000000002p-56, -0x8p+1020): Exception "Underflow" set
Failure: fma_upward (0x4.0000000000004p-1024, -0x2.0000000000002p-56, 0x4p+968): Exception "Underflow" set
Failure: fma_upward (0x4.0000000000004p-1024, -0x2.0000000000002p-56, 0x8p+1020): Exception "Underflow" set
Failure: fma_upward (0x4.0000000000004p-1024, 0x2.0000000000002p-56, -0x4p+968): Exception "Underflow" set
Failure: fma_upward (0x4.0000000000004p-1024, 0x2.0000000000002p-56, -0x8p+1020): Exception "Underflow" set
Failure: fma_upward (0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x4p+968): Exception "Underflow" set
Failure: fma_upward (0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x8p+1020): Exception "Underflow" set
Failure: fma_upward (0x4p-1076, 0x8p-4, 0x3.ffffffffffffcp-1024): Exception "Underflow" not set
Failure: fma_upward (0x7.ffffffffffffcp-1024, -0x7.ffffffffffffcp+52, -0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma_upward (0x7.ffffffffffffcp-1024, -0x7.ffffffffffffcp+52, 0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma_upward (0x7.ffffffffffffcp-1024, 0x7.ffffffffffffcp+52, -0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma_upward (0x7.ffffffffffffcp-1024, 0x7.ffffffffffffcp+52, 0xf.ffffffffffff8p+1020): Exception "Underflow" set
Failure: fma_upward (0x7.ffffffffffffp-1024, 0x8.0000000000008p-4, 0x4p-1076): Exception "Underflow" set

Test suite completed:
  1628 test cases plus 1624 tests for exception flags and
    1624 tests for errno executed.
  60 errors occurred.
testing float (without inline functions)
Failure: fma (-0x8p-152, 0x8.8p-4, -0x3.fffff8p-128): Exception "Underflow" not set
Failure: fma (-0x8p-152, 0x8p-4, -0x3.fffff8p-128): Exception "Underflow" not set
Failure: fma (0x8p-152, 0x8.8p-4, 0x3.fffff8p-128): Exception "Underflow" not set
Failure: fma (0x8p-152, 0x8p-4, 0x3.fffff8p-128): Exception "Underflow" not set
Failure: fma_downward (-0x8p-152, 0x8p-4, -0x3.fffff8p-128): Exception "Underflow" not set
Failure: fma_downward (0x8p-152, -0x8p-152, -0x3.fffff8p-128): Exception "Underflow" not set
Failure: fma_upward (0x8p-152, 0x8p-152, 0x3.fffff8p-128): Exception "Underflow" not set
Failure: fma_upward (0x8p-152, 0x8p-4, 0x3.fffff8p-128): Exception "Underflow" not set

Test suite completed:
  768 test cases plus 764 tests for exception flags and
    764 tests for errno executed.
  8 errors occurred.
  
Joseph Myers Jan. 2, 2018, 2:04 p.m. UTC | #10
On Mon, 1 Jan 2018, Adhemerval Zanella wrote:

> > In the case of ceil, inexact should never be generated.  Since the alpha 
> > ceil implementations work entirely with asm which does not use /i to 
> > enable inexact exceptions, I'm not sure why they should generate such 
> > exceptions spuriously.  What failures are you seeing exactly - every case 
> > of noninteger arguments to ceil / ceilf, or only some such cases, or even 
> > cases of integer arguments?
> 
> The ceil/ceilf issues are in attachments (ran with s_ceil{f} built with
> -mieee-with-inexact).

ceil / ceilf should *not* be built with -mieee-with-inexact (since they 
should never raise inexact).  But also that option shouldn't make any 
difference to those functions.

This is systematically raising spurious inexact for noninteger ceil / 
ceilf arguments.  I don't see why these arguments would trap to the 
kernel, but maybe (a) confirm in a debugger exactly which instruction 
results in inexact being raised; (b) maybe instrument the kernel to report 
when that instruction is being emulated so you can see if the emulation is 
involved here at all?  If the emulation is involved, the kernel should be 
fixed to check TRP to see if inexact should be raised.

> > That however does not explain issues for fma / fmaf.  What do you see 
> > there - spurious inexact, missing inexact, wrong results?  The use of 
> > -mieee-with-inexact ought to ensure instructions are generated that set 
> > "inexact" appropriately, and unless it's set appropriately, wrong results 
> > can occur because the round-to-odd implementation relies on correct 
> > setting of inexact.  fmaf in particular is very simple, so as long as the 
> > right instructions are used and nothing gets reordered past the libc_fe* 
> > calls, not much should be able to go wrong.
> 
> The issues I am seeing on alpha for fma/fmaf are also in attachments.

For float, these are all missing underflow exceptions.

Alpha is an architecture with after-rounding tininess detection.  Recall 
that after-rounding tininess detection is based on what the result would 
be if rounded to normal precision but with infinite exponent range, so 
it's possible for a result to be rounded to +/- the least normal but still 
result in underflow with after-rounding tininess detection, which appears 
to be the case for the failing tests for float.

Now, the Linux kernel has an old soft-fp version that only supports 
before-rounding tininess detection, but the cases with before-rounding 
underflow are a strict superset of those with after-rounding underflow, so 
that can't explain missing underflow exceptions.  (I tried in 2015 to get 
updated soft-fp into the Linux kernel.  A patch series was accepted into a 
powerpc tree that was supposed to be pull-requested for Linux 4.4 
<https://lkml.org/lkml/2015/8/26/804> but it never actually got into 
Linus's tree for some reason.)

Maybe there is a hardware bug that means certain underflow cases neither 
raise the underflow flag in hardware nor pass things to software 
emulation, or something like that?

(IEEE 754-1985, unlike IEEE 754-2008, allows for underflow to be raised 
only where there are both tininess and loss of accuracy as detected as a 
denormalization loss, as opposed to tininess and inexactness.  But the 
Alpha Architecture Handbook says "In the Alpha architecture, tininess is 
detected by hardware after rounding, and loss of accuracy is detected by 
software as an inexact result.", which indicates that option in IEEE 
754-1985 isn't relevant here.)

For double, there are a few cases of missing underflow exceptions, for 
which the above analysis would apply.  But most of the failures there are 
spurious underflow exceptions, which are more mysterious, as they include 
cases where the result is large, nowhere near underflowing.  I'd suggest 
finding out exactly which instruction, with what operands, is generating 
the spurious underflow exception (possibly an instruction that generates 
an exact subnormal result, where the underflow flag should not be set?).  
And, again, see whether kernel emulation is involved for that instruction.
  
Adhemerval Zanella Netto Jan. 2, 2018, 6:16 p.m. UTC | #11
On 02/01/2018 12:04, Joseph Myers wrote:
> On Mon, 1 Jan 2018, Adhemerval Zanella wrote:
> 
>>> In the case of ceil, inexact should never be generated.  Since the alpha 
>>> ceil implementations work entirely with asm which does not use /i to 
>>> enable inexact exceptions, I'm not sure why they should generate such 
>>> exceptions spuriously.  What failures are you seeing exactly - every case 
>>> of noninteger arguments to ceil / ceilf, or only some such cases, or even 
>>> cases of integer arguments?
>>
>> The ceil/ceilf issues are in attachments (ran with s_ceil{f} built with
>> -mieee-with-inexact).
> 
> ceil / ceilf should *not* be built with -mieee-with-inexact (since they 
> should never raise inexact).  But also that option shouldn't make any 
> difference to those functions.
> 
> This is systematically raising spurious inexact for noninteger ceil / 
> ceilf arguments.  I don't see why these arguments would trap to the 
> kernel, but maybe (a) confirm in a debugger exactly which instruction 
> results in inexact being raised; (b) maybe instrument the kernel to report 
> when that instruction is being emulated so you can see if the emulation is 
> involved here at all?  If the emulation is involved, the kernel should be 
> fixed to check TRP to see if inexact should be raised.

It is the 'cvttq/svm' which changes the fpcr and sets INE bit.

(gdb) i r fpcr
fpcr           0x680e000000000000       7497930429618454528
(gdb) ni
0x000002000009a194      38            __asm (
(gdb) i r fpcr
fpcr           0xe90e000000200000       -1653384013196296192

(0x000002000009a194 is the cvttq/svm from s_ceil.S).

A comment from alpha divq.S (present in other assembly implementation
as well) states:

 37    The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
 38    for cvttq/c even without /sui being set.  It will not, however, properly
 39    raise the exception, so we don't have to worry about FPCR_INED being clear
 40    and so dying by SIGFPE.  */

Which leads me to believe the same holds for cvttq with /m as well.  Also, the
comments on the qemu patch at [1] indicate that the CVTTQ semantics do set inexact for
1. denorms -> 0 and 2. values outside of that range -> lower 64 bits of value.
So I am not sure if it is a hardware issue or expected semantics (the Alpha Architecture
Handbook I have access to does indicate that cvttq sets the INE bit for some operations).

I haven't tested whether this is a case of an emulated instruction (I currently
do not have access to rebuild/reinstall a new kernel on the machine), but
since I am checking on EV68CB I guess it is not.

In any case I think we have two options here: either adjust the implementation
to clear the FPCR_INE bit after cvttq/svm (which will incur a mf_fpcr followed
by a mt_fpcr) or just remove the optimized implementation.  I am more inclined
to the latter, since working on the FPCR is usually costly: a very naive attempt
to save/restore the fpcr around cvttq for ceil did solve the issues but also
showed worse performance than using the generic implementation (I used a
ceil benchtest based on the trunc{f} inputs).

[1] https://patchwork.ozlabs.org/patch/363303/

> 
>>> That however does not explain issues for fma / fmaf.  What do you see 
>>> there - spurious inexact, missing inexact, wrong results?  The use of 
>>> -mieee-with-inexact ought to ensure instructions are generated that set 
>>> "inexact" appropriately, and unless it's set appropriately, wrong results 
>>> can occur because the round-to-odd implementation relies on correct 
>>> setting of inexact.  fmaf in particular is very simple, so as long as the 
>>> right instructions are used and nothing gets reordered past the libc_fe* 
>>> calls, not much should be able to go wrong.
>>
>> The issues I am seeing on alpha for fma/fmaf are also in attachments.
> 
> For float, these are all missing underflow exceptions.
> 
> Alpha is an architecture with after-rounding tininess detection.  Recall 
> that after-rounding tininess detection is based on what the result would 
> be if rounded to normal precision but with infinite exponent range, so 
> it's possible for a result to be rounded to +/- the least normal but still 
> result in underflow with after-rounding tininess detection, which appears 
> to be the case for the failing tests for float.
> 
> Now, the Linux kernel has an old soft-fp version that only supports 
> before-rounding tininess detection, but the cases with before-rounding 
> underflow are a strict superset of those with after-rounding underflow, so 
> that can't explain missing underflow exceptions.  (I tried in 2015 to get 
> updated soft-fp into the Linux kernel.  A patch series was accepted into a 
> powerpc tree that was supposed to be pull-requested for Linux 4.4 
> <https://lkml.org/lkml/2015/8/26/804> but it never actually got into 
> Linus's tree for some reason.)
> 
> Maybe there is a hardware bug that means certain underflow cases neither 
> raise the underflow flag in hardware nor pass things to software 
> emulation, or something like that?
> 
> (IEEE 754-1985, unlike IEEE 754-2008, allows for underflow to be raised 
> only where there are both tininess and loss of accuracy as detected as a 
> denormalization loss, as opposed to tininess and inexactness.  But the 
> Alpha Architecture Handbook says "In the Alpha architecture, tininess is 
> detected by hardware after rounding, and loss of accuracy is detected by 
> software as an inexact result.", which indicates that option in IEEE 
> 754-1985 isn't relevant here.)
> 
> For double, there are a few cases of missing underflow exceptions, for 
> which the above analysis would apply.  But most of the failures there are 
> spurious underflow exceptions, which are more mysterious, as they include 
> cases where the result is large, nowhere near underflowing.  I'd suggest 
> finding out exactly which instruction, with what operands, is generating 
> the spurious underflow exception (possibly an instruction that generates 
> an exact subnormal result, where the underflow flag should not be set?).  
> And, again, see whether kernel emulation is involved for that instruction.
> 

I will try to investigate fma{f} as well.
  
Joseph Myers Jan. 2, 2018, 6:32 p.m. UTC | #12
On Tue, 2 Jan 2018, Adhemerval Zanella wrote:

> I haven't tested if it is the case of an emulated instruction (I currently
> do not have access to rebuild/reinstall new kernel on the machine), but 
> since I am checking on EV68CB I guess it is not.

If the instruction sets inexact in hardware (I was going on 4.7.6.4 
IEEE-Compliant Arithmetic Without Inexact Exception, "this model does not 
signal inexact results either by the inexact status flag or by trapping"), 
then indeed you need to avoid using the instruction or avoid any inexact 
bit from it being set on return from the function (while of course if 
inexact was set on entry to the function, it still needs to be set on 
return from the function, so you can't simply clear inexact after the 
instruction).
  
Adhemerval Zanella Netto Jan. 3, 2018, 5:54 p.m. UTC | #13
On 02/01/2018 16:32, Joseph Myers wrote:
> On Tue, 2 Jan 2018, Adhemerval Zanella wrote:
> 
>> I haven't tested if it is the case of an emulated instruction (I currently
>> do not have access to rebuild/reinstall new kernel on the machine), but 
>> since I am checking on EV68CB I guess it is not.
> 
> If the instruction sets inexact in hardware (I was going on 4.7.6.4 
> IEEE-Compliant Arithmetic Without Inexact Exception, "this model does not 
> signal inexact results either by the inexact status flag or by trapping"), 
> then indeed you need to avoid using the instruction or avoid any inexact 
> bit from it being set on return from the function (while of course if 
> inexact was set on entry to the function, it still needs to be set on 
> return from the function, so you can't simply clear inexact after the 
> instruction).
> 

Although the manual I am using (Alpha Architecture Handbook version 3)
notes that CVTfi OUTPUT Exceptions (B.3 Mapping to IEEE Standard)
should not generate Inexact, the manual of the chip I am testing on
(Alpha 21264 [1]) states in A.8 IEEE Floating-Point Conformance that
CVTfi and CVTif OUTPUT do generate an inexact exception for an inexact
result.  I think the easier solution is to just use the generic
implementation for alpha.

[1] https://www.star.bnl.gov/public/daq/HARDWARE/21264_data_sheet.pdf
  
Joseph Myers Jan. 3, 2018, 6:23 p.m. UTC | #14
On Wed, 3 Jan 2018, Adhemerval Zanella wrote:

> Although the manual I am using (Alpha Architecture Handbook version 3),
> notes that CVTfi OUTPUT Exceptions (B.3 Mapping to IEEE Standard)
> should not generate Inexact, the manual of the chip I am testing it
> (Alpha 21264 [1]) states at A.8 IEEE Floating-Point Conformance that
> CVTfi and CVTif OUTPUT does generate inexact exception for inexact
> result.  I think easier solution is just use generic implementation
> for alpha.

Sure.  Presumably this applies to all of ceil, floor and trunc.  And a GCC 
bug should be filed for inlining those functions on alpha in the default 
-ffp-int-builtin-inexact case which allows the exception to be raised.
  
Adhemerval Zanella Netto Jan. 3, 2018, 6:48 p.m. UTC | #15
On 03/01/2018 16:23, Joseph Myers wrote:
> On Wed, 3 Jan 2018, Adhemerval Zanella wrote:
> 
>> Although the manual I am using (Alpha Architecture Handbook version 3),
>> notes that CVTfi OUTPUT Exceptions (B.3 Mapping to IEEE Standard)
>> should not generate Inexact, the manual of the chip I am testing it
>> (Alpha 21264 [1]) states at A.8 IEEE Floating-Point Conformance that
>> CVTfi and CVTif OUTPUT does generate inexact exception for inexact
>> result.  I think easier solution is just use generic implementation
>> for alpha.
> 
> Sure.  Presumably this applies to all of ceil, floor and trunc.  And a GCC 
> bug should be filed for inlining those functions on alpha in the default 
> -ffp-int-builtin-inexact case which allows the exception to be raised.
> 

Indeed trunc is also affected, although in its case because of addt/suc and
subt/suc, which on the 21264 raise inexact for an inexact result.  I will track
trunc in another bugzilla and open a gcc one as well.
  
Maciej W. Rozycki Jan. 4, 2018, 8:43 a.m. UTC | #16
On Tue, 2 Jan 2018, Adhemerval Zanella wrote:

> It is the 'cvttq/svm' which changes the fpcr and sets INE bit.
> 
> (gdb) i r fpcr
> fpcr           0x680e000000000000       7497930429618454528
> (gdb) ni
> 0x000002000009a194      38            __asm (
> (gdb) i r fpcr
> fpcr           0xe90e000000200000       -1653384013196296192
> 
> (0x000002000009a194 is the cvttq/svm from s_ceil.S).

 FYI, you can use `x /i $pc' and also `display /i $pc' to have the current 
machine instruction disassembled in GDB.

  Maciej
  

Patch

diff --git a/ChangeLog b/ChangeLog
index cd6fc15767..3f6002a175 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@ 
+2017-12-31  Aurelien Jarno  <aurelien@aurel32.net>
+
+	[BZ #20947]
+	* sysdeps/alpha/fpu/s_fmax.S: Remove file.
+	* sysdeps/alpha/fpu/s_fmaxf.S: Likewise.
+	* sysdeps/alpha/fpu/s_fmin.S: Likewise.
+	* sysdeps/alpha/fpu/s_fminf.S: Likewise.
+
 2017-12-30  Aurelien Jarno  <aurelien@aurel32.net>
 	    Dmitry V. Levin  <ldv@altlinux.org>
 
diff --git a/sysdeps/alpha/fpu/s_fmax.S b/sysdeps/alpha/fpu/s_fmax.S
deleted file mode 100644
index 5da9e0df11..0000000000
--- a/sysdeps/alpha/fpu/s_fmax.S
+++ /dev/null
@@ -1,52 +0,0 @@ 
-/* Copyright (C) 2007-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Richard Henderson.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-#include <libm-alias-float.h>
-#include <libm-alias-double.h>
-
-        .set noat
-	.set noreorder
-
-	.text
-ENTRY (__fmax)
-	.prologue 0
-
-	cmptun/su	$f16, $f16, $f10
-	cmptun/su	$f17, $f17, $f11
-	fmov		$f17, $f0
-	unop
-
-	trapb
-	fbne		$f10, $ret
-	fmov		$f16, $f0
-	fbne		$f11, $ret
-
-	cmptlt/su	$f16, $f17, $f11
-	trapb
-	fcmovne		$f11, $f17, $f0
-$ret:	ret
-
-END (__fmax)
-
-/* Given the in-register format of single-precision, this works there too.  */
-strong_alias (__fmax, __fmaxf)
-libm_alias_float (__fmax, fmax)
-
-libm_alias_double (__fmax, fmax)
diff --git a/sysdeps/alpha/fpu/s_fmaxf.S b/sysdeps/alpha/fpu/s_fmaxf.S
deleted file mode 100644
index 3c2d62bb81..0000000000
--- a/sysdeps/alpha/fpu/s_fmaxf.S
+++ /dev/null
@@ -1 +0,0 @@ 
-/* __fmaxf is in s_fmax.c  */
diff --git a/sysdeps/alpha/fpu/s_fmin.S b/sysdeps/alpha/fpu/s_fmin.S
deleted file mode 100644
index d752223151..0000000000
--- a/sysdeps/alpha/fpu/s_fmin.S
+++ /dev/null
@@ -1,52 +0,0 @@ 
-/* Copyright (C) 2007-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Richard Henderson.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-#include <libm-alias-float.h>
-#include <libm-alias-double.h>
-
-        .set noat
-	.set noreorder
-
-	.text
-ENTRY (__fmin)
-	.prologue 0
-
-	cmptun/su	$f16, $f16, $f10
-	cmptun/su	$f17, $f17, $f11
-	fmov		$f17, $f0
-	unop
-
-	trapb
-	fbne		$f10, $ret
-	fmov		$f16, $f0
-	fbne		$f11, $ret
-
-	cmptlt/su	$f17, $f16, $f11
-	trapb
-	fcmovne		$f11, $f17, $f0
-$ret:	ret
-
-END (__fmin)
-
-/* Given the in-register format of single-precision, this works there too.  */
-strong_alias (__fmin, __fminf)
-libm_alias_float (__fmin, fmin)
-
-libm_alias_double (__fmin, fmin)
diff --git a/sysdeps/alpha/fpu/s_fminf.S b/sysdeps/alpha/fpu/s_fminf.S
deleted file mode 100644
index 10ab7fe53c..0000000000
--- a/sysdeps/alpha/fpu/s_fminf.S
+++ /dev/null
@@ -1 +0,0 @@ 
-/* __fminf is in s_fmin.c  */