From patchwork Mon Sep 14 22:41:26 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Joseph Myers X-Patchwork-Id: 8692 Received: (qmail 34342 invoked by alias); 14 Sep 2015 22:41:33 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 34332 invoked by uid 89); 14 Sep 2015 22:41:33 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-0.9 required=5.0 tests=AWL, BAYES_50, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.2 X-HELO: relay1.mentorg.com Date: Mon, 14 Sep 2015 22:41:26 +0000 From: Joseph Myers To: Subject: Fix i386 exp missing underflows (bug 18961) [committed] Message-ID: User-Agent: Alpine 2.10 (DEB 1266 2009-07-14) MIME-Version: 1.0 On i386, the double version of exp can miss underflow exceptions if the result is in the subnormal range for double but the last 11 bits of the 64-bit extended-precision mantissa happen to be zero. This patch forces the exception in a similar way to previous fixes. As with the exp2 fixes, the expf changes may in fact not be needed to ensure underflow exceptions, but are included for consistency and to fix the exp part of bug 18875 by ensuring that excess range and precision is removed from underflowing return values. Tested for x86_64 and x86. Committed. (auto-libm-test-out diffs omitted below.) 2015-09-14 Joseph Myers [BZ #18875] [BZ #18961] * sysdeps/i386/fpu/e_exp.S (dbl_min): New object. (MO): New macro. (__ieee754_exp): For small results, force underflow exception and remove excess range and precision from return value. (__exp_finite): Likewise. * sysdeps/i386/fpu/e_expf.S (flt_min): New object. (MO): New macro. 
(__ieee754_expf): For small results, force underflow exception and remove excess range and precision from return value. (__expf_finite): Likewise. * math/auto-libm-test-in: Add more tests of exp. * math/auto-libm-test-out: Regenerated. diff --git a/math/auto-libm-test-in b/math/auto-libm-test-in index 15c9146..9da01fb 100644 --- a/math/auto-libm-test-in +++ b/math/auto-libm-test-in @@ -1396,6 +1396,17 @@ exp -0x1.75f113c30b1c8p+9 exp -max exp -11342.8125 exp -0x2.c5b2319c4843acc0p12 +exp -0x2.c469d9p+8 +exp -0x2.c46d96p+8 +exp -0x2.c46727p+8 +exp -0x2.c469dep+8 +exp -0x2.c46c04p+8 +exp -0x2.c46adep+8 +exp -0x2.c471b3p+8 +exp -0x2.c46993p+8 +exp -0x2.c49fap+8 +exp -0x2.c4ac1p+8 +exp -0x2.c4d89p+8 exp 0x1p-10 exp -0x1p-10 exp 0x1p-20 diff --git a/sysdeps/i386/fpu/e_exp.S b/sysdeps/i386/fpu/e_exp.S index 2c331d9..c00beed 100644 --- a/sysdeps/i386/fpu/e_exp.S +++ b/sysdeps/i386/fpu/e_exp.S @@ -5,9 +5,25 @@ #include <machine/asm.h> + .section .rodata.cst8,"aM",@progbits,8 + .p2align 3 + .type dbl_min,@object +dbl_min: .byte 0, 0, 0, 0, 0, 0, 0x10, 0 + ASM_SIZE_DIRECTIVE(dbl_min) + +#ifdef PIC +# define MO(op) op##@GOTOFF(%ecx) +#else +# define MO(op) op +#endif + + .text /* e^x = 2^(x * log2(e)) */ ENTRY(__ieee754_exp) +#ifdef PIC + LOAD_PIC_REG (cx) +#endif fldl 4(%esp) /* I added the following ugly construct because exp(+-Inf) resulted in NaN. The ugliness results from the bright minds at Intel. @@ -30,7 +46,22 @@ ENTRY(__ieee754_exp) faddp /* 2^(fract(x * log2(e))) */ fscale /* e^x */ fstp %st(1) - ret + fldl MO(dbl_min) + fld %st(1) + fucompp + fnstsw + sahf + jnc 3f + subl $8, %esp + cfi_adjust_cfa_offset (8) + fld %st(0) + fmul %st(0) + fstpl (%esp) + fstpl (%esp) + fldl (%esp) + addl $8, %esp + cfi_adjust_cfa_offset (-8) +3: ret 1: testl $0x200, %eax /* Test sign. */ jz 2f /* If positive, jump. 
*/ @@ -41,6 +72,9 @@ END (__ieee754_exp) ENTRY(__exp_finite) +#ifdef PIC + LOAD_PIC_REG (cx) +#endif fldl2e fmull 4(%esp) /* x * log2(e) */ fld %st @@ -52,5 +86,20 @@ ENTRY(__exp_finite) faddp /* 2^(fract(x * log2(e))) */ fscale /* e^x */ fstp %st(1) - ret + fldl MO(dbl_min) + fld %st(1) + fucompp + fnstsw + sahf + jnc 4f + subl $8, %esp + cfi_adjust_cfa_offset (8) + fld %st(0) + fmul %st(0) + fstpl (%esp) + fstpl (%esp) + fldl (%esp) + addl $8, %esp + cfi_adjust_cfa_offset (-8) +4: ret END(__exp_finite) diff --git a/sysdeps/i386/fpu/e_expf.S b/sysdeps/i386/fpu/e_expf.S index 4e4f6a0..306afd1 100644 --- a/sysdeps/i386/fpu/e_expf.S +++ b/sysdeps/i386/fpu/e_expf.S @@ -6,9 +6,25 @@ #include <machine/asm.h> + .section .rodata.cst4,"aM",@progbits,4 + .p2align 2 + .type flt_min,@object +flt_min: .byte 0, 0, 0x80, 0 + ASM_SIZE_DIRECTIVE(flt_min) + +#ifdef PIC +# define MO(op) op##@GOTOFF(%ecx) +#else +# define MO(op) op +#endif + + .text /* e^x = 2^(x * log2(e)) */ ENTRY(__ieee754_expf) +#ifdef PIC + LOAD_PIC_REG (cx) +#endif flds 4(%esp) /* I added the following ugly construct because exp(+-Inf) resulted in NaN. The ugliness results from the bright minds at Intel. @@ -31,7 +47,22 @@ ENTRY(__ieee754_expf) faddp /* 2^(fract(x * log2(e))) */ fscale /* e^x */ fstp %st(1) - ret + flds MO(flt_min) + fld %st(1) + fucompp + fnstsw + sahf + jnc 3f + subl $4, %esp + cfi_adjust_cfa_offset (4) + fld %st(0) + fmul %st(0) + fstps (%esp) + fstps (%esp) + flds (%esp) + addl $4, %esp + cfi_adjust_cfa_offset (-4) +3: ret 1: testl $0x200, %eax /* Test sign. */ jz 2f /* If positive, jump. 
*/ @@ -42,6 +73,9 @@ END (__ieee754_expf) ENTRY(__expf_finite) +#ifdef PIC + LOAD_PIC_REG (cx) +#endif fldl2e fmuls 4(%esp) /* x * log2(e) */ fld %st @@ -53,5 +87,20 @@ ENTRY(__expf_finite) faddp /* 2^(fract(x * log2(e))) */ fscale /* e^x */ fstp %st(1) - ret + flds MO(flt_min) + fld %st(1) + fucompp + fnstsw + sahf + jnc 4f + subl $4, %esp + cfi_adjust_cfa_offset (4) + fld %st(0) + fmul %st(0) + fstps (%esp) + fstps (%esp) + flds (%esp) + addl $4, %esp + cfi_adjust_cfa_offset (-4) +4: ret END(__expf_finite)