From patchwork Fri Aug  1 17:51:55 2014
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Richard Henderson <rth@twiddle.net>
X-Patchwork-Id: 2286
Received: (qmail 4797 invoked by alias); 1 Aug 2014 17:52:07 -0000
Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm
Precedence: bulk
List-Id: <libc-alpha.sourceware.org>
List-Unsubscribe: <mailto:libc-alpha-unsubscribe-##L=##H@sourceware.org>
List-Subscribe: <mailto:libc-alpha-subscribe@sourceware.org>
List-Archive: <http://sourceware.org/ml/libc-alpha/>
List-Post: <mailto:libc-alpha@sourceware.org>
List-Help: <mailto:libc-alpha-help@sourceware.org>,
	<http://sourceware.org/ml/#faqs>
Sender: libc-alpha-owner@sourceware.org
Delivered-To: mailing list libc-alpha@sourceware.org
Received: (qmail 4780 invoked by uid 89); 1 Aug 2014 17:52:06 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-2.4 required=5.0 tests=BAYES_00,
	FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW,
	SPF_PASS autolearn=ham version=3.3.2
X-HELO: mail-pa0-f50.google.com
X-Received: by 10.68.245.135 with SMTP id xo7mr8081548pbc.161.1406915523394;
	Fri, 01 Aug 2014 10:52:03 -0700 (PDT)
From: Richard Henderson <rth@twiddle.net>
To: libc-alpha@sourceware.org
Cc: joseph@codesourcery.com
Subject: [PATCH v3] force eval for fma implementations
Date: Fri,  1 Aug 2014 07:51:55 -1000
Message-Id: <1406915515-18596-1-git-send-email-rth@twiddle.net>

Changes in v3: apply the corresponding fixes to ldbl-96/s_fma.c,
ldbl-96/s_fmal.c and ldbl-128/s_fmal.c as well, as suggested by Joseph.

OK to commit?


r~


	* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Use math_force_eval before
	feclearexcept; use math_opt_barrier instead of open-coded asm;
	reflow comments.
	* sysdeps/ieee754/ldbl-128/s_fmal.c (__fmal): Likewise.
	* sysdeps/ieee754/ldbl-96/s_fma.c (__fma): Likewise.
	* sysdeps/ieee754/ldbl-96/s_fmal.c (__fmal): Likewise.
---
 sysdeps/ieee754/dbl-64/s_fma.c    | 11 ++++++-----
 sysdeps/ieee754/ldbl-128/s_fmal.c | 11 ++++++-----
 sysdeps/ieee754/ldbl-96/s_fma.c   | 11 ++++++-----
 sysdeps/ieee754/ldbl-96/s_fmal.c  | 11 ++++++-----
 4 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/sysdeps/ieee754/dbl-64/s_fma.c b/sysdeps/ieee754/dbl-64/s_fma.c
index 389acd4..77065aa 100644
--- a/sysdeps/ieee754/dbl-64/s_fma.c
+++ b/sysdeps/ieee754/dbl-64/s_fma.c
@@ -198,16 +198,17 @@ __fma (double x, double y, double z)
   t1 = m1 - t1;
   t2 = z - t2;
   double a2 = t1 + t2;
+  /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
+  math_force_eval (m2);
+  math_force_eval (a2);
   feclearexcept (FE_INEXACT);
 
-  /* If the result is an exact zero, ensure it has the correct
-     sign.  */
+  /* If the result is an exact zero, ensure it has the correct sign.  */
   if (a1 == 0 && m2 == 0)
     {
       libc_feupdateenv (&env);
-      /* Ensure that round-to-nearest value of z + m1 is not
-	 reused.  */
-      asm volatile ("" : "=m" (z) : "m" (z));
+      /* Ensure that round-to-nearest value of z + m1 is not reused.  */
+      z = math_opt_barrier (z);
       return z + m1;
     }
 
diff --git a/sysdeps/ieee754/ldbl-128/s_fmal.c b/sysdeps/ieee754/ldbl-128/s_fmal.c
index 1d5f27b..8147094 100644
--- a/sysdeps/ieee754/ldbl-128/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-128/s_fmal.c
@@ -203,16 +203,17 @@ __fmal (long double x, long double y, long double z)
   t1 = m1 - t1;
   t2 = z - t2;
   long double a2 = t1 + t2;
+  /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
+  math_force_eval (m2);
+  math_force_eval (a2);
   feclearexcept (FE_INEXACT);
 
-  /* If the result is an exact zero, ensure it has the correct
-     sign.  */
+  /* If the result is an exact zero, ensure it has the correct sign.  */
   if (a1 == 0 && m2 == 0)
     {
       feupdateenv (&env);
-      /* Ensure that round-to-nearest value of z + m1 is not
-	 reused.  */
-      asm volatile ("" : "=m" (z) : "m" (z));
+      /* Ensure that round-to-nearest value of z + m1 is not reused.  */
+      z = math_opt_barrier (z);
       return z + m1;
     }
 
diff --git a/sysdeps/ieee754/ldbl-96/s_fma.c b/sysdeps/ieee754/ldbl-96/s_fma.c
index 354be4e..a27102e 100644
--- a/sysdeps/ieee754/ldbl-96/s_fma.c
+++ b/sysdeps/ieee754/ldbl-96/s_fma.c
@@ -64,16 +64,17 @@ __fma (double x, double y, double z)
   t1 = m1 - t1;
   t2 = z - t2;
   long double a2 = t1 + t2;
+  /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
+  math_force_eval (m2);
+  math_force_eval (a2);
   feclearexcept (FE_INEXACT);
 
-  /* If the result is an exact zero, ensure it has the correct
-     sign.  */
+  /* If the result is an exact zero, ensure it has the correct sign.  */
   if (a1 == 0 && m2 == 0)
     {
       feupdateenv (&env);
-      /* Ensure that round-to-nearest value of z + m1 is not
-	 reused.  */
-      asm volatile ("" : "=m" (z) : "m" (z));
+      /* Ensure that round-to-nearest value of z + m1 is not reused.  */
+      z = math_opt_barrier (z);
       return z + m1;
     }
 
diff --git a/sysdeps/ieee754/ldbl-96/s_fmal.c b/sysdeps/ieee754/ldbl-96/s_fmal.c
index 4983eda..0f299da 100644
--- a/sysdeps/ieee754/ldbl-96/s_fmal.c
+++ b/sysdeps/ieee754/ldbl-96/s_fmal.c
@@ -201,16 +201,17 @@ __fmal (long double x, long double y, long double z)
   t1 = m1 - t1;
   t2 = z - t2;
   long double a2 = t1 + t2;
+  /* Ensure the arithmetic is not scheduled after feclearexcept call.  */
+  math_force_eval (m2);
+  math_force_eval (a2);
   feclearexcept (FE_INEXACT);
 
-  /* If the result is an exact zero, ensure it has the correct
-     sign.  */
+  /* If the result is an exact zero, ensure it has the correct sign.  */
   if (a1 == 0 && m2 == 0)
     {
       feupdateenv (&env);
-      /* Ensure that round-to-nearest value of z + m1 is not
-	 reused.  */
-      asm volatile ("" : "=m" (z) : "m" (z));
+      /* Ensure that round-to-nearest value of z + m1 is not reused.  */
+      z = math_opt_barrier (z);
       return z + m1;
     }