From patchwork Wed May 25 01:32:28 2016
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Matt Turner <mattst88@gmail.com>
X-Patchwork-Id: 12504
Received: (qmail 71658 invoked by alias); 25 May 2016 01:32:41 -0000
Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm
Precedence: bulk
List-Id: <libc-alpha.sourceware.org>
List-Unsubscribe: <mailto:libc-alpha-unsubscribe-##L=##H@sourceware.org>
List-Subscribe: <mailto:libc-alpha-subscribe@sourceware.org>
List-Archive: <http://sourceware.org/ml/libc-alpha/>
List-Post: <mailto:libc-alpha@sourceware.org>
List-Help: <mailto:libc-alpha-help@sourceware.org>,
	<http://sourceware.org/ml/#faqs>
Sender: libc-alpha-owner@sourceware.org
Delivered-To: mailing list libc-alpha@sourceware.org
Received: (qmail 71608 invoked by uid 89); 25 May 2016 01:32:40 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-2.3 required=5.0 tests=AWL, BAYES_00,
	FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW,
	SPF_PASS autolearn=ham version=3.3.2 spammy=Contributed,
	contributed, 2131, D*cygnus.com
X-HELO: mail-pa0-f66.google.com
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
	d=1e100.net; s=20130820;
	h=x-gm-message-state:from:to:cc:subject:date:message-id;
	bh=bfjDsL/Upv/5pVb1ZDXBL3LUD4IZ22HWNFa9rdwS7Fc=;
	b=BPa75Nk1xkeDEMyud74GDzy2Jt25osait9jDjUKP6MvckYKbi5u18VzeoUjn/yFH54
	hSXUiEs3VbNtZJhzVp1kGJQvtQIvCRZCFIHdod6ELaT6ruIzYsyjakOeI5OBZ/+vG4Fs
	4EEmmbNO+s7RuQYwx0sIDKwRS+YTSL90E8EC2Z9hsXkNl5LHsX3z0TH+Eyou7BlgOv6+
	S/+/6aXwF0oIuZ6QEY6YEzMXztpHu3m9A8umQwghocFvxBW+W6exCVIKxjgPCu+GeThE
	3bABSmXXxvnSzl5PsQrwErQ2nTGKx/nkUb824Xfsn/WZClc+pdimUQeEIfnHgsfUs1DX
	AZXw==
X-Gm-Message-State: 
 ALyK8tJWcEictnw/0aLD+WjWDLGXvrtCNwsPfwMGB8QKzafmnkCtx3DuIspFOkTtcYxPmw==
X-Received: by 10.66.6.35 with SMTP id x3mr1745709pax.135.1464139948521;
	Tue, 24 May 2016 18:32:28 -0700 (PDT)
From: Matt Turner <mattst88@gmail.com>
To: libc-alpha@sourceware.org
Cc: Joseph Myers <joseph@codesourcery.com>, Matt Turner <mattst88@gmail.com>
Subject: [PATCH 1/3] Optimize trunc() and truncf().
Date: Tue, 24 May 2016 18:32:28 -0700
Message-Id: <1464139950-31943-1-git-send-email-mattst88@gmail.com>

Compute the result by creating a mask of the non-fractional bits from the
exponent and applying it to the bit representation of the argument.
---

Joseph suggested an SSE 4.1 implementation of trunc/truncf, so I thought
now would be a good time to send these patches even sans benchmarking data.

I do not believe the other two generic trunc* implementations (ldbl-128,
dbl-64) would benefit from the change made in this patch.

Suggestions for ChangeLog entries welcome. Guidance for generating benchmark
data requested.

 sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c | 32 +++++++++++-----------------
 sysdeps/ieee754/flt-32/s_truncf.c            | 32 +++++++++++-----------------
 2 files changed, 26 insertions(+), 38 deletions(-)

diff --git a/sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c b/sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c
index 81ac55e..e4cba3b 100644
--- a/sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c
+++ b/sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c
@@ -1,7 +1,6 @@
 /* Truncate argument to nearest integral value not larger than the argument.
    Copyright (C) 1997-2016 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -21,31 +20,26 @@
 
 #include <math_private.h>
 
+/* Clear the fraction bits of X using a mask built from its exponent; the
+   shift count stays in [1, 52] as shifting by >= 64 is undefined.  */
 
 double
 __trunc (double x)
 {
-  int64_t i0, j0;
-  int64_t sx;
+  int64_t i0;
 
   EXTRACT_WORDS64 (i0, x);
-  sx = i0 & UINT64_C(0x8000000000000000);
-  j0 = ((i0 >> 52) & 0x7ff) - 0x3ff;
-  if (j0 < 52)
-    {
-      if (j0 < 0)
-	/* The magnitude of the number is < 1 so the result is +-0.  */
-	INSERT_WORDS64 (x, sx);
-      else
-	INSERT_WORDS64 (x, sx | (i0 & ~(UINT64_C(0x000fffffffffffff) >> j0)));
-    }
-  else
-    {
-      if (j0 == 0x400)
-	/* x is inf or NaN.  */
-	return x + x;
-    }
+  int64_t exp = (i0 >> 52) & 0x7ff;
+  if (exp == 0x7ff)
+    /* x is inf or NaN.  */
+    return x + x;
+  if (exp < 1023)
+    /* The magnitude of the number is < 1 so the result is +-0.  */
+    i0 &= UINT64_C (0x8000000000000000);
+  else if (exp < 1023 + 52)
+    i0 &= ~UINT64_C (0) << (52 - (exp - 1023));
 
+  INSERT_WORDS64 (x, i0);
   return x;
 }
 weak_alias (__trunc, trunc)
diff --git a/sysdeps/ieee754/flt-32/s_truncf.c b/sysdeps/ieee754/flt-32/s_truncf.c
index 43d35c7..67fdcc8 100644
--- a/sysdeps/ieee754/flt-32/s_truncf.c
+++ b/sysdeps/ieee754/flt-32/s_truncf.c
@@ -1,7 +1,6 @@
 /* Truncate argument to nearest integral value not larger than the argument.
    Copyright (C) 1997-2016 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -21,31 +20,26 @@
 
 #include <math_private.h>
 
+/* Clear the fraction bits of X using a mask built from its exponent; the
+   shift count stays in [1, 23] as shifting by >= 32 is undefined.  */
 
 float
 __truncf (float x)
 {
-  int32_t i0, j0;
-  int sx;
+  int32_t i0;
 
   GET_FLOAT_WORD (i0, x);
-  sx = i0 & 0x80000000;
-  j0 = ((i0 >> 23) & 0xff) - 0x7f;
-  if (j0 < 23)
-    {
-      if (j0 < 0)
-	/* The magnitude of the number is < 1 so the result is +-0.  */
-	SET_FLOAT_WORD (x, sx);
-      else
-	SET_FLOAT_WORD (x, sx | (i0 & ~(0x007fffff >> j0)));
-    }
-  else
-    {
-      if (j0 == 0x80)
-	/* x is inf or NaN.  */
-	return x + x;
-    }
+  int32_t exp = (i0 >> 23) & 0xff;
+  if (exp == 0xff)
+    /* x is inf or NaN.  */
+    return x + x;
+  if (exp < 127)
+    /* The magnitude of the number is < 1 so the result is +-0.  */
+    i0 &= 0x80000000;
+  else if (exp < 127 + 23)
+    i0 &= ~0u << (23 - (exp - 127));
 
+  SET_FLOAT_WORD (x, i0);
   return x;
 }
 weak_alias (__truncf, truncf)