@@ -310,6 +310,8 @@ core-math:
sysdeps/ieee754/flt-32/s_log2p1f.c
# src/binary32/sin/sinf.c revision 8ea8ea35
sysdeps/ieee754/flt-32/s_sinf.c
+ # src/binary32/sincos/sincosf.c revision 8ea8ea35
+ sysdeps/ieee754/flt-32/s_sincosf.c
# src/binary32/sinpi/sinpif.c, revision bbfabd99d
sysdeps/ieee754/flt-32/s_sinpif.c
# src/binary32/tan/tanf.c, revision 59d21d7
@@ -9499,6 +9499,7 @@ sincos 1e22
sincos 0x1p1023
sincos 0x1p16383
sincos 0x1p+120
+sincos 0x1p-126
sincos 0x1p+127
sincos 0x1.fffff8p+127
sincos 0x1.fffffep+127
@@ -1197,6 +1197,31 @@ sincos 0x1p+120
= sincos tonearest ibm128 0x1p+120 : 0x6.0b8d19579bf2db5e5f1aa933f4p-4 -0xe.d06685b36c66c4cf35c11f6518p-4 : inexact-ok
= sincos towardzero ibm128 0x1p+120 : 0x6.0b8d19579bf2db5e5f1aa933f2p-4 -0xe.d06685b36c66c4cf35c11f6518p-4 : inexact-ok
= sincos upward ibm128 0x1p+120 : 0x6.0b8d19579bf2db5e5f1aa933f4p-4 -0xe.d06685b36c66c4cf35c11f6518p-4 : inexact-ok
+sincos 0x1p-126
+= sincos downward binary32 0x4p-128 : 0x3.fffff8p-128 0xf.fffffp-4 : inexact-ok underflow-ok errno-erange-ok
+= sincos tonearest binary32 0x4p-128 : 0x4p-128 0x1p+0 : inexact-ok underflow-ok errno-erange-ok
+= sincos towardzero binary32 0x4p-128 : 0x3.fffff8p-128 0xf.fffffp-4 : inexact-ok underflow-ok errno-erange-ok
+= sincos upward binary32 0x4p-128 : 0x4p-128 0x1p+0 : inexact-ok underflow-ok errno-erange-ok
+= sincos downward binary64 0x4p-128 : 0x3.ffffffffffffep-128 0xf.ffffffffffff8p-4 : inexact-ok
+= sincos tonearest binary64 0x4p-128 : 0x4p-128 0x1p+0 : inexact-ok
+= sincos towardzero binary64 0x4p-128 : 0x3.ffffffffffffep-128 0xf.ffffffffffff8p-4 : inexact-ok
+= sincos upward binary64 0x4p-128 : 0x4p-128 0x1p+0 : inexact-ok
+= sincos downward intel96 0x4p-128 : 0x3.fffffffffffffffcp-128 0xf.fffffffffffffffp-4 : inexact-ok
+= sincos tonearest intel96 0x4p-128 : 0x4p-128 0x1p+0 : inexact-ok
+= sincos towardzero intel96 0x4p-128 : 0x3.fffffffffffffffcp-128 0xf.fffffffffffffffp-4 : inexact-ok
+= sincos upward intel96 0x4p-128 : 0x4p-128 0x1p+0 : inexact-ok
+= sincos downward m68k96 0x4p-128 : 0x3.fffffffffffffffcp-128 0xf.fffffffffffffffp-4 : inexact-ok
+= sincos tonearest m68k96 0x4p-128 : 0x4p-128 0x1p+0 : inexact-ok
+= sincos towardzero m68k96 0x4p-128 : 0x3.fffffffffffffffcp-128 0xf.fffffffffffffffp-4 : inexact-ok
+= sincos upward m68k96 0x4p-128 : 0x4p-128 0x1p+0 : inexact-ok
+= sincos downward binary128 0x4p-128 : 0x3.fffffffffffffffffffffffffffep-128 0xf.fffffffffffffffffffffffffff8p-4 : inexact-ok
+= sincos tonearest binary128 0x4p-128 : 0x4p-128 0x1p+0 : inexact-ok
+= sincos towardzero binary128 0x4p-128 : 0x3.fffffffffffffffffffffffffffep-128 0xf.fffffffffffffffffffffffffff8p-4 : inexact-ok
+= sincos upward binary128 0x4p-128 : 0x4p-128 0x1p+0 : inexact-ok
+= sincos downward ibm128 0x4p-128 : 0x3.ffffffffffffffffffffffffffp-128 0xf.fffffffffffffffffffffffffcp-4 : inexact-ok
+= sincos tonearest ibm128 0x4p-128 : 0x4p-128 0x1p+0 : inexact-ok
+= sincos towardzero ibm128 0x4p-128 : 0x3.ffffffffffffffffffffffffffp-128 0xf.fffffffffffffffffffffffffcp-4 : inexact-ok
+= sincos upward ibm128 0x4p-128 : 0x4p-128 0x1p+0 : inexact-ok
sincos 0x1p+127
= sincos downward binary32 0x8p+124 : 0x9.f9631p-4 0xc.82b8ep-4 : inexact-ok
= sincos tonearest binary32 0x8p+124 : 0x9.f9631p-4 0xc.82b8fp-4 : inexact-ok
@@ -275,6 +275,18 @@ float: 0
Function: "sin_upward":
float: 0
+Function: "sincos":
+float: 0
+
+Function: "sincos_downward":
+float: 0
+
+Function: "sincos_towardzero":
+float: 0
+
+Function: "sincos_upward":
+float: 0
+
Function: "sinh":
float: 0
@@ -1,28 +1,35 @@
-/* Compute sine and cosine of argument.
- Copyright (C) 2018-2026 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
+/* Correctly-rounded sincos of binary32 value.
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
+Copyright (c) 2024-2025 Alexei Sibidanov
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
+The original version of this file was copied from the CORE-MATH
+project (file src/binary32/sincos/sincosf.c, revision 8ea8ea35).
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
-#include <errno.h>
-#include <stdint.h>
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include <array_length.h>
#include <math.h>
-#include <math-barriers.h>
#include <libm-alias-float.h>
#include "math_config.h"
-#include "s_sincosf.h"
+#include <math_uint128.h>
+#include <s_sincosf_data.h>
#ifndef SECTION
# define SECTION
@@ -34,73 +41,174 @@
# define SINCOSF_FUNC SINCOSF
#endif
-/* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative
- error is 0.5303 * 2^-23. A single-step range reduction is used for
- small values. Large inputs have their range reduced using fast integer
- arithmetic. */
-void
-SECTION
-SINCOSF_FUNC (float y, float *sinp, float *cosp)
+static double __attribute__ ((noinline))
+rbig (uint32_t u, int *q) /* Reduce the float whose bit pattern is U: *Q receives an integer quotient, the return value is a signed fractional remainder. */
{
- double x = y;
- double s;
- int n;
- const sincos_t *p = &__sincosf_table[0];
-
- if (abstop12 (y) < abstop12 (pio4))
+ int e = (u >> 23) & 0xff, i; /* e: biased exponent field of u */
+ uint64_t m = (u & (~0u >> 9)) | 1 << 23; /* 23 fraction bits with the implicit leading one set */
+ u128 p0 = u128_mul (u128_from_u64 (m), u128_from_u64 (IPI[0])); /* p0..p3: m times each 64-bit word of IPI (defined elsewhere; presumably a fixed-point multiple of 1/pi - confirm) */
+ u128 p1 = u128_mul (u128_from_u64 (m), u128_from_u64 (IPI[1]));
+ p1 = u128_add (p1, u128_rshift (p0, 64)); /* carry the upper half of the lower partial product upward, so IPI[0] is the least significant word */
+ u128 p2 = u128_mul (u128_from_u64 (m), u128_from_u64 (IPI[2]));
+ p2 = u128_add (p2, u128_rshift (p1, 64));
+ u128 p3 = u128_mul (u128_from_u64 (m), u128_from_u64 (IPI[3]));
+ p3 = u128_add (p3, u128_rshift (p2, 64));
+ uint64_t p3h = u128_high (p3), p3l = u128_low (p3), p2l = u128_low (p2),
+ p1l = u128_low (p1);
+ int64_t a;
+ int k = e - 124, s = k - 23; /* s: bit offset of the window taken from the words p3h:p3l:p2l:p1l */
+ /* In this file rbig() is reached via as_sincosf_big(), i.e. for finite
+ x with ax > 0x99000000: 153 <= e <= 254, so 29 <= k <= 130, 6 <= s <= 107 */
+ if (s < 64)
{
- double x2 = x * x;
+ i = p3h << s | p3l >> (64 - s); /* i keeps the low bits of the window just above a */
+ a = p3l << s | p2l >> (64 - s); /* a: the next 64 bits below i */
+ }
+ else if (s == 64)
+ {
+ i = p3l;
+ a = p2l;
+ }
+ else
+ { /* s > 64 */
+ i = p3l << (s - 64) | p2l >> (128 - s);
+ a = p2l << (s - 64) | p1l >> (128 - s);
+ }
+ int sgn = u;
+ sgn >>= 31; /* 0 for a clear sign bit, -1 (all ones) for a set one; relies on arithmetic right shift of a negative int */
+ int64_t sm = a >> 63; /* -1 when the top bit of a is set, else 0 */
+ i -= sm; /* a is read as a signed fraction, so bump i by one when a is negative */
+ double z = (a ^ sgn) * 0x1p-64; /* a or ~a depending on sgn, scaled by 2^-64 */
+ i = (i ^ sgn) - sgn; /* two's-complement negation of i when sgn is -1 */
+ *q = i;
+ return z;
+}
- if (__glibc_unlikely (abstop12 (y) < abstop12 (0x1p-12f)))
+static inline double
+rltl (float z, int *q) /* Scale Z by a two-part constant: *Q receives the rounded integer part, the return value is the remainder. */
+{
+ double x = z;
+ double idl = -0x1.b1bbead603d8bp-29 * x, idh = 0x1.45f306ep+2 * x, /* idh/idl: products with the high and low parts of the multiplier (together close to 16/pi) */
+ id = roundeven_finite (idh); /* integer nearest to the high product */
+ *q = asuint64 (0x1.8p52 + id); /* adding 0x1.8p52 puts the integer id in the low bits of the encoding; the conversion to int is expected to keep those low bits */
+ return (idh - id) + idl; /* fractional part of the high product plus the low-order correction */
+}
+
+static inline double
+rltl0 (double x, int *q) /* Single-constant variant of the reduction: *Q receives the rounded integer part of X * C, the return value is the remainder. */
+{
+ double idh = 0x1.45f306dc9c883p+2 * x, id = roundeven_finite (idh); /* multiplier is close to 16/pi; id is the nearest integer to the product */
+ *q = asuint64 (0x1.8p52 + id); /* adding 0x1.8p52 puts the integer id in the low bits of the encoding; the conversion to int is expected to keep those low bits */
+ return idh - id; /* no low-order correction term on this path */
+}
+
+static inline float
+add_sign (float x, float rh, float rl) /* Return RH + RL with both terms multiplied by the sign of X. */
+{
+ float sgn = copysignf (1.0f, x); /* +1.0f or -1.0f, taken from the sign bit of x */
+ return sgn * rh + sgn * rl;
+}
+
+static void __attribute__ ((noinline))
+as_sincosf_database (float x, float *sout, float *cout) /* Look |X| up in ST_SINCOSF; on a match overwrite *SOUT and *COUT, otherwise leave both untouched. */
+{
+ uint32_t t = asuint (x);
+ uint32_t ax = t & (~0u >> 1); /* clear the sign bit: the table is searched by the bits of |x| */
+ for (unsigned i = 0; i < array_length (ST_SINCOSF); i++)
+ if (__glibc_unlikely (ST_SINCOSF[i].uarg == ax))
{
- /* Force underflow for tiny y. */
- if (__glibc_unlikely (abstop12 (y) < abstop12 (0x1p-126f)))
- math_force_eval ((float)x2);
- *sinp = y;
- *cosp = 1.0f;
- return;
+ *sout = add_sign (x, ST_SINCOSF[i].sh, ST_SINCOSF[i].sl); /* sine pair summed with the sign of x applied */
+ *cout = ST_SINCOSF[i].ch + ST_SINCOSF[i].cl; /* cosine pair summed with no sign adjustment */
+ break; /* stop at the first matching entry */
}
+}
- sincosf_poly (x, x2, p, 0, sinp, cosp);
+static void __attribute__ ((noinline))
+as_sincosf_big (float x, float *sout, float *cout) /* Out-of-line path: handles NaN and +-Inf, otherwise reduces X with rbig() and evaluates both results. */
+{
+ uint32_t t = asuint (x);
+ uint32_t ax = t << 1; /* shift the sign bit out; the exponent field is now the top byte */
+ if (__glibc_unlikely (ax >= 0xffu << 24))
+ { // nan or +-inf
+ if (ax << 8) /* non-zero fraction bits */
+ {
+ *sout = x + x;
+ *cout = x + x;
+ return; // nan
+ }
+ *sout = *cout = x - x; /* Inf - Inf: both results become NaN */
+ __math_invalidf (x + x);
+ return;
}
- else if (abstop12 (y) < abstop12 (120.0f))
- {
- x = reduce_fast (x, p, &n);
+ int ia;
+ double z = rbig (t, &ia); /* ia: integer quotient, z: fractional remainder */
+ double z2 = z * z, z4 = z2 * z2;
+ double aa = (A[0] + z2 * A[1]) + z4 * (A[2] + z2 * A[3]); /* degree-3 polynomial in z2 with coefficients A[] */
+ double bb = (B[0] + z2 * B[1]) + z4 * (B[2] + z2 * B[3]); /* degree-3 polynomial in z2 with coefficients B[] */
+ bb *= z;
+ double s0 = TB[ia & 31], c0 = TB[(ia + 8u) & 31]; /* TB is indexed modulo 32; c0 is read 8 entries after s0 */
+ double s = s0 + z * (aa * c0 - bb * s0);
+ double c = c0 - z * (aa * s0 + bb * c0);
+ *sout = s; /* narrowing to float rounds the double result */
+ *cout = c;
+ uint64_t tr = asuint64 (c);
+ uint64_t tail = (tr + 6) & (~UINT64_C(0) >> 36); /* low 28 bits of the encoding of c, offset by 6 */
+ if (__glibc_unlikely (tail <= 12)) /* low 28 bits of c within 6 of zero (mod 2^28): consult the exception table, which overwrites the outputs only on a match */
+ return as_sincosf_database (x, sout, cout);
+}
- /* Setup the signs for sin and cos. */
- s = p->sign[n & 3];
-
- if (n & 2)
- p = &__sincosf_table[1];
-
- sincosf_poly (x * s, x * x, p, n, sinp, cosp);
- }
- else if (__glibc_likely (abstop12 (y) < abstop12 (INFINITY)))
- {
- uint32_t xi = asuint (y);
- int sign = xi >> 31;
-
- x = reduce_large (xi, &n);
-
- /* Setup signs for sin and cos - include original sign. */
- s = p->sign[(n + sign) & 3];
-
- if ((n + sign) & 2)
- p = &__sincosf_table[1];
-
- sincosf_poly (x * s, x * x, p, n, sinp, cosp);
+void SECTION
+SINCOSF_FUNC (float x, float *sout, float *cout) /* Store the sine of X in *SOUT and the cosine of X in *COUT. */
+{
+ uint32_t ax = asuint (x) << 1; /* bit pattern with the sign shifted out; comparisons below are on |x| */
+ int ia;
+ double z0 = x, z;
+ if (__glibc_likely (ax < 0x822d97c8u))
+ { // |x| < 0x1.2d97c8p+3
+ if (__glibc_unlikely (ax < 0x73000000u))
+ { // |x| < 0x1p-12
+ if (__glibc_unlikely (ax < 0x66000000u))
+ { // |x| < 0x1p-25
+ if (__glibc_unlikely (ax == 0u))
+ {
+ *sout = x; /* returns x itself, so the sign of a zero input is kept */
+ *cout = 1.0f;
+ }
+ else
+ {
+ *sout = fmaf (-x, fabsf (x), x); /* x - x*|x| with a single rounding */
+ *cout = 1.0f - 0x1p-25f;
+ }
+ }
+ else
+ {
+ *sout = (-0x1.555556p-3f * x) * (x * x) + x; /* x - x^3/6 with the coefficient rounded to float */
+ *cout = (-0x1p-1f * x) * x + 1.0f; /* 1 - x^2/2 */
+ }
+ return;
+ }
+ if (__glibc_unlikely (ax == 0x812d97c8u)) /* |x| == 0x1.2d97c8p+2: take the result from the exception table */
+ return as_sincosf_database (x, sout, cout);
+ z = rltl0 (z0, &ia); /* reduction with a single double constant */
}
else
{
- /* Return NaN if Inf or NaN for both sin and cos. */
- *sinp = *cosp = y - y;
-#if WANT_ERRNO
- /* Needed to set errno for +-Inf, the add is a hack to work
- around a gcc register allocation issue: just passing y
- affects code generation in the fast path (PR86901). */
- __math_invalidf (y + y);
-#endif
+ if (__glibc_unlikely (ax > 0x99000000u)) /* |x| > 0x1p+26, or x is NaN or +-Inf */
+ return as_sincosf_big (x, sout, cout);
+ if (__glibc_unlikely (ax == 0x8c333330u)) /* |x| == 0x1.33333p+13: take the result from the exception table */
+ return as_sincosf_database (x, sout, cout);
+ z = rltl (z0, &ia); /* reduction with a two-part constant */
}
+ double z2 = z * z, z4 = z2 * z2;
+ double aa = (A[0] + z2 * A[1]) + z4 * (A[2] + z2 * A[3]); /* degree-3 polynomial in z2 with coefficients A[] */
+ double bb = (B[0] + z2 * B[1]) + z4 * (B[2] + z2 * B[3]); /* degree-3 polynomial in z2 with coefficients B[] */
+ aa *= z;
+ bb *= z2;
+ double s0 = TB[ia & 31], c0 = TB[(ia + 8) & 31]; /* TB is indexed modulo 32; c0 is read 8 entries after s0 */
+ double rs = s0 + (aa * c0 - bb * s0);
+ double rc = c0 - (aa * s0 + bb * c0);
+ *sout = rs; /* narrowing to float rounds the double result */
+ *cout = rc;
}
#ifndef SINCOSF
@@ -46,4 +46,17 @@ extern const sincosf_database_t __sinf_st[4] attribute_hidden;
extern const sincosf_database_t __cosf_st[5] attribute_hidden;
#define ST_COSF __cosf_st
+typedef struct /* One record of the sincosf exception table: a key plus sine and cosine results, each stored as a pair of floats. */
+{
+ union
+ {
+ float arg; /* the key as a float value */
+ uint32_t uarg; /* the same key as raw bits; as_sincosf_database compares this with the bits of |x| */
+ };
+ float sh, sl; /* sine pair; as_sincosf_database adds the two with the sign of x applied */
+ float ch, cl; /* cosine pair; as_sincosf_database adds the two */
+} sincosf2_database_t;
+extern const sincosf2_database_t __sincosf_st[9] attribute_hidden;
+#define ST_SINCOSF __sincosf_st
+
#endif
@@ -88,3 +88,16 @@ const sincosf_database_t __cosf_st[] =
{ { 0x1.3170fp+63 }, 0x1.fe2976p-1, 0x1p-26 },
{ { 0x1.2b9622p+67 }, 0x1.f0285ep-1, -0x1p-26 },
};
+
+const sincosf2_database_t __sincosf_st[] = /* Row layout: { key |x| }, sh, sl, ch, cl (field order of sincosf2_database_t). */
+{
+ { { 0x1.33333p+13 }, -0x1.63f4bap-2, -0x1p-27, -0x1.e01216p-1, -0x1p-26 },
+ { { 0x1.75b8a2p-1 }, 0x1.55688ap-1, -0x1p-26, 0x1.7d8e1ep-1, 0x1p-26 },
+ { { 0x1.4f0654p+0 }, 0x1.ee836cp-1, -0x1p-26, 0x1.09558p-2, -0x1p-27 },
+ { { 0x1.2d97c8p+3 }, -0x1.99bc5ap-26, -0x1p-51, -0x1p+0, 0x1p-25 },
+ { { 0x1.2d97c8p+2 }, -0x1p+0, 0x1p-25, 0x1.99bc5cp-27, -0x1p-52 },
+ { { 0x1.4555p+51 }, -0x1.b0ea44p-1, 0x1p-26, 0x1.115d7ep-1, -0x1p-26 },
+ { { 0x1.48a858p+54 }, 0x1.beac8cp-1, 0x1p-26, 0x1.f48148p-2, 0x1p-27 },
+ { { 0x1.3170fp+63 }, 0x1.5ac1eep-4, -0x1p-30, 0x1.fe2976p-1, 0x1p-26 },
+ { { 0x1.2b9622p+67 }, -0x1.f983c2p-3, 0x1p-28, 0x1.f0285ep-1, -0x1p-26 },
+};