[1/6,v2] libstdc++: Directly implement hexfloat std::from_chars for binary32/64
Commit Message
Series tested on x86_64, i686, ppc64, ppc64le and aarch64. Does it look OK for
trunk?
libstdc++-v3/ChangeLog:
* src/c++17/floating_from_chars.cc: Include <bit>.
(ascii_to_hexit, starts_with_ci): Define.
(__floating_from_chars_hex): Define.
(from_chars): Conditionally call __floating_from_chars_hex.
* testsuite/20_util/from_chars/7.cc: New test.
---
libstdc++-v3/src/c++17/floating_from_chars.cc | 376 ++++++++++++++++++
.../testsuite/20_util/from_chars/7.cc | 151 +++++++
2 files changed, 527 insertions(+)
create mode 100644 libstdc++-v3/testsuite/20_util/from_chars/7.cc
Comments
On Sun, 16 Jan 2022 at 18:07, Patrick Palka via Libstdc++ <
libstdc++@gcc.gnu.org> wrote:
> Series tested on x86_64, i686, ppc64, ppc64le, aarch64, does it look OK for
> trunk?
>
OK, thanks.
> libstdc++-v3/ChangeLog:
>
> * src/c++17/floating_from_chars.cc: Include <bit>.
> (ascii_to_hexit, starts_with_ci): Define.
> (__floating_from_chars_hex): Define.
> (from_chars): Conditionally call __floating_from_chars_hex.
> (testsuite/20_util/from_chars/7.cc): New test.
> ---
> libstdc++-v3/src/c++17/floating_from_chars.cc | 376 ++++++++++++++++++
> .../testsuite/20_util/from_chars/7.cc | 151 +++++++
> 2 files changed, 527 insertions(+)
> create mode 100644 libstdc++-v3/testsuite/20_util/from_chars/7.cc
>
> diff --git a/libstdc++-v3/src/c++17/floating_from_chars.cc
> b/libstdc++-v3/src/c++17/floating_from_chars.cc
> index 479e042bb5f..b186da9a955 100644
> --- a/libstdc++-v3/src/c++17/floating_from_chars.cc
> +++ b/libstdc++-v3/src/c++17/floating_from_chars.cc
> @@ -31,6 +31,7 @@
> #define _GLIBCXX_USE_CXX11_ABI 1
>
> #include <charconv>
> +#include <bit>
> #include <string>
> #include <memory_resource>
> #include <cfenv>
> @@ -396,6 +397,371 @@ namespace
> }
> #endif
>
> +#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64
> + // If the given ASCII character represents a hexit, return that hexit.
> + // Otherwise return -1.
> + int
> + ascii_to_hexit(char ch)
> + {
> + if (ch >= '0' && ch <= '9')
> + return ch - '0';
> + if (ch >= 'a' && ch <= 'f')
> + return ch - 'a' + 10;
> + if (ch >= 'A' && ch <= 'F')
> + return ch - 'A' + 10;
> + return -1;
> + }
> +
> + // Return true iff [FIRST,LAST) begins with PREFIX, ignoring case.
> + bool
> + starts_with_ci(const char* first, const char* last, string_view prefix)
> + {
> + __glibcxx_requires_valid_range(first, last);
> +
> + for (char ch : prefix)
> + {
> + __glibcxx_assert(ch >= 'a' && ch <= 'z');
> + if (first == last || (*first != ch && *first != ch - 32))
> + return false;
> + ++first;
> + }
> +
> + return true;
> + }
> +
> + // An implementation of hexadecimal float parsing for binary32/64.
> + template<typename T>
> + from_chars_result
> + __floating_from_chars_hex(const char* first, const char* last, T& value)
> + {
> + static_assert(is_same_v<T, float> || is_same_v<T, double>);
> +
> + using uint_t = conditional_t<is_same_v<T, float>, uint32_t, uint64_t>;
> + constexpr int mantissa_width = is_same_v<T, float> ? 23 : 52;
> + constexpr int exponent_width = is_same_v<T, float> ? 8 : 11;
> + constexpr int exponent_bias = (1 << (exponent_width - 1)) - 1;
> +
> + __glibcxx_requires_valid_range(first, last);
> + if (first == last)
> + return {first, errc::invalid_argument};
> +
> + // Consume the sign bit.
> + const char* const orig_first = first;
> + bool sign_bit = false;
> + if (*first == '-')
> + {
> + sign_bit = true;
> + ++first;
> + }
> +
> + // Handle "inf", "infinity", "NaN" and variants thereof.
> + if (first != last)
> + if (*first == 'i' || *first == 'I' || *first == 'n' || *first ==
> 'N') [[unlikely]]
> + {
> + if (starts_with_ci(first, last, "inf"sv))
> + {
> + first += 3;
> + if (starts_with_ci(first, last, "inity"sv))
> + first += 5;
> +
> + uint_t result = 0;
> + result |= sign_bit;
> + result <<= exponent_width;
> + result |= (1ull << exponent_width) - 1;
> + result <<= mantissa_width;
> + memcpy(&value, &result, sizeof(result));
> +
> + return {first, errc{}};
> + }
> + else if (starts_with_ci(first, last, "nan"))
> + {
> + first += 3;
> +
> + if (first != last && *first == '(')
> + {
> + // Tentatively consume the '(' as we look for an optional
> + // n-char-sequence followed by a ')'.
> + const char* const fallback_first = first;
> + for (;;)
> + {
> + ++first;
> + if (first == last)
> + {
> + first = fallback_first;
> + break;
> + }
> +
> + char ch = *first;
> + if (ch == ')')
> + {
> + ++first;
> + break;
> + }
> + else if ((ch >= '0' && ch <= '9')
> + || (ch >= 'a' && ch <= 'z')
> + || (ch >= 'A' && ch <= 'Z')
> + || ch == '_')
> + continue;
> + else
> + {
> + first = fallback_first;
> + break;
> + }
> + }
> + }
> +
> + // We make the implementation-defined decision of ignoring
> the
> + // sign bit and the n-char-sequence when assembling the NaN.
> + uint_t result = 0;
> + result <<= exponent_width;
> + result |= (1ull << exponent_width) - 1;
> + result <<= mantissa_width;
> + result |= (1ull << (mantissa_width - 1)) | 1;
> + memcpy(&value, &result, sizeof(result));
> +
> + return {first, errc{}};
> + }
> + }
> +
> + // Consume all insignificant leading zeros in the whole part of the
> + // mantissa.
> + bool seen_digit = false;
> + while (first != last && *first == '0')
> + {
> + seen_digit = true;
> + ++first;
> + }
> +
> + // Now consume the rest of the written mantissa, populating MANTISSA
> with the
> + // first MANTISSA_WIDTH+k significant bits of the written mantissa,
> where 1
> + // <= k <= 4 is the bit width of the leading significant written
> hexit.
> + //
> + // Examples:
> + // After parsing "1.2f3", MANTISSA is 0x12f30000000000
> (bit_width=52+1).
> + // After parsing ".0000f0e", MANTISSA is 0xf0e00000000000
> (bit_width=52+4).
> + // After parsing ".1234567890abcd8", MANTISSA is 0x1234567890abcd
> (bit_width=52+1)
> + // and MIDPOINT_BIT is true and NONZERO_TAIL is false.
> + uint_t mantissa = 0;
> + int mantissa_idx = mantissa_width; // The current bit index into
> MANTISSA
> + // into which we'll write the next
> hexit.
> + int exponent_adjustment = 0; // How much we'd have to adjust the
> written
> + // exponent in order to represent the
> mantissa
> + // in scientific form h.hhhhhhhhhhhhh.
> + bool midpoint_bit = false; // Whether the MANTISSA_WIDTH+k+1
> significant
> + // bit is set in the written mantissa.
> + bool nonzero_tail = false; // Whether some bit thereafter is set in
> the
> + // written mantissa.
> + bool seen_decimal_point = false;
> + for (; first != last; ++first)
> + {
> + char ch = *first;
> + if (ch == '.' && !seen_decimal_point)
> + {
> + seen_decimal_point = true;
> + continue;
> + }
> +
> + int hexit = ascii_to_hexit(ch);
> + if (hexit == -1)
> + break;
> + seen_digit = true;
> +
> + if (!seen_decimal_point && mantissa != 0)
> + exponent_adjustment += 4;
> + else if (seen_decimal_point && mantissa == 0)
> + {
> + exponent_adjustment -= 4;
> + if (hexit == 0x0)
> + continue;
> + }
> +
> + if (mantissa_idx >= 0)
> + mantissa |= uint_t(hexit) << mantissa_idx;
> + else if (mantissa_idx >= -4)
> + {
> + if constexpr (is_same_v<T, float>)
> + {
> + __glibcxx_assert(mantissa_idx == -1);
> + mantissa |= hexit >> 1;
> + midpoint_bit = (hexit & 0b0001) != 0;
> + }
> + else
> + {
> + __glibcxx_assert(mantissa_idx == -4);
> + midpoint_bit = (hexit & 0b1000) != 0;
> + nonzero_tail = (hexit & 0b0111) != 0;
> + }
> + }
> + else
> + nonzero_tail |= (hexit != 0x0);
> +
> + mantissa_idx -= 4;
> + }
> + if (mantissa != 0)
> + __glibcxx_assert(__bit_width(mantissa) >= mantissa_width + 1
> + && __bit_width(mantissa) <= mantissa_width + 4);
> + else
> + __glibcxx_assert(!midpoint_bit && !nonzero_tail);
> +
> + if (!seen_digit)
> + {
> + // If we haven't seen any digit at this point, the parse failed.
> + first = orig_first;
> + return {first, errc::invalid_argument};
> + }
> +
> + // Parse the written exponent.
> + int written_exponent = 0;
> + if (first != last && *first == 'p')
> + {
> + // Tentatively consume the 'p' and try to parse a decimal number.
> + const char* const fallback_first = first;
> + ++first;
> + if (first != last && *first == '+')
> + ++first;
> + from_chars_result fcr = from_chars(first, last, written_exponent,
> 10);
> + if (fcr.ptr == first)
> + // The parse failed, so undo consuming the 'p' and carry on as
> if the
> + // exponent was omitted (i.e. is 0).
> + first = fallback_first;
> + else
> + {
> + first = fcr.ptr;
> + if (mantissa != 0 && fcr.ec == errc::result_out_of_range)
> + // FIXME: Punt on large exponents for now.
> + return {first, errc::result_out_of_range};
> + }
> + }
> + int biased_exponent = written_exponent + exponent_bias;
> + if (exponent_adjustment != 0)
> + // The mantissa wasn't written in scientific form. Adjust the
> exponent
> + // so that we may assume scientific form.
> + //
> + // Examples:
> + // For input "a.bcp5", EXPONENT_ADJUSTMENT would be 0 since this
> + // written mantissa is already in scientific form.
> + // For input "ab.cp5", EXPONENT_ADJUSTMENT would be 4 since the
> + // scientific form is "a.bcp9".
> + // For input "0.0abcp5", EXPONENT_ADJUSTMENT would be -8 since the
> + // scientific form is "a.bcp-3".
> + biased_exponent += exponent_adjustment;
> +
> + // Shifts the mantissa to the right by AMOUNT while updating
> + // BIASED_EXPONENT, MIDPOINT_BIT and NONZERO_TAIL accordingly.
> + auto shift_mantissa = [&] (int amount) {
> + __glibcxx_assert(amount >= 0);
> + if (amount > mantissa_width + 1)
> + {
> + // Shifting the mantissa by an amount greater than its precision.
> + nonzero_tail |= midpoint_bit;
> + nonzero_tail |= mantissa != 0;
> + midpoint_bit = false;
> + mantissa = 0;
> + biased_exponent += amount;
> + }
> + else if (amount != 0)
> + {
> + nonzero_tail |= midpoint_bit;
> + nonzero_tail |= (mantissa & ((1ull << (amount - 1)) - 1)) != 0;
> + midpoint_bit = (mantissa & (1ull << (amount - 1))) != 0;
> + mantissa >>= amount;
> + biased_exponent += amount;
> + }
> + };
> +
> + if (mantissa != 0)
> + {
> + // If the leading hexit is not '1', shift MANTISSA to make it so.
> + // This normalizes input like "4.04p0" into "1.01p2".
> + const int leading_hexit = mantissa >> mantissa_width;
> + const int leading_hexit_width = __bit_width(leading_hexit); //
> FIXME: optimize?
> + __glibcxx_assert(leading_hexit_width >= 1 && leading_hexit_width
> <= 4);
> + shift_mantissa(leading_hexit_width - 1);
> + // After this adjustment, we can assume the leading hexit is a '1'.
> + __glibcxx_assert((mantissa >> mantissa_width) == 0x1);
> + }
> +
> + if (biased_exponent <= 0)
> + {
> + // This number is too small to be represented as a normal number,
> so
> + // try for a subnormal number by shifting the mantissa
> sufficiently.
> + // We need to shift by 1 more than -BIASED_EXPONENT because the
> leading
> + // mantissa bit is omitted in the representation of a normal
> number but
> + // not in a subnormal number.
> + shift_mantissa(-biased_exponent + 1);
> + __glibcxx_assert(!(mantissa & (1ull << mantissa_width)));
> + __glibcxx_assert(biased_exponent == 1);
> + biased_exponent = 0;
> + }
> +
> + // Perform round-to-nearest, tie-to-even rounding.
> + if (midpoint_bit && (nonzero_tail || (mantissa % 2) != 0))
> + {
> + // Rounding away from zero.
> + ++mantissa;
> + midpoint_bit = false;
> + nonzero_tail = false;
> +
> + // Deal with a couple of corner cases after rounding.
> + if (mantissa == (1ull << mantissa_width))
> + {
> + // We rounded the subnormal number 1.fffffffffffff...p-1023
> + // up to the normal number 1p-1022.
> + __glibcxx_assert(biased_exponent == 0);
> + ++biased_exponent;
> + }
> + else if (mantissa & (1ull << (mantissa_width + 1)))
> + {
> + // We rounded the normal number 1.fffffffffffff8pN (with maximal
> + // mantissa) up to 1p(N+1).
> + mantissa >>= 1;
> + ++biased_exponent;
> + }
> + }
> + else
> + {
> + // Rounding towards zero.
> +
> + if (mantissa == 0 && (midpoint_bit || nonzero_tail))
> + {
> + // A nonzero number that rounds to zero is unrepresentable.
> + __glibcxx_assert(biased_exponent == 0);
> + return {first, errc::result_out_of_range};
> + }
> +
> + midpoint_bit = false;
> + nonzero_tail = false;
> + }
> +
> + if (mantissa != 0 && biased_exponent >= (1 << exponent_width) - 1)
> + // The exponent of this number is too large to be representable.
> + return {first, errc::result_out_of_range};
> +
> + uint_t result = 0;
> + if (mantissa == 0)
> + {
> + // Assemble a (possibly signed) zero.
> + if (sign_bit)
> + result |= 1ull << (exponent_width + mantissa_width);
> + }
> + else
> + {
> + // Assemble a nonzero normal or subnormal value.
> + result |= sign_bit;
> + result <<= exponent_width;
> + result |= biased_exponent;
> + result <<= mantissa_width;
> + result |= mantissa & ((1ull << mantissa_width) - 1);
> + // The implicit leading mantissa bit is set iff the number is
> normal.
> + __glibcxx_assert(((mantissa & (1ull << mantissa_width)) != 0)
> + == (biased_exponent != 0));
> + }
> + memcpy(&value, &result, sizeof(result));
> +
> + return {first, errc{}};
> + }
> +#endif
> +
> } // namespace
>
> // FIXME: This should be reimplemented so it doesn't use strtod and
> newlocale.
> @@ -406,6 +772,11 @@ from_chars_result
> from_chars(const char* first, const char* last, float& value,
> chars_format fmt) noexcept
> {
> +#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64
> + if (fmt == chars_format::hex)
> + return __floating_from_chars_hex(first, last, value);
> +#endif
> +
> errc ec = errc::invalid_argument;
> #if _GLIBCXX_USE_CXX11_ABI
> buffer_resource mr;
> @@ -432,6 +803,11 @@ from_chars_result
> from_chars(const char* first, const char* last, double& value,
> chars_format fmt) noexcept
> {
> +#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64
> + if (fmt == chars_format::hex)
> + return __floating_from_chars_hex(first, last, value);
> +#endif
> +
> errc ec = errc::invalid_argument;
> #if _GLIBCXX_USE_CXX11_ABI
> buffer_resource mr;
> diff --git a/libstdc++-v3/testsuite/20_util/from_chars/7.cc
> b/libstdc++-v3/testsuite/20_util/from_chars/7.cc
> new file mode 100644
> index 00000000000..090ad7a87bb
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/20_util/from_chars/7.cc
> @@ -0,0 +1,151 @@
> +// Various testcases for binary64 hexfloat std::from_chars.
> +// { dg-do run { target c++17 } }
> +// { dg-require-effective-target ieee_floats }
> +
> +#include <charconv>
> +
> +#include <cfenv>
> +#include <cmath>
> +#include <cstring>
> +#include <cstdio>
> +#include <limits>
> +#include <testsuite_hooks.h>
> +
> +struct testcase {
> + const char* input;
> + size_t correct_idx;
> + std::errc correct_ec;
> + double correct_value;
> +};
> +
> +constexpr testcase testcases[] = {
> + { "1.fffffffffffff8p0", 18, {}, 0x1.fffffffffffff8p0 },
> + { "0.fffffffffffff8p-1022", 22, std::errc{}, 0x0.fffffffffffffep-1022 },
> + { "inf", 3, {}, std::numeric_limits<double>::infinity() },
> + { "inff", 3, {}, std::numeric_limits<double>::infinity() },
> + { "-inf", 4, {}, -std::numeric_limits<double>::infinity() },
> + { "-inff", 4, {}, -std::numeric_limits<double>::infinity() },
> + { "NAN", 3, {}, std::numeric_limits<double>::quiet_NaN() },
> + { "-NAN", 4, {}, std::numeric_limits<double>::quiet_NaN() },
> + { "NAN()", 5, {}, std::numeric_limits<double>::quiet_NaN() },
> + { "-NAN()", 6, {}, std::numeric_limits<double>::quiet_NaN() },
> + { "-NAN(test)", 10, {}, std::numeric_limits<double>::quiet_NaN() },
> + { "-NAN(test", 4, {}, std::numeric_limits<double>::quiet_NaN() },
> + { "-NAN(", 4, {}, std::numeric_limits<double>::quiet_NaN() },
> + { "0.000000000000001p-100000000000000000", 37,
> std::errc::result_out_of_range, 0 },
> + { "-lol", 0, std::errc::invalid_argument, 1 },
> + { " 0", 0, std::errc::invalid_argument, 1 },
> + { "", 0, std::errc::invalid_argument, 0 },
> + { "1", 1, {}, 1 },
> + { "2", 1, {}, 2 },
> + { "3", 1, {}, 3 },
> + { "4", 1, {}, 4 },
> + { "5", 1, {}, 5 },
> + { "6", 1, {}, 6 },
> + { "7", 1, {}, 7 },
> + { "8", 1, {}, 8 },
> + { "9", 1, {}, 9 },
> + { "a", 1, {}, 0xa },
> + { "b", 1, {}, 0xb },
> + { "c", 1, {}, 0xc },
> + { "d", 1, {}, 0xd },
> + { "e", 1, {}, 0xe },
> + { "f", 1, {}, 0xf },
> + { "0.000000000000000000000000000000000000000000001p-1022", 53,
> + std::errc::result_out_of_range, 0 },
> + { "0.00000000000000p-1022", 22, {}, 0 },
> + { "0.00000000000009", 16, {}, 0x0.00000000000009p0 },
> + { "0.0", 3, {}, 0 },
> + { "1p10000000000000000000000", 25, std::errc::result_out_of_range, 0 },
> + { "-0.0", 4, {}, -0.0 },
> + { "0.00000000000000", 16, {}, 0 },
> + { "0.0000000000000p-1022", 21, {}, 0 },
> + { ".", 0, std::errc::invalid_argument, 0 },
> + { "-.", 0, std::errc::invalid_argument, 0 },
> + { "0", 1, {}, 0 },
> + { "00", 2, {}, 0 },
> + { "00.", 3, {}, 0 },
> + { "0.", 2, {}, 0 },
> + { "1.ffffFFFFFFFFFF", 16, {}, 0x2 },
> + { "1.ffffffffffffff", 16, {}, 0x2 },
> + { "1.00000000000029", 16, {}, 0x1.0000000000003p0 },
> + { "0.00000000000008p-1022", 22, std::errc::result_out_of_range, 0 },
> + { "1.fffffffffffffp-1023", 21, {}, 0x1p-1022 },
> + { "1.fffffffffffff8p+1023", 22, std::errc::result_out_of_range, 0 },
> + { "0.ffffffffffffe8p-1022", 22, {}, 0x0.ffffffffffffep-1022 },
> + { "2.11111111111111", 16, {}, 0x1.0888888888889p+1 },
> + { "1.1111111111111", 15, {}, 0x1.1111111111111p0 },
> + { "1.11111111111111", 16, {}, 0x1.1111111111111p0 },
> + { "1.11111111111118", 16, {}, 0x1.1111111111112p0 },
> + { "1.11111111111128", 16, {}, 0x1.1111111111112p0 },
> + { "1.1111111111112801", 18, {}, 0x1.1111111111113p0 },
> + { "1.08888888888888", 16, {}, 0x1.0888888888888p0 },
> + { "1.088888888888888", 17, {}, 0x1.0888888888889p0 },
> + { "2.00000000000029", 16, {}, 0x2.0000000000002p0 },
> + { "0.ffffffffffffep-1022", 21, {}, 0x0.ffffffffffffep-1022 },
> + { "3.ffffffffffffep-1024", 21, {}, 0x1p-1022 },
> + { "1.00000000000008p+0", 19, {}, 1 },
> + { "1p-1023", 7, {}, 0x0.8p-1022 },
> + { "1p-1022", 7, {}, 0x1p-1022 },
> + { "1.1p-1033", 9, {}, 0x1.1p-1033 }, // 0.0022p-1022
> + { "22p-1038", 8, {}, 0x1.1p-1033 },
> + { "5", 1, {}, 0x5 },
> + { "a", 1, {}, 0xa },
> + { "1", 1, {}, 1.0 },
> + { "1p1", 3, {}, 0x1p1 },
> + { "1p-1", 4, {}, 0x1p-1 },
> + { "0", 1, {}, 0.0 },
> + { "A", 1, {}, 0xA },
> + { "-1", 2, {}, -1.0 },
> + { "-0", 2, {}, -0.0 },
> + { "42", 2, {}, 0x42p0 },
> + { "-42", 3, {}, -0x42p0 },
> + { ".1", 2, {}, 0x0.1p0 },
> + { "22p-1000", 8, {}, 0x22p-1000 },
> + { ".0000008", 8, {}, 0x.0000008p0 },
> + { ".0000008p-1022", 14, {}, 0x.0000008p-1022 },
> + { "1p-1074", 7, {}, 0x.0000000000001p-1022 },
> + { "9999999999999", 13, {}, 0x9999999999999p0 },
> + { "1.000000000000a000", 18, {}, 0x1.000000000000ap0 },
> + { "1.000000000000a001", 18, {}, 0x1.000000000000ap0 },
> + { "1.000000000000a800", 18, {}, 0x1.000000000000ap0 },
> + { "1.000000000000a801", 18, {}, 0x1.000000000000bp0 },
> + { "1.000000000000b800", 18, {}, 0x1.000000000000cp0 },
> + { "000000", 6, {}, 0x0 },
> + { "1p", 1, {}, 0x1 },
> + { "0p99999999999999999999", 22, {}, 0 },
> + { "1p99999999999999999999", 22, std::errc::result_out_of_range, 0 },
> + { "0p-99999999999999999999", 23, {}, 0 },
> + { "1p-99999999999999999999", 23, std::errc::result_out_of_range, 0 },
> + { "99999999999999999999999", 23, {}, 0x99999999999999999999999p0 },
> + { "-1.fffffffffffffp-1023", 22, {}, -0x1p-1022 },
> + { "1.337", 5, {}, 0x1.337p0 },
> +};
> +
> +void
> +test01() // Parse every entry of testcases as a binary64 hexfloat and check ptr, ec and value.
> +{
> +  for (auto [input,correct_idx,correct_ec,correct_value] : testcases)
> +    {
> +      double value;
> +      auto [ptr,ec] = std::from_chars(input, input+strlen(input),
> +                                      value, std::chars_format::hex);
> +      VERIFY( ptr == input + correct_idx );
> +      VERIFY( ec == correct_ec );
> +      if (ec == std::errc{}) // value is only inspected after a successful parse
> +        {
> +          if (std::isnan(value) && std::isnan(correct_value))
> +            ; // NaN never compares equal to anything, so two NaNs count as a match
> +          else
> +            {
> +              VERIFY( value == correct_value );
> +              VERIFY( !memcmp(&value, &correct_value, sizeof(value)) ); // bitwise check, so -0.0 and 0.0 differ
> +            }
> +        }
> +    }
> +}
> +
> +int main()
> +{
> + test01(); // run the table-driven hexfloat from_chars checks
> +}
> --
> 2.35.0.rc1
>
>
@@ -31,6 +31,7 @@
#define _GLIBCXX_USE_CXX11_ABI 1
#include <charconv>
+#include <bit>
#include <string>
#include <memory_resource>
#include <cfenv>
@@ -396,6 +397,371 @@ namespace
}
#endif
+#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64
+ // Map the hexadecimal digit character CH ('0'-'9', 'a'-'f' or 'A'-'F') to
+ // its numeric value in [0,15].  Return -1 if CH is not a hexadecimal digit.
+ int
+ ascii_to_hexit(char ch)
+ {
+ if (ch >= '0' && ch <= '9')
+ return ch - '0'; // decimal digits map to 0..9
+ if (ch >= 'a' && ch <= 'f')
+ return ch - 'a' + 10; // lowercase letters map to 10..15
+ if (ch >= 'A' && ch <= 'F')
+ return ch - 'A' + 10; // uppercase letters map to 10..15
+ return -1; // not a hexit
+ }
+
+ // Return true iff [FIRST,LAST) begins with PREFIX, ignoring case.  PREFIX itself must be all lowercase 'a'-'z'.
+ bool
+ starts_with_ci(const char* first, const char* last, string_view prefix)
+ {
+ __glibcxx_requires_valid_range(first, last);
+
+ for (char ch : prefix)
+ {
+ __glibcxx_assert(ch >= 'a' && ch <= 'z'); // precondition on PREFIX, not on the input
+ if (first == last || (*first != ch && *first != ch - 32)) // ch - 32 is the ASCII uppercase form of ch
+ return false; // input exhausted or character mismatch
+ ++first;
+ }
+
+ return true;
+ }
+
+  // Hexfloat parser for binary32/64: parses [FIRST,LAST) (no "0x" prefix) into VALUE, which is written only on success.
+  template<typename T> // T must be float or double
+  from_chars_result // ptr: end of the consumed text (FIRST on invalid_argument); ec: errc{}, invalid_argument or result_out_of_range
+  __floating_from_chars_hex(const char* first, const char* last, T& value)
+  {
+    static_assert(is_same_v<T, float> || is_same_v<T, double>);
+
+    using uint_t = conditional_t<is_same_v<T, float>, uint32_t, uint64_t>;
+    constexpr int mantissa_width = is_same_v<T, float> ? 23 : 52;
+    constexpr int exponent_width = is_same_v<T, float> ? 8 : 11;
+    constexpr int exponent_bias = (1 << (exponent_width - 1)) - 1;
+
+    __glibcxx_requires_valid_range(first, last);
+    if (first == last)
+      return {first, errc::invalid_argument};
+
+    // Consume the sign bit.
+    const char* const orig_first = first;
+    bool sign_bit = false;
+    if (*first == '-')
+      {
+        sign_bit = true;
+        ++first;
+      }
+
+    // Handle "inf", "infinity", "NaN" and variants thereof.
+    if (first != last)
+      if (*first == 'i' || *first == 'I' || *first == 'n' || *first == 'N') [[unlikely]]
+        {
+          if (starts_with_ci(first, last, "inf"sv))
+            {
+              first += 3;
+              if (starts_with_ci(first, last, "inity"sv))
+                first += 5;
+
+              uint_t result = 0;
+              result |= sign_bit;
+              result <<= exponent_width;
+              result |= (1ull << exponent_width) - 1;
+              result <<= mantissa_width;
+              memcpy(&value, &result, sizeof(result));
+
+              return {first, errc{}};
+            }
+          else if (starts_with_ci(first, last, "nan"sv))
+            {
+              first += 3;
+
+              if (first != last && *first == '(')
+                {
+                  // Tentatively consume the '(' as we look for an optional
+                  // n-char-sequence followed by a ')'.
+                  const char* const fallback_first = first;
+                  for (;;)
+                    {
+                      ++first;
+                      if (first == last)
+                        {
+                          first = fallback_first;
+                          break;
+                        }
+
+                      char ch = *first;
+                      if (ch == ')')
+                        {
+                          ++first;
+                          break;
+                        }
+                      else if ((ch >= '0' && ch <= '9')
+                               || (ch >= 'a' && ch <= 'z')
+                               || (ch >= 'A' && ch <= 'Z')
+                               || ch == '_')
+                        continue;
+                      else
+                        {
+                          first = fallback_first;
+                          break;
+                        }
+                    }
+                }
+
+              // We make the implementation-defined decision of ignoring the
+              // sign bit and the n-char-sequence when assembling the NaN.
+              uint_t result = 0;
+              result <<= exponent_width;
+              result |= (1ull << exponent_width) - 1;
+              result <<= mantissa_width;
+              result |= (1ull << (mantissa_width - 1)) | 1;
+              memcpy(&value, &result, sizeof(result));
+
+              return {first, errc{}};
+            }
+        }
+
+    // Consume all insignificant leading zeros in the whole part of the
+    // mantissa.
+    bool seen_digit = false;
+    while (first != last && *first == '0')
+      {
+        seen_digit = true;
+        ++first;
+      }
+
+    // Now consume the rest of the written mantissa, populating MANTISSA with the
+    // first MANTISSA_WIDTH+k significant bits of the written mantissa, where 1
+    // <= k <= 4 is the bit width of the leading significant written hexit.
+    //
+    // Examples:
+    //  After parsing "1.2f3", MANTISSA is 0x12f30000000000 (bit_width=52+1).
+    //  After parsing ".0000f0e", MANTISSA is 0xf0e00000000000 (bit_width=52+4).
+    //  After parsing ".1234567890abcd8", MANTISSA is 0x1234567890abcd (bit_width=52+1)
+    //   and MIDPOINT_BIT is true and NONZERO_TAIL is false.
+    uint_t mantissa = 0;
+    int mantissa_idx = mantissa_width; // The current bit index into MANTISSA
+                                       // into which we'll write the next hexit.
+    int exponent_adjustment = 0; // How much we'd have to adjust the written
+                                 // exponent in order to represent the mantissa
+                                 // in scientific form h.hhhhhhhhhhhhh.
+    bool midpoint_bit = false;  // Whether the MANTISSA_WIDTH+k+1 significant
+                                // bit is set in the written mantissa.
+    bool nonzero_tail = false;  // Whether some bit thereafter is set in the
+                                // written mantissa.
+    bool seen_decimal_point = false;
+    for (; first != last; ++first)
+      {
+        char ch = *first;
+        if (ch == '.' && !seen_decimal_point)
+          {
+            seen_decimal_point = true;
+            continue;
+          }
+
+        int hexit = ascii_to_hexit(ch);
+        if (hexit == -1)
+          break;
+        seen_digit = true;
+
+        if (!seen_decimal_point && mantissa != 0)
+          exponent_adjustment += 4;
+        else if (seen_decimal_point && mantissa == 0)
+          {
+            exponent_adjustment -= 4;
+            if (hexit == 0x0)
+              continue;
+          }
+
+        if (mantissa_idx >= 0)
+          mantissa |= uint_t(hexit) << mantissa_idx;
+        else if (mantissa_idx >= -4)
+          {
+            if constexpr (is_same_v<T, float>)
+              {
+                __glibcxx_assert(mantissa_idx == -1);
+                mantissa |= hexit >> 1;
+                midpoint_bit = (hexit & 0b0001) != 0;
+              }
+            else
+              {
+                __glibcxx_assert(mantissa_idx == -4);
+                midpoint_bit = (hexit & 0b1000) != 0;
+                nonzero_tail = (hexit & 0b0111) != 0;
+              }
+          }
+        else
+          nonzero_tail |= (hexit != 0x0);
+
+        mantissa_idx -= 4;
+      }
+    if (mantissa != 0)
+      __glibcxx_assert(__bit_width(mantissa) >= mantissa_width + 1
+                       && __bit_width(mantissa) <= mantissa_width + 4);
+    else
+      __glibcxx_assert(!midpoint_bit && !nonzero_tail);
+
+    if (!seen_digit)
+      {
+        // If we haven't seen any digit at this point, the parse failed.
+        first = orig_first;
+        return {first, errc::invalid_argument};
+      }
+
+    // Parse the written exponent, introduced by 'p' or 'P' as in strtod's hexfloat syntax.
+    int written_exponent = 0;
+    if (first != last && (*first == 'p' || *first == 'P'))
+      {
+        // Tentatively consume the 'p' and try to parse a decimal number.
+        const char* const fallback_first = first;
+        ++first;
+        if (first != last && *first == '+' && !(last - first > 1 && first[1] == '-'))
+          ++first; // A '+' followed by '-' is left alone so that "p+-N" is rejected below.
+        from_chars_result fcr = from_chars(first, last, written_exponent, 10);
+        if (fcr.ptr == first)
+          // The parse failed, so undo consuming the 'p' and carry on as if the
+          // exponent was omitted (i.e. is 0).
+          first = fallback_first;
+        else
+          {
+            first = fcr.ptr;
+            if (mantissa != 0 && fcr.ec == errc::result_out_of_range)
+              // FIXME: Punt on large exponents for now.
+              return {first, errc::result_out_of_range};
+          }
+      }
+    int biased_exponent = written_exponent + exponent_bias; // NOTE(review): can overflow int for written exponents near the int limits
+    if (exponent_adjustment != 0)
+      // The mantissa wasn't written in scientific form.  Adjust the exponent
+      // so that we may assume scientific form.
+      //
+      // Examples:
+      //   For input "a.bcp5", EXPONENT_ADJUSTMENT would be 0 since this
+      //   written mantissa is already in scientific form.
+      //   For input "ab.cp5", EXPONENT_ADJUSTMENT would be 4 since the
+      //   scientific form is "a.bcp9".
+      //   For input "0.0abcp5", EXPONENT_ADJUSTMENT would be -8 since the
+      //   scientific form is "a.bcp-3".
+      biased_exponent += exponent_adjustment;
+
+    // Shifts the mantissa to the right by AMOUNT while updating
+    // BIASED_EXPONENT, MIDPOINT_BIT and NONZERO_TAIL accordingly.
+    auto shift_mantissa = [&] (int amount) {
+      __glibcxx_assert(amount >= 0);
+      if (amount > mantissa_width + 1)
+        {
+          // Shifting the mantissa by an amount greater than its precision.
+          nonzero_tail |= midpoint_bit;
+          nonzero_tail |= mantissa != 0;
+          midpoint_bit = false;
+          mantissa = 0;
+          biased_exponent += amount;
+        }
+      else if (amount != 0)
+        {
+          nonzero_tail |= midpoint_bit;
+          nonzero_tail |= (mantissa & ((1ull << (amount - 1)) - 1)) != 0;
+          midpoint_bit = (mantissa & (1ull << (amount - 1))) != 0;
+          mantissa >>= amount;
+          biased_exponent += amount;
+        }
+    };
+
+    if (mantissa != 0)
+      {
+        // If the leading hexit is not '1', shift MANTISSA to make it so.
+        // This normalizes input like "4.04p0" into "1.01p2".
+        const int leading_hexit = mantissa >> mantissa_width;
+        const int leading_hexit_width = __bit_width(leading_hexit); // FIXME: optimize?
+        __glibcxx_assert(leading_hexit_width >= 1 && leading_hexit_width <= 4);
+        shift_mantissa(leading_hexit_width - 1);
+        // After this adjustment, we can assume the leading hexit is a '1'.
+        __glibcxx_assert((mantissa >> mantissa_width) == 0x1);
+      }
+
+    if (biased_exponent <= 0)
+      {
+        // This number is too small to be represented as a normal number, so
+        // try for a subnormal number by shifting the mantissa sufficiently.
+        // We need to shift by 1 more than -BIASED_EXPONENT because the leading
+        // mantissa bit is omitted in the representation of a normal number but
+        // not in a subnormal number.
+        shift_mantissa(-biased_exponent + 1);
+        __glibcxx_assert(!(mantissa & (1ull << mantissa_width)));
+        __glibcxx_assert(biased_exponent == 1);
+        biased_exponent = 0;
+      }
+
+    // Perform round-to-nearest, tie-to-even rounding.
+    if (midpoint_bit && (nonzero_tail || (mantissa % 2) != 0))
+      {
+        // Rounding away from zero.
+        ++mantissa;
+        midpoint_bit = false;
+        nonzero_tail = false;
+
+        // Deal with a couple of corner cases after rounding.
+        if (mantissa == (1ull << mantissa_width))
+          {
+            // We rounded the subnormal number 1.fffffffffffff...p-1023
+            // up to the normal number 1p-1022.
+            __glibcxx_assert(biased_exponent == 0);
+            ++biased_exponent;
+          }
+        else if (mantissa & (1ull << (mantissa_width + 1)))
+          {
+            // We rounded the normal number 1.fffffffffffff8pN (with maximal
+            // mantissa) up to 1p(N+1).
+            mantissa >>= 1;
+            ++biased_exponent;
+          }
+      }
+    else
+      {
+        // Rounding towards zero.
+
+        if (mantissa == 0 && (midpoint_bit || nonzero_tail))
+          {
+            // A nonzero number that rounds to zero is unrepresentable.
+            __glibcxx_assert(biased_exponent == 0);
+            return {first, errc::result_out_of_range};
+          }
+
+        midpoint_bit = false;
+        nonzero_tail = false;
+      }
+
+    if (mantissa != 0 && biased_exponent >= (1 << exponent_width) - 1)
+      // The exponent of this number is too large to be representable.
+      return {first, errc::result_out_of_range};
+
+    uint_t result = 0;
+    if (mantissa == 0)
+      {
+        // Assemble a (possibly signed) zero.
+        if (sign_bit)
+          result |= 1ull << (exponent_width + mantissa_width);
+      }
+    else
+      {
+        // Assemble a nonzero normal or subnormal value.
+        result |= sign_bit;
+        result <<= exponent_width;
+        result |= biased_exponent;
+        result <<= mantissa_width;
+        result |= mantissa & ((1ull << mantissa_width) - 1);
+        // The implicit leading mantissa bit is set iff the number is normal.
+        __glibcxx_assert(((mantissa & (1ull << mantissa_width)) != 0)
+                         == (biased_exponent != 0));
+      }
+    memcpy(&value, &result, sizeof(result));
+
+    return {first, errc{}};
+  }
+#endif
+
} // namespace
// FIXME: This should be reimplemented so it doesn't use strtod and newlocale.
@@ -406,6 +772,11 @@ from_chars_result
from_chars(const char* first, const char* last, float& value,
chars_format fmt) noexcept
{
+#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64
+ if (fmt == chars_format::hex)
+ return __floating_from_chars_hex(first, last, value);
+#endif
+
errc ec = errc::invalid_argument;
#if _GLIBCXX_USE_CXX11_ABI
buffer_resource mr;
@@ -432,6 +803,11 @@ from_chars_result
from_chars(const char* first, const char* last, double& value,
chars_format fmt) noexcept
{
+#if _GLIBCXX_FLOAT_IS_IEEE_BINARY32 && _GLIBCXX_DOUBLE_IS_IEEE_BINARY64
+ if (fmt == chars_format::hex)
+ return __floating_from_chars_hex(first, last, value);
+#endif
+
errc ec = errc::invalid_argument;
#if _GLIBCXX_USE_CXX11_ABI
buffer_resource mr;
new file mode 100644
@@ -0,0 +1,151 @@
+// Various testcases for binary64 hexfloat std::from_chars.
+// { dg-do run { target c++17 } }
+// { dg-require-effective-target ieee_floats }
+
+#include <charconv>
+
+#include <cfenv>
+#include <cmath>
+#include <cstring>
+#include <cstdio>
+#include <limits>
+#include <testsuite_hooks.h>
+
+struct testcase { // One table row; test01 unpacks it positionally via a structured binding.
+ const char* input; // NUL-terminated text passed to std::from_chars (length taken with strlen)
+ size_t correct_idx; // expected offset of the returned ptr from the start of input
+ std::errc correct_ec; // expected error code; a value-initialized errc means success
+ double correct_value; // expected result; test01 only looks at it when from_chars succeeds
+};
+
+constexpr testcase testcases[] = { // { input, chars consumed, expected errc, expected value }
+ { "1.fffffffffffff8p0", 18, {}, 0x1.fffffffffffff8p0 }, // tie above an odd mantissa: rounds up to 0x2p0
+ { "0.fffffffffffff8p-1022", 22, std::errc{}, 0x0.fffffffffffffep-1022 }, // subnormal tie above an odd mantissa: rounds up to 0x1p-1022
+ { "inf", 3, {}, std::numeric_limits<double>::infinity() },
+ { "inff", 3, {}, std::numeric_limits<double>::infinity() }, // the trailing 'f' is not consumed
+ { "-inf", 4, {}, -std::numeric_limits<double>::infinity() },
+ { "-inff", 4, {}, -std::numeric_limits<double>::infinity() },
+ { "NAN", 3, {}, std::numeric_limits<double>::quiet_NaN() },
+ { "-NAN", 4, {}, std::numeric_limits<double>::quiet_NaN() }, // test01 does not compare the sign of a NaN
+ { "NAN()", 5, {}, std::numeric_limits<double>::quiet_NaN() },
+ { "-NAN()", 6, {}, std::numeric_limits<double>::quiet_NaN() },
+ { "-NAN(test)", 10, {}, std::numeric_limits<double>::quiet_NaN() },
+ { "-NAN(test", 4, {}, std::numeric_limits<double>::quiet_NaN() }, // no closing ')': only "-NAN" is consumed
+ { "-NAN(", 4, {}, std::numeric_limits<double>::quiet_NaN() }, // no closing ')': only "-NAN" is consumed
+ { "0.000000000000001p-100000000000000000", 37, std::errc::result_out_of_range, 0 }, // nonzero but far too small to represent
+ { "-lol", 0, std::errc::invalid_argument, 1 },
+ { " 0", 0, std::errc::invalid_argument, 1 }, // leading whitespace is not skipped
+ { "", 0, std::errc::invalid_argument, 0 },
+ { "1", 1, {}, 1 },
+ { "2", 1, {}, 2 },
+ { "3", 1, {}, 3 },
+ { "4", 1, {}, 4 },
+ { "5", 1, {}, 5 },
+ { "6", 1, {}, 6 },
+ { "7", 1, {}, 7 },
+ { "8", 1, {}, 8 },
+ { "9", 1, {}, 9 },
+ { "a", 1, {}, 0xa },
+ { "b", 1, {}, 0xb },
+ { "c", 1, {}, 0xc },
+ { "d", 1, {}, 0xd },
+ { "e", 1, {}, 0xe },
+ { "f", 1, {}, 0xf },
+ { "0.000000000000000000000000000000000000000000001p-1022", 53,
+ std::errc::result_out_of_range, 0 },
+ { "0.00000000000000p-1022", 22, {}, 0 },
+ { "0.00000000000009", 16, {}, 0x0.00000000000009p0 },
+ { "0.0", 3, {}, 0 },
+ { "1p10000000000000000000000", 25, std::errc::result_out_of_range, 0 }, // exponent far too large
+ { "-0.0", 4, {}, -0.0 },
+ { "0.00000000000000", 16, {}, 0 },
+ { "0.0000000000000p-1022", 21, {}, 0 },
+ { ".", 0, std::errc::invalid_argument, 0 },
+ { "-.", 0, std::errc::invalid_argument, 0 },
+ { "0", 1, {}, 0 },
+ { "00", 2, {}, 0 },
+ { "00.", 3, {}, 0 },
+ { "0.", 2, {}, 0 },
+ { "1.ffffFFFFFFFFFF", 16, {}, 0x2 },
+ { "1.ffffffffffffff", 16, {}, 0x2 },
+ { "1.00000000000029", 16, {}, 0x1.0000000000003p0 },
+ { "0.00000000000008p-1022", 22, std::errc::result_out_of_range, 0 }, // tie at half the smallest subnormal: rounds to zero, so out of range
+ { "1.fffffffffffffp-1023", 21, {}, 0x1p-1022 },
+ { "1.fffffffffffff8p+1023", 22, std::errc::result_out_of_range, 0 }, // rounds up past the largest finite double
+ { "0.ffffffffffffe8p-1022", 22, {}, 0x0.ffffffffffffep-1022 }, // tie above an even mantissa: rounds down
+ { "2.11111111111111", 16, {}, 0x1.0888888888889p+1 },
+ { "1.1111111111111", 15, {}, 0x1.1111111111111p0 },
+ { "1.11111111111111", 16, {}, 0x1.1111111111111p0 },
+ { "1.11111111111118", 16, {}, 0x1.1111111111112p0 },
+ { "1.11111111111128", 16, {}, 0x1.1111111111112p0 },
+ { "1.1111111111112801", 18, {}, 0x1.1111111111113p0 },
+ { "1.08888888888888", 16, {}, 0x1.0888888888888p0 },
+ { "1.088888888888888", 17, {}, 0x1.0888888888889p0 },
+ { "2.00000000000029", 16, {}, 0x2.0000000000002p0 },
+ { "0.ffffffffffffep-1022", 21, {}, 0x0.ffffffffffffep-1022 },
+ { "3.ffffffffffffep-1024", 21, {}, 0x1p-1022 },
+ { "1.00000000000008p+0", 19, {}, 1 },
+ { "1p-1023", 7, {}, 0x0.8p-1022 },
+ { "1p-1022", 7, {}, 0x1p-1022 },
+ { "1.1p-1033", 9, {}, 0x1.1p-1033 }, // 0.0022p-1022
+ { "22p-1038", 8, {}, 0x1.1p-1033 },
+ { "5", 1, {}, 0x5 },
+ { "a", 1, {}, 0xa },
+ { "1", 1, {}, 1.0 },
+ { "1p1", 3, {}, 0x1p1 },
+ { "1p-1", 4, {}, 0x1p-1 },
+ { "0", 1, {}, 0.0 },
+ { "A", 1, {}, 0xA },
+ { "-1", 2, {}, -1.0 },
+ { "-0", 2, {}, -0.0 },
+ { "42", 2, {}, 0x42p0 },
+ { "-42", 3, {}, -0x42p0 },
+ { ".1", 2, {}, 0x0.1p0 },
+ { "22p-1000", 8, {}, 0x22p-1000 },
+ { ".0000008", 8, {}, 0x.0000008p0 },
+ { ".0000008p-1022", 14, {}, 0x.0000008p-1022 },
+ { "1p-1074", 7, {}, 0x.0000000000001p-1022 },
+ { "9999999999999", 13, {}, 0x9999999999999p0 },
+ { "1.000000000000a000", 18, {}, 0x1.000000000000ap0 },
+ { "1.000000000000a001", 18, {}, 0x1.000000000000ap0 },
+ { "1.000000000000a800", 18, {}, 0x1.000000000000ap0 },
+ { "1.000000000000a801", 18, {}, 0x1.000000000000bp0 },
+ { "1.000000000000b800", 18, {}, 0x1.000000000000cp0 },
+ { "000000", 6, {}, 0x0 },
+ { "1p", 1, {}, 0x1 }, // 'p' without exponent digits is left unconsumed
+ { "0p99999999999999999999", 22, {}, 0 }, // zero mantissa: the oversized exponent does not matter
+ { "1p99999999999999999999", 22, std::errc::result_out_of_range, 0 },
+ { "0p-99999999999999999999", 23, {}, 0 },
+ { "1p-99999999999999999999", 23, std::errc::result_out_of_range, 0 },
+ { "99999999999999999999999", 23, {}, 0x99999999999999999999999p0 },
+ { "-1.fffffffffffffp-1023", 22, {}, -0x1p-1022 },
+ { "1.337", 5, {}, 0x1.337p0 },
+};
+
+// Feed every entry of testcases to std::from_chars in hex format and check
+// the returned end pointer and error code.  When parsing succeeds the
+// parsed value must also match the expected one: NaNs are compared by
+// classification (a NaN never compares equal to anything), all other
+// values both with == and bitwise.
+void
+test01()
+{
+  for (auto [input,correct_idx,correct_ec,correct_value] : testcases)
+    {
+      double value;
+      auto [ptr,ec] = std::from_chars(input, input+strlen(input),
+                                      value, std::chars_format::hex);
+      VERIFY( ptr == input + correct_idx );
+      VERIFY( ec == correct_ec );
+      if (ec == std::errc{})
+        {
+          // If either side is a NaN, both must be.  Checking only the
+          // parsed value would let a spurious NaN result slip through
+          // for an input whose expected value is an ordinary number.
+          if (std::isnan(value) || std::isnan(correct_value))
+            VERIFY( std::isnan(value) && std::isnan(correct_value) );
+          else
+            {
+              VERIFY( value == correct_value );
+              // == treats 0.0 and -0.0 as equal; the bitwise comparison
+              // additionally pins the sign of a zero result.
+              VERIFY( !memcmp(&value, &correct_value, sizeof(value)) );
+            }
+        }
+    }
+}
+
+// Program entry point: runs the single test function of this file.
+int
+main()
+{
+  test01();
+  return 0;
+}