//===--- SwiftDtoa.cpp ---------------------------------------------*- C++ -*-===// // // This source file is part of the Swift.org open source project // // Copyright (c) 2018-2020 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // //===---------------------------------------------------------------------===// // // Note: This source file is used in different projects where it gets // compiled variously as ".c" or ".cpp". Please keep the code clean // portable C so others can share your improvements. // /// For binary16, this uses a simple approach that is normally /// implemented with variable-length arithmetic. However, due to /// the limited range of binary16, this can be implemented simply /// with only 32-bit integer arithmetic. /// /// For other formats, SwiftDtoa uses a modified form of the Grisu2 /// algorithm from Florian Loitsch; "Printing Floating-Point Numbers /// Quickly and Accurately with Integers", 2010. /// https://doi.org/10.1145/1806596.1806623 /// /// Some of the Grisu2 modifications were suggested by the "Errol /// paper": Marc Andrysco, Ranjit Jhala, Sorin Lerner; "Printing /// Floating-Point Numbers: A Faster, Always Correct Method", 2016. /// https://doi.org/10.1145/2837614.2837654 /// In particular, the Errol paper explored the impact of higher-precision /// fixed-width arithmetic on Grisu2 and showed a way to rapidly test /// the correctness of such algorithms. /// /// A few further improvements were inspired by the Ryu algorithm /// from Ulf Anders; "Ryū: fast float-to-string conversion", 2018. /// https://doi.org/10.1145/3296979.3192369 /// /// In summary, this implementation is: /// /// * Fast. It uses only fixed-width integer arithmetic and has /// constant memory requirements. For double-precision values on /// 64-bit processors, it is competitive with Ryu. For double-precision /// values on 32-bit processors, and higher-precision values on all /// processors, it is considerably faster. /// /// * Always Accurate. Converting the decimal form back to binary /// will always yield exactly the same value. For the IEEE 754 /// formats, the round-trip will produce exactly the same bit /// pattern in memory. /// /// * Always Short. This always selects an accurate result with the /// minimum number of significant digits. /// /// * Always Close. Among all accurate, short results, this always /// chooses the result that is closest to the exact floating-point /// value. (In case of an exact tie, it rounds the last digit even.) /// /// * Portable. The code is written in portable C99. The core /// implementations utilize only fixed-size integer arithmetic. /// 128-bit integer support is utilized if present but is not /// necessary. There are thin wrappers that accept platform-native /// floating point types and delegate to the portable core /// functions. /// // ---------------------------------------------------------------------------- #include #include #include #include #include #include #include #include "swift/Runtime/SwiftDtoa.h" #if defined(__SIZEOF_INT128__) // We get a significant speed boost if we can use the __uint128_t // type that's present in GCC and Clang on 64-bit architectures. In // particular, we can do 128-bit arithmetic directly and can // represent 256-bit integers as collections of 64-bit elements. #define HAVE_UINT128_T 1 #else // On 32-bit, we use slower code that manipulates 128-bit // and 256-bit integers as collections of 32-bit elements. #define HAVE_UINT128_T 0 #endif // // Predefine various arithmetic helpers. Most implementations and extensive // comments are at the bottom of this file. // #if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT // The power-of-10 tables do not directly store the associated binary // exponent. That's because the binary exponent is a simple linear // function of the decimal power (and vice versa), so it's just as // fast (and uses much less memory) to compute it: // The binary exponent corresponding to a particular power of 10. // This matches the power-of-10 tables across the full range of binary128. #define binaryExponentFor10ToThe(p) ((int)(((((int64_t)(p)) * 55732705) >> 24) + 1)) // A decimal exponent that approximates a particular binary power. #define decimalExponentFor2ToThe(e) ((int)(((int64_t)e * 20201781) >> 26)) #endif // // Helper functions used only by the single-precision binary32 formatter // #if SWIFT_DTOA_BINARY32_SUPPORT static uint64_t multiply64x32RoundingDown(uint64_t lhs, uint32_t rhs) { static const uint64_t mask32 = UINT32_MAX; uint64_t t = ((lhs & mask32) * rhs) >> 32; return t + (lhs >> 32) * rhs; } static uint64_t multiply64x32RoundingUp(uint64_t lhs, uint32_t rhs) { static const uint64_t mask32 = UINT32_MAX; uint64_t t = (((lhs & mask32) * rhs) + mask32) >> 32; return t + (lhs >> 32) * rhs; } static void intervalContainingPowerOf10_Binary32(int p, uint64_t *lower, uint64_t *upper, int *exponent); #endif // // Helpers used by binary32, binary64, float80, and binary128. // #if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT #if HAVE_UINT128_T typedef __uint128_t swift_uint128_t; #define initialize128WithHighLow64(dest, high64, low64) ((dest) = ((__uint128_t)(high64) << 64) | (low64)) #define shiftLeft128(u128, shift) (*(u128) <<= shift) #else typedef struct { uint32_t low, b, c, high; } swift_uint128_t; #define initialize128WithHighLow64(dest, high64, low64) \ ((dest).low = (uint32_t)(low64), \ (dest).b = (uint32_t)((low64) >> 32), \ (dest).c = (uint32_t)(high64), \ (dest).high = (uint32_t)((high64) >> 32)) static void shiftLeft128(swift_uint128_t *, int shift); #endif inline static int finishFormatting(char *, size_t, char *, char *, int, int); #endif // // Helper functions needed by the binary64 formatter. // #if SWIFT_DTOA_BINARY64_SUPPORT #if HAVE_UINT128_T #define isLessThan128x128(lhs, rhs) ((lhs) < (rhs)) #define subtract128x128(lhs, rhs) (*(lhs) -= (rhs)) #define multiply128xu32(lhs, rhs) (*(lhs) *= (rhs)) #define initialize128WithHigh64(dest, value) ((dest) = (__uint128_t)(value) << 64) #define extractHigh64From128(arg) ((uint64_t)((arg) >> 64)) #define is128bitZero(arg) ((arg) == 0) static int extractIntegerPart128(__uint128_t *fixed128, int integerBits) { const int fractionBits = 128 - integerBits; int integerPart = (int)(*fixed128 >> fractionBits); const swift_uint128_t fixedPointMask = (((__uint128_t)1 << fractionBits) - 1); *fixed128 &= fixedPointMask; return integerPart; } #define shiftRightRoundingDown128(val, shift) ((val) >> (shift)) #define shiftRightRoundingUp128(val, shift) (((val) + (((uint64_t)1 << (shift)) - 1)) >> (shift)) #else static int isLessThan128x128(swift_uint128_t lhs, swift_uint128_t rhs); static void subtract128x128(swift_uint128_t *lhs, swift_uint128_t rhs); static void multiply128xu32(swift_uint128_t *lhs, uint32_t rhs); #define initialize128WithHigh64(dest, value) \ ((dest).low = (dest).b = 0, \ (dest).c = (uint32_t)(value), \ (dest).high = (uint32_t)((value) >> 32)) #define extractHigh64From128(arg) (((uint64_t)(arg).high << 32)|((arg).c)) #define is128bitZero(dest) \ (((dest).low | (dest).b | (dest).c | (dest).high) == 0) // Treat a uint128_t as a fixed-point value with `integerBits` bits in // the integer portion. Return the integer portion and zero it out. static int extractIntegerPart128(swift_uint128_t *fixed128, int integerBits) { const int highFractionBits = 32 - integerBits; int integerPart = (int)(fixed128->high >> highFractionBits); fixed128->high &= ((uint32_t)1 << highFractionBits) - 1; return integerPart; } static swift_uint128_t shiftRightRoundingDown128(swift_uint128_t lhs, int shift); static swift_uint128_t shiftRightRoundingUp128(swift_uint128_t lhs, int shift); #endif static swift_uint128_t multiply128x64RoundingDown(swift_uint128_t lhs, uint64_t rhs); static swift_uint128_t multiply128x64RoundingUp(swift_uint128_t lhs, uint64_t rhs); static void intervalContainingPowerOf10_Binary64(int p, swift_uint128_t *lower, swift_uint128_t *upper, int *exponent); #endif // // Helper functions used by the 256-bit backend needed for // float80 and binary128 // #if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT #if HAVE_UINT128_T // A 256-bit unsigned integer type stored as 3 64-bit words typedef struct {uint64_t low, midlow, midhigh, high;} swift_uint256_t; #define initialize256WithHighMidLow64(dest, high64, midhigh64, midlow64, low64) \ ((dest).low = (low64), \ (dest).midlow = (midlow64), \ (dest).midhigh = (midhigh64), \ (dest).high = (high64)) #define is256bitZero(dest) \ (((dest).low | (dest).midlow | (dest).midhigh | (dest).high) == 0) static int extractIntegerPart256(swift_uint256_t *fixed256, int integerBits) { int integerPart = (int)(fixed256->high >> (64 - integerBits)); const uint64_t fixedPointMask = (((uint64_t)1 << (64 - integerBits)) - 1); fixed256->high &= fixedPointMask; return integerPart; } #else // A 256-bit unsigned integer type stored as 8 32-bit words typedef struct { uint32_t elt[8]; } swift_uint256_t; // [0]=low, [7]=high #define initialize256WithHighMidLow64(dest, high64, midhigh64, midlow64, low64) \ ((dest).elt[0] = (uint64_t)(low64), \ (dest).elt[1] = (uint64_t)(low64) >> 32, \ (dest).elt[2] = (uint64_t)(midlow64), \ (dest).elt[3] = (uint64_t)(midlow64) >> 32, \ (dest).elt[4] = (uint64_t)(midhigh64), \ (dest).elt[5] = (uint64_t)(midhigh64) >> 32, \ (dest).elt[6] = (uint64_t)(high64), \ (dest).elt[7] = (uint64_t)(high64) >> 32) #define is256bitZero(dest) \ (((dest).elt[0] | (dest).elt[1] | (dest).elt[2] | (dest).elt[3] \ | (dest).elt[4] | (dest).elt[5] | (dest).elt[6] | (dest).elt[7]) == 0) static int extractIntegerPart256(swift_uint256_t *fixed256, int integerBits) { int integerPart = (int)(fixed256->elt[7] >> (32 - integerBits)); const uint64_t fixedPointMask = (((uint64_t)1 << (32 - integerBits)) - 1); fixed256->elt[7] &= fixedPointMask; return integerPart; } #endif static void multiply256xu32(swift_uint256_t *lhs, uint32_t rhs); // Multiply a 256-bit fraction times a 128-bit fraction, with controlled rounding static void multiply256x128RoundingDown(swift_uint256_t *lhs, swift_uint128_t rhs); static void multiply256x128RoundingUp(swift_uint256_t *lhs, swift_uint128_t rhs); static void subtract256x256(swift_uint256_t *lhs, swift_uint256_t rhs); static int isLessThan256x256(swift_uint256_t lhs, swift_uint256_t rhs); static void shiftRightRoundingDown256(swift_uint256_t *lhs, int shift); static void shiftRightRoundingUp256(swift_uint256_t *lhs, int shift); static void intervalContainingPowerOf10_Binary128(int p, swift_uint256_t *lower, swift_uint256_t *upper, int *exponent); static size_t _swift_dtoa_256bit_backend(char *, size_t, swift_uint128_t, swift_uint128_t, int, int, int, int, bool); #endif // A table of all two-digit decimal numbers #if SWIFT_DTOA_BINARY16_SUPPORT || SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT static const char asciiDigitTable[] = "0001020304050607080910111213141516171819" "2021222324252627282930313233343536373839" "4041424344454647484950515253545556575859" "6061626364656667686970717273747576777879" "8081828384858687888990919293949596979899"; #endif // ================================================================ // // Helpers to output formatted results for infinity, zero, and NaN // // ================================================================ static size_t infinity(char *dest, size_t len, int negative) { if (negative) { if (len >= 5) { memcpy(dest, "-inf", 5); return 4; } } else { if (len >= 4) { memcpy(dest, "inf", 4); return 3; } } if (len > 0) { dest[0] = '\0'; } return 0; } static size_t zero(char *dest, size_t len, int negative) { if (negative) { if (len >= 5) { memcpy(dest, "-0.0", 5); return 4; } } else { if (len >= 4) { memcpy(dest, "0.0", 4); return 3; } } if (len > 0) { dest[0] = '\0'; } return 0; } static size_t nan_details(char *dest, size_t len, int negative, int quiet, uint64_t payloadHigh, uint64_t payloadLow) { const char *sign = negative ? "-" : ""; const char *signalingChar = quiet ? "" : "s"; char buff[64]; if (payloadLow != 0) { if (payloadHigh != 0) { snprintf(buff, sizeof(buff), "%s%snan(0x%" PRIx64 "%016" PRIx64 ")", sign, signalingChar, payloadHigh, payloadLow); } else { snprintf(buff, sizeof(buff), "%s%snan(0x%" PRIx64 ")", sign, signalingChar, payloadLow); } } else { snprintf(buff, sizeof(buff), "%s%snan", sign, signalingChar); } size_t nanlen = strlen(buff); if (nanlen < len) { memcpy(dest, buff, nanlen + 1); return nanlen; } if (len > 0) { dest[0] = '\0'; } return 0; } // ================================================================ // // BINARY16 // // ================================================================ #if SWIFT_DTOA_BINARY16_SUPPORT #if !SWIFT_DTOA_PASS_FLOAT16_AS_FLOAT // Format a C `_Float16` size_t swift_dtoa_optimal_binary16(_Float16 d, char *dest, size_t length) { return swift_dtoa_optimal_binary16_p(&d, dest, length); } #endif // Format an IEEE 754 binary16 half-precision floating point value // into an optimal text form. // This does not assume that the C environment has any support // for binary16. // Because binary16 has such a limited range, a simple exact // implementation can fit in 32 bit arithmetic. Since we can easily // verify every single binary16 value, this can be experimentally // optimized. size_t swift_dtoa_optimal_binary16_p(const void *f, char *dest, size_t length) { static const int significandBitCount = 10; static const uint32_t significandMask = ((uint32_t)1 << significandBitCount) - 1; static const int exponentBitCount = 5; static const int exponentMask = (1 << exponentBitCount) - 1; // See comments in swift_dtoa_optimal_binary64_p static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 14 if (length < 1) { return 0; } // Step 0: Deconstruct IEEE 754 binary16 format uint16_t raw = *(const uint16_t *)f; int exponentBitPattern = (raw >> significandBitCount) & exponentMask; uint16_t significandBitPattern = raw & significandMask; int negative = raw >> 15; // Step 1: Handle the various input cases: int binaryExponent; uint16_t significand; int isBoundary = significandBitPattern == 0; if (exponentBitPattern == exponentMask) { // NaN or Infinity if (isBoundary) { // Infinity return infinity(dest, length, negative); } else { const int quiet = (significandBitPattern >> (significandBitCount - 1)) & 1; uint16_t payload = significandBitPattern & ((1U << (significandBitCount - 2)) - 1); return nan_details(dest, length, negative, quiet, 0, payload); } } else if (exponentBitPattern == 0) { if (isBoundary) { // Zero return zero(dest, length, negative); } else { // Subnormal binaryExponent = 1 - exponentBias; significand = significandBitPattern; } } else { // normal binaryExponent = exponentBitPattern - exponentBias; uint16_t hiddenBit = (uint32_t)1 << (uint32_t)significandBitCount; uint16_t fullSignificand = significandBitPattern + hiddenBit; significand = fullSignificand; } // Step 2: Determine the exact target interval significand <<= 2; static const uint16_t halfUlp = 2; uint32_t upperMidpointExact = significand + halfUlp; static const uint16_t quarterUlp = 1; uint32_t lowerMidpointExact = significand - (isBoundary ? quarterUlp : halfUlp); // Shortest output from here is "1.0" plus null byte if (length < 4) { dest[0] = '\0'; return 0; } char *p = dest; if (negative) { *p++ = '-'; } if (binaryExponent < -13 || (binaryExponent == -13 && significand < 0x1a38)) { // Format values < 10^-5 as exponential form // We know value < 10^-5, so we can do the first scaling step unconditionally int decimalExponent = -5; uint32_t u = (upperMidpointExact << (28 - 13 + binaryExponent)) * 100000; uint32_t l = (lowerMidpointExact << (28 - 13 + binaryExponent)) * 100000; uint32_t t = (significand << (28 - 13 + binaryExponent)) * 100000; const uint32_t mask = (1 << 28) - 1; if (t < ((1 << 28) / 10)) { u *= 100; l *= 100; t *= 100; decimalExponent -= 2; } if (t < (1 << 28)) { u *= 10; l *= 10; t *= 10; decimalExponent -= 1; } const int uDigit = u >> 28, lDigit = l >> 28; if (uDigit == lDigit) { // There's more than one digit, emit a '.' and the rest if (p > dest + length - 6) { dest[0] = '\0'; return 0; } *p++ = (t >> 28) + '0'; *p++ = '.'; while (true) { u = (u & mask) * 10; l = (l & mask) * 10; const int uDigit = u >> 28, lDigit = l >> 28; if (uDigit != lDigit) { t = (t & mask) * 10; break; } t *= 10; *p++ = uDigit + '0'; } } t = (t + (1 << 27)) >> 28; // Add 1/2 to round if (p > dest + length - 6) { // Exactly 6 bytes written below dest[0] = '\0'; return 0; } *p++ = t + '0'; memcpy(p, "e-", 2); p += 2; memcpy(p, asciiDigitTable + (-decimalExponent) * 2, 2); p += 2; *p = '\0'; return p - dest; } // Format the value using decimal format // There's an integer portion of no more than 5 digits int intportion; if (binaryExponent < 13) { intportion = significand >> (13 - binaryExponent); significand -= intportion << (13 - binaryExponent); } else { intportion = significand << (binaryExponent - 13); significand -= intportion >> (binaryExponent - 13); } if (intportion < 10) { if (p > dest + length - 3) { dest[0] = '\0'; return 0; } *p++ = intportion + '0'; // One digit is the most common case } else if (intportion < 1000) { // 2 or 3 digits if (p > dest + length - 4) { dest[0] = '\0'; return 0; } if (intportion > 99) { *p++ = intportion / 100 + '0'; } memcpy(p, asciiDigitTable + (intportion % 100) * 2, 2); p += 2; } else { // 4 or 5 digits if (p > dest + length - 6) { dest[0] = '\0'; return 0; } if (intportion > 9999) { *p++ = intportion / 10000 + '0'; intportion %= 10000; } memcpy(p, asciiDigitTable + (intportion / 100) * 2, 2); memcpy(p + 2, asciiDigitTable + (intportion % 100) * 2, 2); p += 4; } if (p > dest + length - 3) { dest[0] = '\0'; return 0; } *p++ = '.'; if (significand == 0) { // No fraction, so we're done. *p++ = '0'; *p = '\0'; return p - dest; } // Format the fractional part uint32_t u = upperMidpointExact << (28 - 13 + binaryExponent); uint32_t l = lowerMidpointExact << (28 - 13 + binaryExponent); uint32_t t = significand << (28 - 13 + binaryExponent); const uint32_t mask = (1 << 28) - 1; unsigned uDigit, lDigit; while (true) { u = (u & mask) * 10; l = (l & mask) * 10; uDigit = u >> 28; lDigit = l >> 28; if (uDigit != lDigit) { t = (t & mask) * 10; break; } t *= 10; if (p > dest + length - 3) { dest[0] = '\0'; return 0; } *p++ = uDigit + '0'; } t += 1 << 27; // Add 1/2 if ((t & mask) == 0) { // Was exactly 1/2 (now zero) t = (t >> 28) & ~1; // Round even } else { t >>= 28; } if (t <= lDigit && l > 0) t += 1; *p++ = t + '0'; *p = '\0'; return p - dest; } #endif // ================================================================ // // BINARY32 // // ================================================================ #if SWIFT_DTOA_BINARY32_SUPPORT #if FLOAT_IS_BINARY32 // Format a C `float` size_t swift_dtoa_optimal_float(float d, char *dest, size_t length) { return swift_dtoa_optimal_binary32_p(&d, dest, length); } #endif // Format an IEEE 754 single-precision binary32 format floating-point number. size_t swift_dtoa_optimal_binary32_p(const void *f, char *dest, size_t length) { static const int significandBitCount = FLT_MANT_DIG - 1; static const uint32_t significandMask = ((uint32_t)1 << significandBitCount) - 1; static const int exponentBitCount = 8; static const int exponentMask = (1 << exponentBitCount) - 1; // See comments in swift_dtoa_optimal_binary64_p static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 125 // Step 0: Deconstruct the target number // Note: this strongly assumes IEEE 754 binary32 format uint32_t raw = *(const uint32_t *)f; int exponentBitPattern = (raw >> significandBitCount) & exponentMask; uint32_t significandBitPattern = raw & significandMask; int negative = raw >> 31; // Step 1: Handle the various input cases: int binaryExponent; uint32_t significand; if (length < 1) { return 0; } else if (exponentBitPattern == exponentMask) { // NaN or Infinity if (significandBitPattern == 0) { // Infinity return infinity(dest, length, negative); } else { // NaN const int quiet = (significandBitPattern >> (significandBitCount - 1)) & 1; uint32_t payload = raw & ((1UL << (significandBitCount - 2)) - 1); return nan_details(dest, length, negative, quiet != 0, 0, payload); } } else if (exponentBitPattern == 0) { if (significandBitPattern == 0) { // Zero return zero(dest, length, negative); } else { // Subnormal binaryExponent = 1 - exponentBias; significand = significandBitPattern << (32 - significandBitCount - 1); } } else { // normal binaryExponent = exponentBitPattern - exponentBias; uint32_t hiddenBit = (uint32_t)1 << (uint32_t)significandBitCount; uint32_t fullSignificand = significandBitPattern + hiddenBit; significand = fullSignificand << (32 - significandBitCount - 1); } // Step 2: Determine the exact unscaled target interval static const uint32_t halfUlp = (uint32_t)1 << (32 - significandBitCount - 2); uint64_t upperMidpointExact = (uint64_t)(significand + halfUlp); int isBoundary = significandBitPattern == 0; static const uint32_t quarterUlp = halfUlp >> 1; uint64_t lowerMidpointExact = (uint64_t)(significand - (isBoundary ? quarterUlp : halfUlp)); // Step 3: Estimate the base 10 exponent int base10Exponent = decimalExponentFor2ToThe(binaryExponent); // Step 4: Compute a power-of-10 scale factor uint64_t powerOfTenRoundedDown = 0; uint64_t powerOfTenRoundedUp = 0; int powerOfTenExponent = 0; static const int bulkFirstDigits = 1; intervalContainingPowerOf10_Binary32(-base10Exponent + bulkFirstDigits - 1, &powerOfTenRoundedDown, &powerOfTenRoundedUp, &powerOfTenExponent); const int extraBits = binaryExponent + powerOfTenExponent; // Step 5: Scale the interval (with rounding) static const int integerBits = 8; const int shift = integerBits - extraBits; const int roundUpBias = (1 << shift) - 1; static const int fractionBits = 64 - integerBits; static const uint64_t fractionMask = ((uint64_t)1 << fractionBits) - (uint64_t)1; uint64_t u, l; if (significandBitPattern & 1) { // Narrow the interval (odd significand) uint64_t u1 = multiply64x32RoundingDown(powerOfTenRoundedDown, upperMidpointExact); u = u1 >> shift; // Rounding down uint64_t l1 = multiply64x32RoundingUp(powerOfTenRoundedUp, lowerMidpointExact); l = (l1 + roundUpBias) >> shift; // Rounding Up } else { // Widen the interval (even significand) uint64_t u1 = multiply64x32RoundingUp(powerOfTenRoundedUp, upperMidpointExact); u = (u1 + roundUpBias) >> shift; // Rounding Up uint64_t l1 = multiply64x32RoundingDown(powerOfTenRoundedDown, lowerMidpointExact); l = l1 >> shift; // Rounding down } // Step 6: Align first digit, adjust exponent // In particular, this prunes leading zeros from subnormals uint64_t t = u; uint64_t delta = u - l; while (t < (uint64_t)1 << fractionBits) { base10Exponent -= 1; t *= 10; delta *= 10; } // Step 7: Generate decimal digits into the destination buffer char *p = dest; if (p > dest + length - 3) { dest[0] = '\0'; return 0; } if (negative) { *p++ = '-'; } char * const firstOutputChar = p; // Format first digit as a 2-digit value to get a leading '0' memcpy(p, asciiDigitTable + (t >> fractionBits) * 2, 2); t &= fractionMask; p += 2; // Emit two digits at a time while ((delta * 10) < ((t * 10) & fractionMask)) { if (p > dest + length - 3) { dest[0] = '\0'; return 0; } delta *= 100; t *= 100; memcpy(p, asciiDigitTable + (t >> fractionBits) * 2, 2); t &= fractionMask; p += 2; } // Emit any final digit if (delta < t) { if (p > dest + length - 2) { dest[0] = '\0'; return 0; } delta *= 10; t *= 10; *p++ = '0' + (t >> fractionBits); t &= fractionMask; } // Adjust the final digit to be closer to the original value if (delta > t + ((uint64_t)1 << fractionBits)) { uint64_t skew; if (isBoundary) { skew = delta - delta / 3 - t; } else { skew = delta / 2 - t; } uint64_t one = (uint64_t)(1) << (64 - integerBits); uint64_t lastAccurateBit = 1ULL << 24; uint64_t fractionMask = (one - 1) & ~(lastAccurateBit - 1); uint64_t oneHalf = one >> 1; if (((skew + (lastAccurateBit >> 1)) & fractionMask) == oneHalf) { // If the skew is exactly integer + 1/2, round the last // digit even after adjustment int adjust = (int)(skew >> (64 - integerBits)); p[-1] -= adjust; p[-1] &= ~1; } else { // Else round to nearest... int adjust = (int)((skew + oneHalf) >> (64 - integerBits)); p[-1] -= adjust; } } int forceExponential = binaryExponent > 25 || (binaryExponent == 25 && !isBoundary); return finishFormatting(dest, length, p, firstOutputChar, forceExponential, base10Exponent); } #endif // ================================================================ // // BINARY64 // // ================================================================ #if SWIFT_DTOA_BINARY64_SUPPORT #if LONG_DOUBLE_IS_BINARY64 size_t swift_dtoa_optimal_long_double(long double d, char *dest, size_t length) { return swift_dtoa_optimal_binary64_p(&d, dest, length); } #endif #if DOUBLE_IS_BINARY64 size_t swift_dtoa_optimal_double(double d, char *dest, size_t length) { return swift_dtoa_optimal_binary64_p(&d, dest, length); } #endif // Format an IEEE 754 double-precision binary64 format floating-point number. // The calling convention here assumes that C `double` is this format, // but otherwise, this does not utilize any floating-point arithmetic // or library routines. size_t swift_dtoa_optimal_binary64_p(const void *d, char *dest, size_t length) { // Bits in raw significand (not including hidden bit, if present) static const int significandBitCount = DBL_MANT_DIG - 1; static const uint64_t significandMask = ((uint64_t)1 << significandBitCount) - 1; // Bits in raw exponent static const int exponentBitCount = 11; static const int exponentMask = (1 << exponentBitCount) - 1; // Note: IEEE 754 conventionally uses 1023 as the exponent // bias. That's because they treat the significand as a // fixed-point number with one bit (the hidden bit) integer // portion. The logic here reconstructs the significand as a // pure fraction, so we need to accommodate that when // reconstructing the binary exponent. static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 1022 // Step 0: Deconstruct an IEEE 754 binary64 double-precision value uint64_t raw = *(const uint64_t *)d; int exponentBitPattern = (raw >> significandBitCount) & exponentMask; uint64_t significandBitPattern = raw & significandMask; int negative = raw >> 63; // Step 1: Handle the various input cases: if (length < 1) { return 0; } int binaryExponent; int isBoundary = significandBitPattern == 0; uint64_t significand; if (exponentBitPattern == exponentMask) { // NaN or Infinity if (isBoundary) { // Infinity return infinity(dest, length, negative); } else { const int quiet = (raw >> (significandBitCount - 1)) & 1; uint64_t payload = raw & ((1ull << (significandBitCount - 2)) - 1); return nan_details(dest, length, negative, quiet, 0, payload); } } else if (exponentBitPattern == 0) { if (isBoundary) { // Zero return zero(dest, length, negative); } else { // subnormal binaryExponent = 1 - exponentBias; significand = significandBitPattern << (64 - significandBitCount - 1); } } else { // normal binaryExponent = exponentBitPattern - exponentBias; uint64_t hiddenBit = (uint64_t)1 << significandBitCount; uint64_t fullSignificand = significandBitPattern + hiddenBit; significand = fullSignificand << (64 - significandBitCount - 1); } // Step 2: Determine the exact unscaled target interval // Grisu-style algorithms construct the shortest decimal digit // sequence within a specific interval. To build the appropriate // interval, we start by computing the midpoints between this // floating-point value and the adjacent ones. Note that this // step is an exact computation. uint64_t halfUlp = (uint64_t)1 << (64 - significandBitCount - 2); uint64_t quarterUlp = halfUlp >> 1; uint64_t upperMidpointExact = significand + halfUlp; uint64_t lowerMidpointExact = significand - (isBoundary ? quarterUlp : halfUlp); int isOddSignificand = (significandBitPattern & 1) != 0; // Step 3: Estimate the base 10 exponent // Grisu algorithms are based in part on a simple technique for // generating a base-10 form for a binary floating-point number. // Start with a binary floating-point number `f * 2^e` and then // estimate the decimal exponent `p`. You can then rewrite your // original number as: // // ``` // f * 2^e * 10^-p * 10^p // ``` // // The last term is part of our output, and a good estimate for // `p` will ensure that `2^e * 10^-p` is close to 1. Multiplying // the first three terms then yields a fraction suitable for // producing the decimal digits. Here we use a very fast estimate // of `p` that is never off by more than 1; we'll have // opportunities later to correct any error. int base10Exponent = decimalExponentFor2ToThe(binaryExponent); // Step 4: Compute a power-of-10 scale factor // Compute `10^-p` to 128-bit precision. We generate // both over- and under-estimates to ensure we can exactly // bound the later use of these values. swift_uint128_t powerOfTenRoundedDown; swift_uint128_t powerOfTenRoundedUp; int powerOfTenExponent = 0; static const int bulkFirstDigits = 7; static const int bulkFirstDigitFactor = 1000000; // 10^(bulkFirstDigits - 1) // Note the extra factor of 10^bulkFirstDigits -- that will give // us a headstart on digit generation later on. (In contrast, Ryu // uses an extra factor of 10^17 here to get all the digits up // front, but then has to back out any extra digits. Doing that // with a 17-digit value requires 64-bit division, which is the // root cause of Ryu's poor performance on 32-bit processors. We // also might have to back out extra digits if 7 is too many, but // will only need 32-bit division in that case.) intervalContainingPowerOf10_Binary64(-base10Exponent + bulkFirstDigits - 1, &powerOfTenRoundedDown, &powerOfTenRoundedUp, &powerOfTenExponent); const int extraBits = binaryExponent + powerOfTenExponent; // Step 5: Scale the interval (with rounding) // As mentioned above, the final digit generation works // with an interval, so we actually apply the scaling // to the upper and lower midpoint values separately. // As part of the scaling here, we'll switch from a pure // fraction with zero bit integer portion and 128-bit fraction // to a fixed-point form with 32 bits in the integer portion. static const int integerBits = 32; // We scale the interval in one of two different ways, // depending on whether the significand is even or odd... swift_uint128_t u, l; if (isOddSignificand) { // Case A: Narrow the interval (odd significand) // Loitsch' original Grisu2 always rounds so as to narrow the // interval. Since our digit generation will select a value // within the scaled interval, narrowing the interval // guarantees that we will find a digit sequence that converts // back to the original value. // This ensures accuracy but, as explained in Loitsch' paper, // this carries a risk that there will be a shorter digit // sequence outside of our narrowed interval that we will // miss. This risk obviously gets lower with increased // precision, but it wasn't until the Errol paper that anyone // had a good way to test whether a particular implementation // had sufficient precision. That paper shows a way to enumerate // the worst-case numbers; those numbers that are extremely close // to the mid-points between adjacent floating-point values. // These are the values that might sit just outside of the // narrowed interval. By testing these values, we can verify // the correctness of our implementation. // Multiply out the upper midpoint, rounding down... swift_uint128_t u1 = multiply128x64RoundingDown(powerOfTenRoundedDown, upperMidpointExact); // Account for residual binary exponent and adjust // to the fixed-point format u = shiftRightRoundingDown128(u1, integerBits - extraBits); // Conversely for the lower midpoint... swift_uint128_t l1 = multiply128x64RoundingUp(powerOfTenRoundedUp, lowerMidpointExact); l = shiftRightRoundingUp128(l1, integerBits - extraBits); } else { // Case B: Widen the interval (even significand) // As explained in Errol Theorem 6, in certain cases there is // a short decimal representation at the exact boundary of the // scaled interval. When such a number is converted back to // binary, it will get rounded to the adjacent even // significand. // So when the significand is even, we round so as to widen // the interval in order to ensure that the exact midpoints // are considered. Of couse, this ensures that we find a // short result but carries a risk of selecting a result // outside of the exact scaled interval (which would be // inaccurate). // The same testing approach described above (based on results // in the Errol paper) also applies // to this case. swift_uint128_t u1 = multiply128x64RoundingUp(powerOfTenRoundedUp, upperMidpointExact); u = shiftRightRoundingUp128(u1, integerBits - extraBits); swift_uint128_t l1 = multiply128x64RoundingDown(powerOfTenRoundedDown, lowerMidpointExact); l = shiftRightRoundingDown128(l1, integerBits - extraBits); } // Step 6: Align first digit, adjust exponent // Calculations above used an estimate for the power-of-ten scale. // Here, we compensate for any error in that estimate by testing // whether we have the expected number of digits in the integer // portion and correcting as necessary. This also serves to // prune leading zeros from subnormals. // Except for subnormals, this loop should never run more than once. // For subnormals, this might run as many as 16 + bulkFirstDigits // times. #if HAVE_UINT128_T while (u < ((__uint128_t)bulkFirstDigitFactor << (128 - integerBits))) #else while (u.high < ((uint32_t)bulkFirstDigitFactor << (32 - integerBits))) #endif { base10Exponent -= 1; multiply128xu32(&l, 10); multiply128xu32(&u, 10); } // Step 7: Produce decimal digits // One standard approach generates digits for the scaled upper and // lower boundaries and stops when at the first digit that // differs. For example, note that 0.1234 is the shortest decimal // between u = 0.123456 and l = 0.123345. // Grisu optimizes this by generating digits for the upper bound // (multiplying by 10 to isolate each digit) while simultaneously // scaling the interval width `delta`. As we remove each digit // from the upper bound, the remainder is the difference between // the base-10 value generated so far and the true upper bound. // When that remainder is less than the scaled width of the // interval, we know the current digits specify a value within the // target interval. // The logic below actually blends three different digit-generation // strategies: // * The first digits are already in the integer portion of the // fixed-point value, thanks to the `bulkFirstDigits` factor above. // We can just break those down and write them out. // * If we generated too many digits, we use a Ryu-inspired technique // to backtrack. // * If we generated too few digits (the usual case), we use an // optimized form of the Grisu2 method to produce the remaining // values. // Generate digits for `t` with interval width `delta = u - l` swift_uint128_t t = u; swift_uint128_t delta = u; subtract128x128(&delta, l); char *p = dest; if (negative) { if (p >= dest + length) { dest[0] = '\0'; return 0; } *p++ = '-'; } char * const firstOutputChar = p; // The `bulkFirstDigits` adjustment above already set up the first 7 digits // Format as 8 digits (with a leading zero that we'll exploit later on). uint32_t d12345678 = extractIntegerPart128(&t, integerBits); if (!isLessThan128x128(delta, t)) { // Oops! We have too many digits. Back out the extra ones to // get the right answer. This is similar to Ryu, but since // we've only produced seven digits, we only need 32-bit // arithmetic here. A few notes: // * Our target hardware always supports 32-bit hardware division, // so this should be reasonably fast. // * For small integers (like "2"), Ryu would have to back out 16 // digits; we only have to back out 6. // * Very few double-precision values actually need fewer than 7 // digits. So this is rarely used except in workloads that // specifically use double for small integers. This is more // common for binary32, of course. // TODO: Add benchmarking for "small integers" -1000...1000 to // verify that this does not unduly penalize those values. // Why this is critical for performance: In order to use the // 8-digits-at-a-time optimization below, we need at least 30 // bits in the integer part of our fixed-point format above. If // we only use bulkDigits = 1, that leaves only 128 - 30 = 98 // bit accuracy for our scaling step, which isn't enough // (binary64 needs ~110 bits for correctness). So we have to // use a large bulkDigits value to make full use of the 128-bit // scaling above, which forces us to have some form of logic to // handle the case of too many digits. The alternatives are to // use >128 bit values (slower) or do some complex finessing of // bit counts by working with powers of 5 instead of 10. #if HAVE_UINT128_T uint64_t uHigh = u >> 64; uint64_t lHigh = l >> 64; if (0 != (uint64_t)l) { lHigh += 1; } #else uint64_t uHigh = ((uint64_t)u.high << 32) + u.c; uint64_t lHigh = ((uint64_t)l.high << 32) + l.c; if (0 != (l.b | l.low)) { lHigh += 1; } #endif uint64_t tHigh; if (isBoundary) { tHigh = (uHigh + lHigh * 2) / 3; } else { tHigh = (uHigh + lHigh) / 2; } uint32_t u0 = uHigh >> (64 - integerBits); uint32_t l0 = lHigh >> (64 - integerBits); if ((lHigh & ((1ULL << (64 - integerBits)) - 1)) != 0) { l0 += 1; } uint32_t t0 = tHigh >> (64 - integerBits); int t0digits = 8; uint32_t u1 = u0 / 10; uint32_t l1 = (l0 + 9) / 10; int trailingZeros = is128bitZero(t); int droppedDigit = ((tHigh * 10) >> (64 - integerBits)) % 10; while (u1 >= l1 && u1 != 0) { u0 = u1; l0 = l1; trailingZeros &= droppedDigit == 0; droppedDigit = t0 % 10; t0 /= 10; t0digits--; u1 = u0 / 10; l1 = (l0 + 9) / 10; } // Correct the final digit if (droppedDigit > 5 || (droppedDigit == 5 && !trailingZeros)) { t0 += 1; } else if (droppedDigit == 5 && trailingZeros) { t0 += 1; t0 &= ~1; } // t0 has t0digits digits. Write them out if (p > dest + length - t0digits - 1) { // Make sure we have space dest[0] = '\0'; return 0; } int i = t0digits; while (i > 1) { // Write out 2 digits at a time back-to-front i -= 2; memcpy(p + i, asciiDigitTable + (t0 % 100) * 2, 2); t0 /= 100; } if (i > 0) { // Handle an odd number of digits p[0] = t0 + '0'; } p += t0digits; // Move the pointer past the digits we just wrote } else { // // Our initial scaling did not produce too many digits. // The `d12345678` value holds the first 7 digits (plus // a leading zero that will be useful later). We write // those out and then incrementally generate as many // more digits as necessary. The remainder of this // algorithm is basically just Grisu2. // if (p > dest + length - 9) { dest[0] = '\0'; return 0; } // Write out the 7 digits we got earlier + leading zero int d1234 = d12345678 / 10000; int d5678 = d12345678 % 10000; int d78 = d5678 % 100; int d56 = d5678 / 100; memcpy(p + 6, asciiDigitTable + d78 * 2, 2); memcpy(p + 4, asciiDigitTable + d56 * 2, 2); int d34 = d1234 % 100; int d12 = d1234 / 100; memcpy(p + 2, asciiDigitTable + d34 * 2, 2); memcpy(p, asciiDigitTable + d12 * 2, 2); p += 8; // Seven digits wasn't enough, so let's get some more. // Most binary64 values need >= 15 digits total. We already have seven, // so try grabbing the next 8 digits all at once. // (This is suboptimal for binary32, but the code savings // from sharing this implementation are worth it.) static const uint32_t bulkDigitFactor = 100000000; // 10^(15-bulkFirstDigits) swift_uint128_t d0 = delta; multiply128xu32(&d0, bulkDigitFactor); swift_uint128_t t0 = t; multiply128xu32(&t0, bulkDigitFactor); int bulkDigits = extractIntegerPart128(&t0, integerBits); // 9 digits if (isLessThan128x128(d0, t0)) { if (p > dest + length - 9) { dest[0] = '\0'; return 0; } // Next 8 digits are good; add them to the output int d1234 = bulkDigits / 10000; int d5678 = bulkDigits % 10000; int d78 = d5678 % 100; int d56 = d5678 / 100; memcpy(p + 6, asciiDigitTable + d78 * 2, 2); memcpy(p + 4, asciiDigitTable + d56 * 2, 2); int d34 = d1234 % 100; int d12 = d1234 / 100; memcpy(p + 2, asciiDigitTable + d34 * 2, 2); memcpy(p, asciiDigitTable + d12 * 2, 2); p += 8; t = t0; delta = d0; } // Finish up by generating and writing one digit at a time. while (isLessThan128x128(delta, t)) { if (p > dest + length - 2) { dest[0] = '\0'; return 0; } multiply128xu32(&delta, 10); multiply128xu32(&t, 10); *p++ = '0' + extractIntegerPart128(&t, integerBits); } // Adjust the final digit to be closer to the original value. This accounts // for the fact that sometimes there is more than one shortest digit // sequence. // For example, consider how the above would work if you had the // value 0.1234 and computed u = 0.1257, l = 0.1211. The above // digit generation works with `u`, so produces 0.125. But the // values 0.122, 0.123, and 0.124 are just as short and 0.123 is // therefore the best choice, since it's closest to the original // value. // We know delta and t are both less than 10.0 here, so we can // shed some excess integer bits to simplify the following: const int adjustIntegerBits = 4; // Integer bits for "adjust" phase shiftLeft128(&delta, integerBits - adjustIntegerBits); shiftLeft128(&t, integerBits - adjustIntegerBits); // Note: We've already consumed most of our available precision, // so it's okay to just work in 64 bits for this... uint64_t deltaHigh64 = extractHigh64From128(delta); uint64_t tHigh64 = extractHigh64From128(t); // If `delta < t + 1.0`, then the interval is narrower than // one decimal digit, so there is no other option. if (deltaHigh64 >= tHigh64 + ((uint64_t)1 << (64 - adjustIntegerBits))) { uint64_t skew; if (isBoundary) { // If we're at the boundary where the exponent shifts, // then the original value is 1/3 of the way from // the bottom of the interval ... skew = deltaHigh64 - deltaHigh64 / 3 - tHigh64; } else { // ... otherwise it's exactly in the middle. skew = deltaHigh64 / 2 - tHigh64; } // The `skew` above is the difference between our // computed digits and the original exact value. // Use that to offset the final digit: uint64_t one = (uint64_t)(1) << (64 - adjustIntegerBits); uint64_t fractionMask = one - 1; uint64_t oneHalf = one >> 1; if ((skew & fractionMask) == oneHalf) { int adjust = (int)(skew >> (64 - adjustIntegerBits)); // If the skew is exactly integer + 1/2, round the // last digit even after adjustment p[-1] -= adjust; p[-1] &= ~1; } else { // Else round to nearest... int adjust = (int)((skew + oneHalf) >> (64 - adjustIntegerBits)); p[-1] -= adjust; } } } // Step 8: Shuffle digits into the final textual form int forceExponential = binaryExponent > 54 || (binaryExponent == 54 && !isBoundary); return finishFormatting(dest, length, p, firstOutputChar, forceExponential, base10Exponent); } #endif // ================================================================ // // FLOAT80 // // ================================================================ #if SWIFT_DTOA_FLOAT80_SUPPORT #if LONG_DOUBLE_IS_FLOAT80 size_t swift_dtoa_optimal_long_double(long double d, char *dest, size_t length) { return swift_dtoa_optimal_float80_p(&d, dest, length); } #endif // Format an Intel x87 80-bit extended precision floating-point format // This does not rely on the C environment for floating-point arithmetic // or library support of any kind. size_t swift_dtoa_optimal_float80_p(const void *d, char *dest, size_t length) { static const int exponentBitCount = 15; static const int exponentMask = (1 << exponentBitCount) - 1; // See comments in swift_dtoa_optimal_binary64_p to understand // why we use 16,382 instead of 16,383 here. static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 16,382 // Step 0: Deconstruct the target number // Note: this strongly assumes Intel 80-bit extended format in LSB // byte order const uint64_t *raw_p = (const uint64_t *)d; int exponentBitPattern = raw_p[1] & exponentMask; int negative = (raw_p[1] >> 15) & 1; uint64_t significandBitPattern = raw_p[0]; // Step 1: Handle the various input cases: int64_t binaryExponent; uint64_t significand; int isBoundary = (significandBitPattern & 0x7fffffffffffffff) == 0; if (length < 1) { return 0; } else if (exponentBitPattern == exponentMask) { // NaN or Infinity // Following 80387 semantics as documented in Wikipedia.org "Extended Precision" // Also see Intel's "Floating Point Reference Sheet" // https://software.intel.com/content/dam/develop/external/us/en/documents/floating-point-reference-sheet.pdf int selector = significandBitPattern >> 62; // Top 2 bits uint64_t payload = significandBitPattern & (((uint64_t)1 << 62) - 1); // bottom 62 bits switch (selector) { case 0: // ∞ or snan on 287, invalid on 387 case 1: // Pseudo-NaN: snan on 287, invalid on 387 break; case 2: if (payload == 0) { // snan on 287, ∞ on 387 return infinity(dest, length, negative); } else { // snan on 287 and 387 return nan_details(dest, length, negative, 0 /* quiet */, 0, payload); } break; case 3: // Zero payload and sign bit set is "indefinite" (treated as qNaN here), // Otherwise qNan on 387, sNaN on 287 return nan_details(dest, length, negative, 1 /* quiet */, 0, payload); } // Handle "invalid" patterns as plain "nan" return nan_details(dest, length, 0 /* negative */, 1 /* quiet */, 0, payload); } else if (exponentBitPattern == 0) { if (significandBitPattern == 0) { // Zero return zero(dest, length, negative); } else { // subnormal binaryExponent = 1 - exponentBias; significand = significandBitPattern; } } else if (significandBitPattern >> 63) { // Normal binaryExponent = exponentBitPattern - exponentBias; significand = significandBitPattern; } else { // Invalid pattern rejected by 80387 and later. // Handle "invalid" patterns as plain "nan" return nan_details(dest, length, 0 /* negative */, 1 /* quiet */, 0, 0); } // Step 2: Determine the exact unscaled target interval uint64_t halfUlp = (uint64_t)1 << 63; uint64_t quarterUlp = halfUlp >> 1; uint64_t threeQuarterUlp = halfUlp + quarterUlp; swift_uint128_t upperMidpointExact, lowerMidpointExact; initialize128WithHighLow64(upperMidpointExact, significand, halfUlp); // Subtract 1/4 or 1/2 ULP by first subtracting 1 full ULP, then adding some back initialize128WithHighLow64(lowerMidpointExact, significand - 1, isBoundary ? threeQuarterUlp : halfUlp); return _swift_dtoa_256bit_backend ( dest, length, upperMidpointExact, lowerMidpointExact, negative, isBoundary, (significandBitPattern & 1) != 0, binaryExponent, binaryExponent > 65 || (binaryExponent == 65 && !isBoundary) // forceExponential ); } #endif // ================================================================ // // BINARY128 // // ================================================================ #if SWIFT_DTOA_BINARY128_SUPPORT #if LONG_DOUBLE_IS_BINARY128 size_t swift_dtoa_optimal_long_double(long double d, char *dest, size_t length) { return swift_dtoa_optimal_binary128_p(&d, dest, length); } #endif // Format an IEEE 754 binary128 quad-precision floating-point number. // This does not rely on the C environment for floating-point arithmetic // or library support of any kind. size_t swift_dtoa_optimal_binary128_p(const void *d, char *dest, size_t length) { static const int exponentBitCount = 15; static const int exponentMask = (1 << exponentBitCount) - 1; // See comments in swift_dtoa_optimal_binary64_p to understand // why we use 16,382 instead of 16,383 here. static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 16,382 // Step 0: Deconstruct the target number in IEEE 754 binary128 LSB format const uint64_t *raw_p = (const uint64_t *)d; int exponentBitPattern = (raw_p[1] >> 48) & exponentMask; int negative = (raw_p[1] >> 63) & 1; uint64_t significandHigh = raw_p[1] & 0xffffffffffffULL; uint64_t significandLow = raw_p[0]; // Step 1: Handle the various input cases: int64_t binaryExponent; int isBoundary = (significandLow == 0) && (significandHigh == 0); if (length < 1) { return 0; } else if (exponentBitPattern == exponentMask) { // NaN or Infinity if (isBoundary) { // Infinity return infinity(dest, length, negative); } else { // NaN int signaling = (significandHigh >> 47) & 1; uint64_t payloadHigh = significandHigh & 0x3fffffffffffULL; uint64_t payloadLow = significandLow; return nan_details(dest, length, negative, signaling == 0, payloadHigh, payloadLow); } } else if (exponentBitPattern == 0) { if (isBoundary) { // Zero return zero(dest, length, negative); } else { // subnormal binaryExponent = 1 - exponentBias; } } else { // Normal binaryExponent = exponentBitPattern - exponentBias; significandHigh |= (1ULL << 48); } // Align significand to 0.113 fractional form significandHigh <<= 15; significandHigh |= significandLow >> (64 - 15); significandLow <<= 15; // Step 2: Determine the exact unscaled target interval uint64_t halfUlp = (uint64_t)1 << 14; uint64_t quarterUlp = halfUlp >> 1; swift_uint128_t upperMidpointExact, lowerMidpointExact; initialize128WithHighLow64(upperMidpointExact, significandHigh, significandLow + halfUlp); // Subtract 1/4 or 1/2 ULP if (significandLow == 0) { initialize128WithHighLow64(lowerMidpointExact, significandHigh - 1, significandLow - (isBoundary ? quarterUlp : halfUlp)); } else { initialize128WithHighLow64(lowerMidpointExact, significandHigh, significandLow - (isBoundary ? quarterUlp : halfUlp)); } return _swift_dtoa_256bit_backend ( dest, length, upperMidpointExact, lowerMidpointExact, negative, isBoundary, (significandLow & 0x8000) != 0, binaryExponent, binaryExponent > 114 || (binaryExponent == 114 && !isBoundary) // forceExponential ); } #endif // ================================================================ // // FLOAT80/BINARY128 common backend // // This uses 256-bit fixed-width arithmetic to efficiently compute the // optimal form for a decomposed float80 or binary128 value. It is // less heavily commented than the 128-bit version above; see that // implementation for detailed explanation of the logic here. // // This sacrifices some performance for float80, which can be done // more efficiently with 192-bit fixed-width arithmetic. But the code // size savings from sharing this logic between float80 and binary128 // are substantial, and the resulting float80 performance is still much // better than most competing implementations. // // Also in the interest of code size savings, this eschews some of the // optimizations used by the 128-bit backend above. Those // optimizations are simple to reintroduce if you're interested in // further performance improvements. // // If you are interested in extreme code size, you can also use this // backend for binary32 and binary64, eliminating the separate 128-bit // implementation. That variation offers surprisingly reasonable // performance overall. // // ================================================================ #if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT static size_t _swift_dtoa_256bit_backend ( char *dest, size_t length, swift_uint128_t upperMidpointExact, swift_uint128_t lowerMidpointExact, int negative, int isBoundary, int isOddSignificand, int binaryExponent, bool forceExponential ) { // Step 3: Estimate the base 10 exponent int base10Exponent = decimalExponentFor2ToThe(binaryExponent); // Step 4: Compute a power-of-10 scale factor swift_uint256_t powerOfTenRoundedDown; swift_uint256_t powerOfTenRoundedUp; int powerOfTenExponent = 0; intervalContainingPowerOf10_Binary128(-base10Exponent, &powerOfTenRoundedDown, &powerOfTenRoundedUp, &powerOfTenExponent); const int extraBits = binaryExponent + powerOfTenExponent; // Step 5: Scale the interval (with rounding) static const int integerBits = 14; // Enough for 4 decimal digits #if HAVE_UINT128_T static const int highFractionBits = 64 - integerBits; #else static const int highFractionBits = 32 - integerBits; #endif swift_uint256_t u, l; if (isOddSignificand) { // Narrow the interval (odd significand) u = powerOfTenRoundedDown; multiply256x128RoundingDown(&u, upperMidpointExact); shiftRightRoundingDown256(&u, integerBits - extraBits); l = powerOfTenRoundedUp; multiply256x128RoundingUp(&l, lowerMidpointExact); shiftRightRoundingUp256(&l, integerBits - extraBits); } else { // Widen the interval (even significand) u = powerOfTenRoundedUp; multiply256x128RoundingUp(&u, upperMidpointExact); shiftRightRoundingUp256(&u, integerBits - extraBits); l = powerOfTenRoundedDown; multiply256x128RoundingDown(&l, lowerMidpointExact); shiftRightRoundingDown256(&l, integerBits - extraBits); } // Step 6: Align first digit, adjust exponent #if HAVE_UINT128_T while (u.high < (uint64_t)1 << highFractionBits) #else while (u.elt[7] < (uint64_t)1 << highFractionBits) #endif { base10Exponent -= 1; multiply256xu32(&l, 10); multiply256xu32(&u, 10); } swift_uint256_t t = u; swift_uint256_t delta = u; subtract256x256(&delta, l); // Step 7: Generate digits char *p = dest; if (p > dest + length - 4) { // Shortest output is "1.0" (4 bytes) dest[0] = '\0'; return 0; } if (negative) { *p++ = '-'; } char * const firstOutputChar = p; // Adjustment above already set up the first digit *p++ = '0'; *p++ = '0' + extractIntegerPart256(&t, integerBits); // Generate 4 digits at a time swift_uint256_t d0 = delta; multiply256xu32(&d0, 10000); swift_uint256_t t0 = t; multiply256xu32(&t0, 10000); int d1234 = extractIntegerPart256(&t0, integerBits); while (isLessThan256x256(d0, t0)) { if (p > dest + length - 5) { dest[0] = '\0'; return 0; } int d34 = d1234 % 100; int d12 = d1234 / 100; memcpy(p + 2, asciiDigitTable + d34 * 2, 2); memcpy(p, asciiDigitTable + d12 * 2, 2); p += 4; t = t0; delta = d0; multiply256xu32(&d0, 10000); multiply256xu32(&t0, 10000); d1234 = extractIntegerPart256(&t0, integerBits); } // Generate one digit at a time... while (isLessThan256x256(delta, t)) { if (p > dest + length - 2) { dest[0] = '\0'; return 0; } multiply256xu32(&delta, 10); multiply256xu32(&t, 10); *p++ = extractIntegerPart256(&t, integerBits) + '0'; } // Adjust the final digit to be closer to the original value // We've already consumed most of our available precision, and only // need a couple of integer bits, so we can narrow down to // 64 bits here. #if HAVE_UINT128_T uint64_t deltaHigh64 = delta.high; uint64_t tHigh64 = t.high; #else uint64_t deltaHigh64 = ((uint64_t)delta.elt[7] << 32) + delta.elt[6]; uint64_t tHigh64 = ((uint64_t)t.elt[7] << 32) + t.elt[6]; #endif if (deltaHigh64 >= tHigh64 + ((uint64_t)1 << (64 - integerBits))) { uint64_t skew; if (isBoundary) { skew = deltaHigh64 - deltaHigh64 / 3 - tHigh64; } else { skew = deltaHigh64 / 2 - tHigh64; } uint64_t one = (uint64_t)(1) << (64 - integerBits); uint64_t fractionMask = one - 1; uint64_t oneHalf = one >> 1; if ((skew & fractionMask) == oneHalf) { int adjust = (int)(skew >> (64 - integerBits)); // If the skew is integer + 1/2, round the last digit even // after adjustment p[-1] -= adjust; p[-1] &= ~1; } else { // Else round to nearest... int adjust = (int)((skew + oneHalf) >> (64 - integerBits)); p[-1] -= adjust; } } return finishFormatting(dest, length, p, firstOutputChar, forceExponential, base10Exponent); } #endif #if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT static int finishFormatting(char *dest, size_t length, char *p, char *firstOutputChar, int forceExponential, int base10Exponent) { int digitCount = p - firstOutputChar - 1; if (base10Exponent < -4 || forceExponential) { // Exponential form: convert "0123456" => "1.23456e78" firstOutputChar[0] = firstOutputChar[1]; if (digitCount > 1) { firstOutputChar[1] = '.'; } else { p--; } // Add exponent at the end if (p > dest + length - 5) { dest[0] = '\0'; return 0; } *p++ = 'e'; if (base10Exponent < 0) { *p++ = '-'; base10Exponent = -base10Exponent; } else { *p++ = '+'; } if (base10Exponent > 99) { if (base10Exponent > 999) { if (p > dest + length - 5) { dest[0] = '\0'; return 0; } memcpy(p, asciiDigitTable + (base10Exponent / 100) * 2, 2); p += 2; } else { if (p > dest + length - 4) { dest[0] = '\0'; return 0; } *p++ = (base10Exponent / 100) + '0'; } base10Exponent %= 100; } memcpy(p, asciiDigitTable + base10Exponent * 2, 2); p += 2; } else if (base10Exponent < 0) { // "0123456" => "0.00123456" // Slide digits back in buffer and prepend zeros and a period if (p > dest + length + base10Exponent - 1) { dest[0] = '\0'; return 0; } memmove(firstOutputChar - base10Exponent, firstOutputChar, p - firstOutputChar); memset(firstOutputChar, '0', -base10Exponent); firstOutputChar[1] = '.'; p += -base10Exponent; } else if (base10Exponent + 1 < digitCount) { // "0123456" => "123.456" // Slide integer digits forward and insert a '.' memmove(firstOutputChar, firstOutputChar + 1, base10Exponent + 1); firstOutputChar[base10Exponent + 1] = '.'; } else { // "0123456" => "12345600.0" // Slide digits forward 1 and append suitable zeros and '.0' if (p + base10Exponent - digitCount > dest + length - 3) { dest[0] = '\0'; return 0; } memmove(firstOutputChar, firstOutputChar + 1, p - firstOutputChar - 1); p -= 1; memset(p, '0', base10Exponent - digitCount + 1); p += base10Exponent - digitCount + 1; *p++ = '.'; *p++ = '0'; } *p = '\0'; return p - dest; } #endif // ================================================================ // // Arithmetic helpers // // ================================================================ // The core algorithm relies heavily on fixed-point arithmetic with // 128-bit and 256-bit integer values. (For binary32/64 and // float80/binary128, respectively.) They also need precise control // over all rounding. // // Note that most arithmetic operations are the same for integers and // fractions, so we can just use the normal integer operations in most // places. Multiplication however, is different for fixed-size // fractions. Integer multiplication preserves the low-order part and // discards the high-order part (ignoring overflow). Fraction // multiplication preserves the high-order part and discards the // low-order part (rounding). So most of the arithmetic helpers here // are for multiplication. // Note: With 64-bit GCC and Clang, we get a noticeable performance // gain by using `__uint128_t`. Otherwise, we have to break things // down into 32-bit chunks so we don't overflow 64-bit temporaries. #if SWIFT_DTOA_BINARY64_SUPPORT // Multiply a 128-bit fraction by a 64-bit fraction, rounding down. static swift_uint128_t multiply128x64RoundingDown(swift_uint128_t lhs, uint64_t rhs) { #if HAVE_UINT128_T uint64_t lhsl = (uint64_t)lhs; uint64_t lhsh = (uint64_t)(lhs >> 64); swift_uint128_t h = (swift_uint128_t)lhsh * rhs; swift_uint128_t l = (swift_uint128_t)lhsl * rhs; return h + (l >> 64); #else swift_uint128_t result; static const uint64_t mask32 = UINT32_MAX; uint64_t rhs0 = rhs & mask32; uint64_t rhs1 = rhs >> 32; uint64_t t = (lhs.low) * rhs0; t >>= 32; uint64_t a = (lhs.b) * rhs0; uint64_t b = (lhs.low) * rhs1; t += a + (b & mask32); t >>= 32; t += (b >> 32); a = lhs.c * rhs0; b = lhs.b * rhs1; t += (a & mask32) + (b & mask32); result.low = t; t >>= 32; t += (a >> 32) + (b >> 32); a = lhs.high * rhs0; b = lhs.c * rhs1; t += (a & mask32) + (b & mask32); result.b = t; t >>= 32; t += (a >> 32) + (b >> 32); t += lhs.high * rhs1; result.c = t; result.high = t >> 32; return result; #endif } // Multiply a 128-bit fraction by a 64-bit fraction, rounding up. static swift_uint128_t multiply128x64RoundingUp(swift_uint128_t lhs, uint64_t rhs) { #if HAVE_UINT128_T uint64_t lhsl = (uint64_t)lhs; uint64_t lhsh = (uint64_t)(lhs >> 64); swift_uint128_t h = (swift_uint128_t)lhsh * rhs; swift_uint128_t l = (swift_uint128_t)lhsl * rhs; const static __uint128_t bias = ((__uint128_t)1 << 64) - 1; return h + ((l + bias) >> 64); #else swift_uint128_t result; static const uint64_t mask32 = UINT32_MAX; uint64_t rhs0 = rhs & mask32; uint64_t rhs1 = rhs >> 32; uint64_t t = (lhs.low) * rhs0 + mask32; t >>= 32; uint64_t a = (lhs.b) * rhs0; uint64_t b = (lhs.low) * rhs1; t += (a & mask32) + (b & mask32) + mask32; t >>= 32; t += (a >> 32) + (b >> 32); a = lhs.c * rhs0; b = lhs.b * rhs1; t += (a & mask32) + (b & mask32); result.low = t; t >>= 32; t += (a >> 32) + (b >> 32); a = lhs.high * rhs0; b = lhs.c * rhs1; t += (a & mask32) + (b & mask32); result.b = t; t >>= 32; t += (a >> 32) + (b >> 32); t += lhs.high * rhs1; result.c = t; result.high = t >> 32; return result; #endif } #if !HAVE_UINT128_T // Multiply a 128-bit fraction by a 32-bit integer in a 32-bit environment. // (On 64-bit, we use a fast inline macro.) static void multiply128xu32(swift_uint128_t *lhs, uint32_t rhs) { uint64_t t = (uint64_t)(lhs->low) * rhs; lhs->low = (uint32_t)t; t = (t >> 32) + (uint64_t)(lhs->b) * rhs; lhs->b = (uint32_t)t; t = (t >> 32) + (uint64_t)(lhs->c) * rhs; lhs->c = (uint32_t)t; t = (t >> 32) + (uint64_t)(lhs->high) * rhs; lhs->high = (uint32_t)t; } // Compare two 128-bit integers in a 32-bit environment // (On 64-bit, we use a fast inline macro.) static int isLessThan128x128(swift_uint128_t lhs, swift_uint128_t rhs) { return ((lhs.high < rhs.high) || ((lhs.high == rhs.high) && ((lhs.c < rhs.c) || ((lhs.c == rhs.c) && ((lhs.b < rhs.b) || ((lhs.b == rhs.b) && (lhs.low < rhs.low))))))); } // Subtract 128-bit values in a 32-bit environment static void subtract128x128(swift_uint128_t *lhs, swift_uint128_t rhs) { uint64_t t = (uint64_t)lhs->low + (~rhs.low) + 1; lhs->low = (uint32_t)t; t = (t >> 32) + lhs->b + (~rhs.b); lhs->b = (uint32_t)t; t = (t >> 32) + lhs->c + (~rhs.c); lhs->c = (uint32_t)t; t = (t >> 32) + lhs->high + (~rhs.high); lhs->high = (uint32_t)t; } #endif #if !HAVE_UINT128_T // Shift a 128-bit integer right, rounding down. static swift_uint128_t shiftRightRoundingDown128(swift_uint128_t lhs, int shift) { // Note: Shift is always less than 32 swift_uint128_t result; uint64_t t = (uint64_t)lhs.low >> shift; t += ((uint64_t)lhs.b << (32 - shift)); result.low = t; t >>= 32; t += ((uint64_t)lhs.c << (32 - shift)); result.b = t; t >>= 32; t += ((uint64_t)lhs.high << (32 - shift)); result.c = t; t >>= 32; result.high = t; return result; } #endif #if !HAVE_UINT128_T // Shift a 128-bit integer right, rounding up. static swift_uint128_t shiftRightRoundingUp128(swift_uint128_t lhs, int shift) { swift_uint128_t result; const uint64_t bias = (1 << shift) - 1; uint64_t t = ((uint64_t)lhs.low + bias) >> shift; t += ((uint64_t)lhs.b << (32 - shift)); result.low = t; t >>= 32; t += ((uint64_t)lhs.c << (32 - shift)); result.b = t; t >>= 32; t += ((uint64_t)lhs.high << (32 - shift)); result.c = t; t >>= 32; result.high = t; return result; } #endif #endif // Shift a 128-bit integer left, discarding high bits #if (SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT) && !HAVE_UINT128_T static void shiftLeft128(swift_uint128_t *lhs, int shift) { // Note: Shift is always less than 32 uint64_t t = (uint64_t)lhs->high << (shift + 32); t += (uint64_t)lhs->c << shift; lhs->high = t >> 32; t <<= 32; t += (uint64_t)lhs->b << shift; lhs->c = t >> 32; t <<= 32; t += (uint64_t)lhs->low << shift; lhs->b = t >> 32; lhs->low = t; } #endif #if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT // Multiply a 256-bit fraction by a 32-bit integer. // This is used in the digit generation to multiply by ten or // 10,000. Note that rounding is never an issue. // As used above, this will never overflow. static void multiply256xu32(swift_uint256_t *lhs, uint32_t rhs) { #if HAVE_UINT128_T __uint128_t t = (__uint128_t)lhs->low * rhs; lhs->low = (uint64_t)t; t = (t >> 64) + (__uint128_t)lhs->midlow * rhs; lhs->midlow = (uint64_t)t; t = (t >> 64) + (__uint128_t)lhs->midhigh * rhs; lhs->midhigh = (uint64_t)t; t = (t >> 64) + (__uint128_t)lhs->high * rhs; lhs->high = (uint64_t)t; #else uint64_t t = 0; for (int i = 0; i < 8; ++i) { t = (t >> 32) + (uint64_t)lhs->elt[i] * rhs; lhs->elt[i] = t; } #endif } // Multiply a 256-bit fraction by a 128-bit fraction, rounding down. static void multiply256x128RoundingDown(swift_uint256_t *lhs, swift_uint128_t rhs) { #if HAVE_UINT128_T // A full multiply of four 64-bit values by two 64-bit values // yields six such components. We discard the bottom two (except // for carries) to get a rounded-down four-element result. __uint128_t current = (__uint128_t)lhs->low * (uint64_t)rhs; current = (current >> 64); __uint128_t t = (__uint128_t)lhs->low * (rhs >> 64); current += (uint64_t)t; __uint128_t next = t >> 64; t = (__uint128_t)lhs->midlow * (uint64_t)rhs; current += (uint64_t)t; next += t >> 64; current = next + (current >> 64); t = (__uint128_t)lhs->midlow * (rhs >> 64); current += (uint64_t)t; next = t >> 64; t = (__uint128_t)lhs->midhigh * (uint64_t)rhs; current += (uint64_t)t; next += t >> 64; lhs->low = (uint64_t)current; current = next + (current >> 64); t = (__uint128_t)lhs->midhigh * (rhs >> 64); current += (uint64_t)t; next = t >> 64; t = (__uint128_t)lhs->high * (uint64_t)rhs; current += (uint64_t)t; next += t >> 64; lhs->midlow = (uint64_t)current; current = next + (current >> 64); t = (__uint128_t)lhs->high * (rhs >> 64); current += t; lhs->midhigh = (uint64_t)current; lhs->high = (uint64_t)(current >> 64); #else uint64_t a, b, c, d; // temporaries // Eight 32-bit values multiplied by 4 32-bit values. Oh my. static const uint64_t mask32 = UINT32_MAX; uint64_t t = 0; a = (uint64_t)lhs->elt[0] * rhs.low; t += (a & mask32); t >>= 32; t += (a >> 32); a = (uint64_t)lhs->elt[0] * rhs.b; b = (uint64_t)lhs->elt[1] * rhs.low; t += (a & mask32) + (b & mask32); t >>= 32; t += (a >> 32) + (b >> 32); a = (uint64_t)lhs->elt[0] * rhs.c; b = (uint64_t)lhs->elt[1] * rhs.b; c = (uint64_t)lhs->elt[2] * rhs.low; t += (a & mask32) + (b & mask32) + (c & mask32); t >>= 32; t += (a >> 32) + (b >> 32) + (c >> 32); a = (uint64_t)lhs->elt[0] * rhs.high; b = (uint64_t)lhs->elt[1] * rhs.c; c = (uint64_t)lhs->elt[2] * rhs.b; d = (uint64_t)lhs->elt[3] * rhs.low; t += (a & mask32) + (b & mask32) + (c & mask32) + (d & mask32); t >>= 32; t += (a >> 32) + (b >> 32) + (c >> 32) + (d >> 32); for (int i = 0; i < 4; ++i) { a = (uint64_t)lhs->elt[i + 1] * rhs.high; b = (uint64_t)lhs->elt[i + 2] * rhs.c; c = (uint64_t)lhs->elt[i + 3] * rhs.b; d = (uint64_t)lhs->elt[i + 4] * rhs.low; t += (a & mask32) + (b & mask32) + (c & mask32) + (d & mask32); lhs->elt[i] = t; t >>= 32; t += (a >> 32) + (b >> 32) + (c >> 32) + (d >> 32); } a = (uint64_t)lhs->elt[5] * rhs.high; b = (uint64_t)lhs->elt[6] * rhs.c; c = (uint64_t)lhs->elt[7] * rhs.b; t += (a & mask32) + (b & mask32) + (c & mask32); lhs->elt[4] = t; t >>= 32; t += (a >> 32) + (b >> 32) + (c >> 32); a = (uint64_t)lhs->elt[6] * rhs.high; b = (uint64_t)lhs->elt[7] * rhs.c; t += (a & mask32) + (b & mask32); lhs->elt[5] = t; t >>= 32; t += (a >> 32) + (b >> 32); t += (uint64_t)lhs->elt[7] * rhs.high; lhs->elt[6] = t; lhs->elt[7] = t >> 32; #endif } // Multiply a 256-bit fraction by a 128-bit fraction, rounding up. static void multiply256x128RoundingUp(swift_uint256_t *lhs, swift_uint128_t rhs) { #if HAVE_UINT128_T // Same as the rounding-down version, but we add // UINT128_MAX to the bottom two to force an extra // carry if they are non-zero. swift_uint128_t current = (swift_uint128_t)lhs->low * (uint64_t)rhs; current += UINT64_MAX; current = (current >> 64); swift_uint128_t t = (swift_uint128_t)lhs->low * (rhs >> 64); current += (uint64_t)t; swift_uint128_t next = t >> 64; t = (swift_uint128_t)lhs->midlow * (uint64_t)rhs; current += (uint64_t)t; next += t >> 64; // Round up by adding UINT128_MAX (upper half) current += UINT64_MAX; current = next + (current >> 64); t = (swift_uint128_t)lhs->midlow * (rhs >> 64); current += (uint64_t)t; next = t >> 64; t = (swift_uint128_t)lhs->midhigh * (uint64_t)rhs; current += (uint64_t)t; next += t >> 64; lhs->low = (uint64_t)current; current = next + (current >> 64); t = (swift_uint128_t)lhs->midhigh * (rhs >> 64); current += (uint64_t)t; next = t >> 64; t = (swift_uint128_t)lhs->high * (uint64_t)rhs; current += (uint64_t)t; next += t >> 64; lhs->midlow = (uint64_t)current; current = next + (current >> 64); t = (swift_uint128_t)lhs->high * (rhs >> 64); current += t; lhs->midhigh = (uint64_t)current; lhs->high = (uint64_t)(current >> 64); #else uint64_t a, b, c, d; // temporaries // Eight 32-bit values multiplied by 4 32-bit values. Oh my. static const uint64_t mask32 = UINT32_MAX; uint64_t t = 0; a = (uint64_t)lhs->elt[0] * rhs.low + mask32; t += (a & mask32); t >>= 32; t += (a >> 32); a = (uint64_t)lhs->elt[0] * rhs.b; b = (uint64_t)lhs->elt[1] * rhs.low; t += (a & mask32) + (b & mask32) + mask32; t >>= 32; t += (a >> 32) + (b >> 32); a = (uint64_t)lhs->elt[0] * rhs.c; b = (uint64_t)lhs->elt[1] * rhs.b; c = (uint64_t)lhs->elt[2] * rhs.low; t += (a & mask32) + (b & mask32) + (c & mask32) + mask32; t >>= 32; t += (a >> 32) + (b >> 32) + (c >> 32); a = (uint64_t)lhs->elt[0] * rhs.high; b = (uint64_t)lhs->elt[1] * rhs.c; c = (uint64_t)lhs->elt[2] * rhs.b; d = (uint64_t)lhs->elt[3] * rhs.low; t += (a & mask32) + (b & mask32) + (c & mask32) + (d & mask32) + mask32; t >>= 32; t += (a >> 32) + (b >> 32) + (c >> 32) + (d >> 32); for (int i = 0; i < 4; ++i) { a = (uint64_t)lhs->elt[i + 1] * rhs.high; b = (uint64_t)lhs->elt[i + 2] * rhs.c; c = (uint64_t)lhs->elt[i + 3] * rhs.b; d = (uint64_t)lhs->elt[i + 4] * rhs.low; t += (a & mask32) + (b & mask32) + (c & mask32) + (d & mask32); lhs->elt[i] = t; t >>= 32; t += (a >> 32) + (b >> 32) + (c >> 32) + (d >> 32); } a = (uint64_t)lhs->elt[5] * rhs.high; b = (uint64_t)lhs->elt[6] * rhs.c; c = (uint64_t)lhs->elt[7] * rhs.b; t += (a & mask32) + (b & mask32) + (c & mask32); lhs->elt[4] = t; t >>= 32; t += (a >> 32) + (b >> 32) + (c >> 32); a = (uint64_t)lhs->elt[6] * rhs.high; b = (uint64_t)lhs->elt[7] * rhs.c; t += (a & mask32) + (b & mask32); lhs->elt[5] = t; t >>= 32; t += (a >> 32) + (b >> 32); t += (uint64_t)lhs->elt[7] * rhs.high; lhs->elt[6] = t; lhs->elt[7] = t >> 32; #endif } // Subtract two 256-bit integers or fractions. static void subtract256x256(swift_uint256_t *lhs, swift_uint256_t rhs) { #if HAVE_UINT128_T swift_uint128_t t = (swift_uint128_t)lhs->low + (~rhs.low) + 1; lhs->low = t; t = (t >> 64) + lhs->midlow + (~rhs.midlow); lhs->midlow = t; t = (t >> 64) + lhs->midhigh + (~rhs.midhigh); lhs->midhigh = t; lhs->high += (t >> 64) + (~rhs.high); #else uint64_t t = ((uint64_t)1) << 32; for (int i = 0; i < 8; i++) { t = (t >> 32) + lhs->elt[i] + (~rhs.elt[i]); lhs->elt[i] = t; } #endif } // Compare two 256-bit integers or fractions. static int isLessThan256x256(swift_uint256_t lhs, swift_uint256_t rhs) { #if HAVE_UINT128_T return (lhs.high < rhs.high) || (lhs.high == rhs.high && (lhs.midhigh < rhs.midhigh || (lhs.midhigh == rhs.midhigh && (lhs.midlow < rhs.midlow || (lhs.midlow == rhs.midlow && lhs.low < rhs.low))))); #else for (int i = 7; i >= 0; i--) { if (lhs.elt[i] < rhs.elt[i]) { return true; } else if (lhs.elt[i] > rhs.elt[i]) { return false; } } return false; #endif } // Shift a 256-bit integer right (by less than 32 bits!), rounding down. static void shiftRightRoundingDown256(swift_uint256_t *lhs, int shift) { #if HAVE_UINT128_T __uint128_t t = (__uint128_t)lhs->low >> shift; t += ((__uint128_t)lhs->midlow << (64 - shift)); lhs->low = t; t >>= 64; t += ((__uint128_t)lhs->midhigh << (64 - shift)); lhs->midlow = t; t >>= 64; t += ((__uint128_t)lhs->high << (64 - shift)); lhs->midhigh = t; t >>= 64; lhs->high = t; #else uint64_t t = (uint64_t)lhs->elt[0] >> shift; for (int i = 0; i < 7; ++i) { t += ((uint64_t)lhs->elt[i + 1] << (32 - shift)); lhs->elt[i] = t; t >>= 32; } lhs->elt[7] = t; #endif } // Shift a 256-bit integer right, rounding up. // Note: The shift will always be less than 20. Someday, that // might suggest a way to further optimize this. static void shiftRightRoundingUp256(swift_uint256_t *lhs, int shift) { #if HAVE_UINT128_T const uint64_t bias = (1 << shift) - 1; __uint128_t t = ((__uint128_t)lhs->low + bias) >> shift; t += ((__uint128_t)lhs->midlow << (64 - shift)); lhs->low = t; t >>= 64; t += ((__uint128_t)lhs->midhigh << (64 - shift)); lhs->midlow = t; t >>= 64; t += ((__uint128_t)lhs->high << (64 - shift)); lhs->midhigh = t; t >>= 64; lhs->high = t; #else const uint64_t bias = (1 << shift) - 1; uint64_t t = ((uint64_t)lhs->elt[0] + bias) >> shift; for (int i = 0; i < 7; ++i) { t += ((uint64_t)lhs->elt[i + 1] << (32 - shift)); lhs->elt[i] = t; t >>= 32; } lhs->elt[7] = t; #endif } #endif // ================================================================ // // Power of 10 calculation // // ================================================================ // // ------------ Power-of-10 tables. -------------------------- // // Grisu-style algorithms rely on being able to rapidly // find a high-precision approximation of any power of 10. // These values were computed by a simple script that // relied on Python's excellent variable-length // integer support. #if SWIFT_DTOA_BINARY32_SUPPORT // Table with negative powers of 10 to 64 bits // // Table size: 320 bytes static uint64_t powersOf10_negativeBinary32[] = { 0x8b61313bbabce2c6ULL, // x 2^-132 ~= 10^-40 0xae397d8aa96c1b77ULL, // x 2^-129 ~= 10^-39 0xd9c7dced53c72255ULL, // x 2^-126 ~= 10^-38 0x881cea14545c7575ULL, // x 2^-122 ~= 10^-37 0xaa242499697392d2ULL, // x 2^-119 ~= 10^-36 0xd4ad2dbfc3d07787ULL, // x 2^-116 ~= 10^-35 0x84ec3c97da624ab4ULL, // x 2^-112 ~= 10^-34 0xa6274bbdd0fadd61ULL, // x 2^-109 ~= 10^-33 0xcfb11ead453994baULL, // x 2^-106 ~= 10^-32 0x81ceb32c4b43fcf4ULL, // x 2^-102 ~= 10^-31 0xa2425ff75e14fc31ULL, // x 2^-99 ~= 10^-30 0xcad2f7f5359a3b3eULL, // x 2^-96 ~= 10^-29 0xfd87b5f28300ca0dULL, // x 2^-93 ~= 10^-28 0x9e74d1b791e07e48ULL, // x 2^-89 ~= 10^-27 0xc612062576589ddaULL, // x 2^-86 ~= 10^-26 0xf79687aed3eec551ULL, // x 2^-83 ~= 10^-25 0x9abe14cd44753b52ULL, // x 2^-79 ~= 10^-24 0xc16d9a0095928a27ULL, // x 2^-76 ~= 10^-23 0xf1c90080baf72cb1ULL, // x 2^-73 ~= 10^-22 0x971da05074da7beeULL, // x 2^-69 ~= 10^-21 0xbce5086492111aeaULL, // x 2^-66 ~= 10^-20 0xec1e4a7db69561a5ULL, // x 2^-63 ~= 10^-19 0x9392ee8e921d5d07ULL, // x 2^-59 ~= 10^-18 0xb877aa3236a4b449ULL, // x 2^-56 ~= 10^-17 0xe69594bec44de15bULL, // x 2^-53 ~= 10^-16 0x901d7cf73ab0acd9ULL, // x 2^-49 ~= 10^-15 0xb424dc35095cd80fULL, // x 2^-46 ~= 10^-14 0xe12e13424bb40e13ULL, // x 2^-43 ~= 10^-13 0x8cbccc096f5088cbULL, // x 2^-39 ~= 10^-12 0xafebff0bcb24aafeULL, // x 2^-36 ~= 10^-11 0xdbe6fecebdedd5beULL, // x 2^-33 ~= 10^-10 0x89705f4136b4a597ULL, // x 2^-29 ~= 10^-9 0xabcc77118461cefcULL, // x 2^-26 ~= 10^-8 0xd6bf94d5e57a42bcULL, // x 2^-23 ~= 10^-7 0x8637bd05af6c69b5ULL, // x 2^-19 ~= 10^-6 0xa7c5ac471b478423ULL, // x 2^-16 ~= 10^-5 0xd1b71758e219652bULL, // x 2^-13 ~= 10^-4 0x83126e978d4fdf3bULL, // x 2^-9 ~= 10^-3 0xa3d70a3d70a3d70aULL, // x 2^-6 ~= 10^-2 0xccccccccccccccccULL, // x 2^-3 ~= 10^-1 }; #endif #if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT // Tables with powers of 10 // // The constant powers of 10 here represent pure fractions // with a binary point at the far left. (Each number in // this first table is implicitly divided by 2^128.) // // Table size: 896 bytes // // A 64-bit significand allows us to exactly represent powers of 10 up // to 10^27. In 128 bits, we can exactly represent powers of 10 up to // 10^55. As with all of these tables, the binary exponent is not stored; // it is computed by the `binaryExponentFor10ToThe(p)` function. static const uint64_t powersOf10_Exact128[56 * 2] = { // Low order ... high order 0x0000000000000000ULL, 0x8000000000000000ULL, // x 2^1 == 10^0 exactly 0x0000000000000000ULL, 0xa000000000000000ULL, // x 2^4 == 10^1 exactly 0x0000000000000000ULL, 0xc800000000000000ULL, // x 2^7 == 10^2 exactly 0x0000000000000000ULL, 0xfa00000000000000ULL, // x 2^10 == 10^3 exactly 0x0000000000000000ULL, 0x9c40000000000000ULL, // x 2^14 == 10^4 exactly 0x0000000000000000ULL, 0xc350000000000000ULL, // x 2^17 == 10^5 exactly 0x0000000000000000ULL, 0xf424000000000000ULL, // x 2^20 == 10^6 exactly 0x0000000000000000ULL, 0x9896800000000000ULL, // x 2^24 == 10^7 exactly 0x0000000000000000ULL, 0xbebc200000000000ULL, // x 2^27 == 10^8 exactly 0x0000000000000000ULL, 0xee6b280000000000ULL, // x 2^30 == 10^9 exactly 0x0000000000000000ULL, 0x9502f90000000000ULL, // x 2^34 == 10^10 exactly 0x0000000000000000ULL, 0xba43b74000000000ULL, // x 2^37 == 10^11 exactly 0x0000000000000000ULL, 0xe8d4a51000000000ULL, // x 2^40 == 10^12 exactly 0x0000000000000000ULL, 0x9184e72a00000000ULL, // x 2^44 == 10^13 exactly 0x0000000000000000ULL, 0xb5e620f480000000ULL, // x 2^47 == 10^14 exactly 0x0000000000000000ULL, 0xe35fa931a0000000ULL, // x 2^50 == 10^15 exactly 0x0000000000000000ULL, 0x8e1bc9bf04000000ULL, // x 2^54 == 10^16 exactly 0x0000000000000000ULL, 0xb1a2bc2ec5000000ULL, // x 2^57 == 10^17 exactly 0x0000000000000000ULL, 0xde0b6b3a76400000ULL, // x 2^60 == 10^18 exactly 0x0000000000000000ULL, 0x8ac7230489e80000ULL, // x 2^64 == 10^19 exactly 0x0000000000000000ULL, 0xad78ebc5ac620000ULL, // x 2^67 == 10^20 exactly 0x0000000000000000ULL, 0xd8d726b7177a8000ULL, // x 2^70 == 10^21 exactly 0x0000000000000000ULL, 0x878678326eac9000ULL, // x 2^74 == 10^22 exactly 0x0000000000000000ULL, 0xa968163f0a57b400ULL, // x 2^77 == 10^23 exactly 0x0000000000000000ULL, 0xd3c21bcecceda100ULL, // x 2^80 == 10^24 exactly 0x0000000000000000ULL, 0x84595161401484a0ULL, // x 2^84 == 10^25 exactly 0x0000000000000000ULL, 0xa56fa5b99019a5c8ULL, // x 2^87 == 10^26 exactly 0x0000000000000000ULL, 0xcecb8f27f4200f3aULL, // x 2^90 == 10^27 exactly 0x4000000000000000ULL, 0x813f3978f8940984ULL, // x 2^94 == 10^28 exactly 0x5000000000000000ULL, 0xa18f07d736b90be5ULL, // x 2^97 == 10^29 exactly 0xa400000000000000ULL, 0xc9f2c9cd04674edeULL, // x 2^100 == 10^30 exactly 0x4d00000000000000ULL, 0xfc6f7c4045812296ULL, // x 2^103 == 10^31 exactly 0xf020000000000000ULL, 0x9dc5ada82b70b59dULL, // x 2^107 == 10^32 exactly 0x6c28000000000000ULL, 0xc5371912364ce305ULL, // x 2^110 == 10^33 exactly 0xc732000000000000ULL, 0xf684df56c3e01bc6ULL, // x 2^113 == 10^34 exactly 0x3c7f400000000000ULL, 0x9a130b963a6c115cULL, // x 2^117 == 10^35 exactly 0x4b9f100000000000ULL, 0xc097ce7bc90715b3ULL, // x 2^120 == 10^36 exactly 0x1e86d40000000000ULL, 0xf0bdc21abb48db20ULL, // x 2^123 == 10^37 exactly 0x1314448000000000ULL, 0x96769950b50d88f4ULL, // x 2^127 == 10^38 exactly 0x17d955a000000000ULL, 0xbc143fa4e250eb31ULL, // x 2^130 == 10^39 exactly 0x5dcfab0800000000ULL, 0xeb194f8e1ae525fdULL, // x 2^133 == 10^40 exactly 0x5aa1cae500000000ULL, 0x92efd1b8d0cf37beULL, // x 2^137 == 10^41 exactly 0xf14a3d9e40000000ULL, 0xb7abc627050305adULL, // x 2^140 == 10^42 exactly 0x6d9ccd05d0000000ULL, 0xe596b7b0c643c719ULL, // x 2^143 == 10^43 exactly 0xe4820023a2000000ULL, 0x8f7e32ce7bea5c6fULL, // x 2^147 == 10^44 exactly 0xdda2802c8a800000ULL, 0xb35dbf821ae4f38bULL, // x 2^150 == 10^45 exactly 0xd50b2037ad200000ULL, 0xe0352f62a19e306eULL, // x 2^153 == 10^46 exactly 0x4526f422cc340000ULL, 0x8c213d9da502de45ULL, // x 2^157 == 10^47 exactly 0x9670b12b7f410000ULL, 0xaf298d050e4395d6ULL, // x 2^160 == 10^48 exactly 0x3c0cdd765f114000ULL, 0xdaf3f04651d47b4cULL, // x 2^163 == 10^49 exactly 0xa5880a69fb6ac800ULL, 0x88d8762bf324cd0fULL, // x 2^167 == 10^50 exactly 0x8eea0d047a457a00ULL, 0xab0e93b6efee0053ULL, // x 2^170 == 10^51 exactly 0x72a4904598d6d880ULL, 0xd5d238a4abe98068ULL, // x 2^173 == 10^52 exactly 0x47a6da2b7f864750ULL, 0x85a36366eb71f041ULL, // x 2^177 == 10^53 exactly 0x999090b65f67d924ULL, 0xa70c3c40a64e6c51ULL, // x 2^180 == 10^54 exactly 0xfff4b4e3f741cf6dULL, 0xd0cf4b50cfe20765ULL, // x 2^183 == 10^55 exactly }; #endif #if SWIFT_DTOA_BINARY64_SUPPORT // Rounded values supporting the full range of binary64 // // Table size: 464 bytes // // We only store every 28th power of ten here. // We can multiply by an exact 64-bit power of // ten from the table above to reconstruct the // significand for any power of 10. static const uint64_t powersOf10_Binary64[] = { // low-order half, high-order half 0x3931b850df08e738, 0x95fe7e07c91efafa, // x 2^-1328 ~= 10^-400 0xba954f8e758fecb3, 0x9774919ef68662a3, // x 2^-1235 ~= 10^-372 0x9028bed2939a635c, 0x98ee4a22ecf3188b, // x 2^-1142 ~= 10^-344 0x47b233c92125366e, 0x9a6bb0aa55653b2d, // x 2^-1049 ~= 10^-316 0x4ee367f9430aec32, 0x9becce62836ac577, // x 2^-956 ~= 10^-288 0x6f773fc3603db4a9, 0x9d71ac8fada6c9b5, // x 2^-863 ~= 10^-260 0xc47bc5014a1a6daf, 0x9efa548d26e5a6e1, // x 2^-770 ~= 10^-232 0x80e8a40eccd228a4, 0xa086cfcd97bf97f3, // x 2^-677 ~= 10^-204 0xb8ada00e5a506a7c, 0xa21727db38cb002f, // x 2^-584 ~= 10^-176 0xc13e60d0d2e0ebba, 0xa3ab66580d5fdaf5, // x 2^-491 ~= 10^-148 0xc2974eb4ee658828, 0xa54394fe1eedb8fe, // x 2^-398 ~= 10^-120 0xcb4ccd500f6bb952, 0xa6dfbd9fb8e5b88e, // x 2^-305 ~= 10^-92 0x3f2398d747b36224, 0xa87fea27a539e9a5, // x 2^-212 ~= 10^-64 0xdde50bd1d5d0b9e9, 0xaa242499697392d2, // x 2^-119 ~= 10^-36 0xfdc20d2b36ba7c3d, 0xabcc77118461cefc, // x 2^-26 ~= 10^-8 0x0000000000000000, 0xad78ebc5ac620000, // x 2^67 == 10^20 exactly 0x9670b12b7f410000, 0xaf298d050e4395d6, // x 2^160 == 10^48 exactly 0x3b25a55f43294bcb, 0xb0de65388cc8ada8, // x 2^253 ~= 10^76 0x58edec91ec2cb657, 0xb2977ee300c50fe7, // x 2^346 ~= 10^104 0x29babe4598c311fb, 0xb454e4a179dd1877, // x 2^439 ~= 10^132 0x577b986b314d6009, 0xb616a12b7fe617aa, // x 2^532 ~= 10^160 0x0c11ed6d538aeb2f, 0xb7dcbf5354e9bece, // x 2^625 ~= 10^188 0x6d953e2bd7173692, 0xb9a74a0637ce2ee1, // x 2^718 ~= 10^216 0x9d6d1ad41abe37f1, 0xbb764c4ca7a4440f, // x 2^811 ~= 10^244 0x4b2d8644d8a74e18, 0xbd49d14aa79dbc82, // x 2^904 ~= 10^272 0xe0470a63e6bd56c3, 0xbf21e44003acdd2c, // x 2^997 ~= 10^300 0x505f522e53053ff2, 0xc0fe908895cf3b44, // x 2^1090 ~= 10^328 0xcca845ab2beafa9a, 0xc2dfe19c8c055535, // x 2^1183 ~= 10^356 0x1027fff56784f444, 0xc4c5e310aef8aa17, // x 2^1276 ~= 10^384 }; #endif #if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT // Every 56th power of 10 across the range of Float80/Binary128 // // Table size: 5,728 bytes // // Note: We could cut this in half at the cost of one additional // 256-bit multiply by only storing the positive values and // multiplying by 10^-4984 to obtain the negative ones. static const uint64_t powersOf10_Binary128[] = { // Low-order ... high-order 0xaec2e6aff96b46aeULL, 0xf91044c2eff84750ULL, 0x2b55c9e70e00c557ULL, 0xb6536903bf8f2bdaULL, // x 2^-16556 ~= 10^-4984 0xda1b3c3dd3889587ULL, 0x73a7380aba84a6b1ULL, 0xbddb2dfde3f8a6e3ULL, 0xb9e5428330737362ULL, // x 2^-16370 ~= 10^-4928 0xa2d23c57cfebb9ecULL, 0x9f165c039ead6d77ULL, 0x88227fdfc13ab53dULL, 0xbd89006346a9a34dULL, // x 2^-16184 ~= 10^-4872 0x333d510cf27e5a5ULL, 0x4e3cc383eaa17b7bULL, 0xe05fe4207ca3d508ULL, 0xc13efc51ade7df64ULL, // x 2^-15998 ~= 10^-4816 0xff242c569bc1f539ULL, 0x5c67ba58680c4cceULL, 0x3c55f3f947fef0e9ULL, 0xc50791bd8dd72edbULL, // x 2^-15812 ~= 10^-4760 0xe4b75ae27bec50bfULL, 0x25b0419765fdfcdbULL, 0x915564d8ab057eeULL, 0xc8e31de056f89c19ULL, // x 2^-15626 ~= 10^-4704 0x548b1e80a94f3434ULL, 0xe418e9217ce83755ULL, 0x801e38463183fc88ULL, 0xccd1ffc6bba63e21ULL, // x 2^-15440 ~= 10^-4648 0x541950a0fdc2b4d9ULL, 0xeea173da1f0eb7b4ULL, 0xcfadf6b2aa7c4f43ULL, 0xd0d49859d60d40a3ULL, // x 2^-15254 ~= 10^-4592 0x7e64501be95ad76bULL, 0x451e855d8acef835ULL, 0x9e601e707a2c3488ULL, 0xd4eb4a687c0253e8ULL, // x 2^-15068 ~= 10^-4536 0xdadd9645f360cb51ULL, 0xf290163350ecb3ebULL, 0xa8edffdccfe4db4bULL, 0xd9167ab0c1965798ULL, // x 2^-14882 ~= 10^-4480 0x7e447db3018ffbdfULL, 0x4fa1860c08a85923ULL, 0xb17cd86e7fcece75ULL, 0xdd568fe9ab559344ULL, // x 2^-14696 ~= 10^-4424 0x61cd4655bf64d265ULL, 0xb19fd88fe285b3bcULL, 0x1151250681d59705ULL, 0xe1abf2cd11206610ULL, // x 2^-14510 ~= 10^-4368 0xa5703f5ce7a619ecULL, 0x361243a84b55574dULL, 0x25a8e1e5dbb41d6ULL, 0xe6170e21b2910457ULL, // x 2^-14324 ~= 10^-4312 0xb93897a6cf5d3e61ULL, 0x18746fcc6a190db9ULL, 0x66e849253e5da0c2ULL, 0xea984ec57de69f13ULL, // x 2^-14138 ~= 10^-4256 0x309043d12ab5b0acULL, 0x79c93cff11f09319ULL, 0xf5a7800f23ef67b8ULL, 0xef3023b80a732d93ULL, // x 2^-13952 ~= 10^-4200 0xa3baa84c049b52b9ULL, 0xbec466ee1b586342ULL, 0xe85fc7f4edbd3caULL, 0xf3defe25478e074aULL, // x 2^-13766 ~= 10^-4144 0xd1f4628316b15c7aULL, 0xae16192410d3135eULL, 0x4268a54f70bd28c4ULL, 0xf8a551706112897cULL, // x 2^-13580 ~= 10^-4088 0x9eb9296cc5749dbaULL, 0x48324e275376dfddULL, 0x5052e9289f0f2333ULL, 0xfd83933eda772c0bULL, // x 2^-13394 ~= 10^-4032 0xff6aae669a5a0d8aULL, 0x24fed95087b9006eULL, 0x1b02378a405b421ULL, 0x813d1dc1f0c754d6ULL, // x 2^-13207 ~= 10^-3976 0xf993f18de00dc89bULL, 0x15617da021b89f92ULL, 0xb782db1fc6aba49bULL, 0x83c4e245ed051dc1ULL, // x 2^-13021 ~= 10^-3920 0xc6a0d64a712172b1ULL, 0x2217669197ac1504ULL, 0x4250be2eeba87d15ULL, 0x86595584116caf3cULL, // x 2^-12835 ~= 10^-3864 0xbdc0c67a220687bULL, 0x44a66a6d6fd6537bULL, 0x3f1f93f1943ca9b6ULL, 0x88fab70d8b44952aULL, // x 2^-12649 ~= 10^-3808 0xb60b57164ad28122ULL, 0xde5bd4572c25a830ULL, 0x2c87f18b39478aa2ULL, 0x8ba947b223e5783eULL, // x 2^-12463 ~= 10^-3752 0xbd59568efdb9bfeeULL, 0x292f8f2c98d7f44cULL, 0x4054f5360249ebd1ULL, 0x8e6549867da7d11aULL, // x 2^-12277 ~= 10^-3696 0x9fa0721e66791accULL, 0x1789061d717d454cULL, 0xc1187fa0c18adbbeULL, 0x912effea7015b2c5ULL, // x 2^-12091 ~= 10^-3640 0x982b64e953ac4e27ULL, 0x45efb05f20cf48b3ULL, 0x4b4de34e0ebc3e06ULL, 0x9406af8f83fd6265ULL, // x 2^-11905 ~= 10^-3584 0xa53f5950eec21dcaULL, 0x3bd8754763bdbca1ULL, 0xac73f0226eff5ea1ULL, 0x96ec9e7f9004839bULL, // x 2^-11719 ~= 10^-3528 0x320e19f88f1161b7ULL, 0x72e93fe0cce7cfd9ULL, 0x2184706ea46a4c38ULL, 0x99e11423765ec1d0ULL, // x 2^-11533 ~= 10^-3472 0x491aba48dfc0e36eULL, 0xd3de560ee34022b2ULL, 0xddadb80577b906bdULL, 0x9ce4594a044e0f1bULL, // x 2^-11347 ~= 10^-3416 0x6789d038697142fULL, 0x7a466a75be73db21ULL, 0x60dbd8aa443b560fULL, 0x9ff6b82ef415d222ULL, // x 2^-11161 ~= 10^-3360 0x40ed8056af76ac43ULL, 0x8251c601e346456ULL, 0x7401c6f091f87727ULL, 0xa3187c82120dace6ULL, // x 2^-10975 ~= 10^-3304 0x8c643ee307bffec6ULL, 0xf369a11c6f66c05aULL, 0x4d5b32f713d7f476ULL, 0xa649f36e8583e81aULL, // x 2^-10789 ~= 10^-3248 0xe32f5e080e36b4beULL, 0x3adf30ff2eb163d4ULL, 0xb4b39dd9ddb8d317ULL, 0xa98b6ba23e2300c7ULL, // x 2^-10603 ~= 10^-3192 0x6b9d538c192cfb1bULL, 0x1c5af3bd4d2c60b5ULL, 0xec41c1793d69d0d1ULL, 0xacdd3555869159d1ULL, // x 2^-10417 ~= 10^-3136 0x1adadaeedf7d699cULL, 0x71043692494aa743ULL, 0x3ca5a7540d9d56c9ULL, 0xb03fa252bd05a815ULL, // x 2^-10231 ~= 10^-3080 0xec3e4e5fc6b03617ULL, 0x47c9b16afe8fdf74ULL, 0x92e1bc1fbb33f18dULL, 0xb3b305fe328e571fULL, // x 2^-10045 ~= 10^-3024 0x1d42fa68b12bdb23ULL, 0xac46a7b3f2b4b34eULL, 0xa908fd4a88728b6aULL, 0xb737b55e31cdde04ULL, // x 2^-9859 ~= 10^-2968 0x887dede507f2b618ULL, 0x359a8fa0d014b9a7ULL, 0x7c4c65d15c614c56ULL, 0xbace07232df1c802ULL, // x 2^-9673 ~= 10^-2912 0x504708e718b4b669ULL, 0xfb4d9440822af452ULL, 0xef84cc99cb4c5d17ULL, 0xbe7653b01aae13e5ULL, // x 2^-9487 ~= 10^-2856 0x5b7977525516bff0ULL, 0x75913092420c9b35ULL, 0xcfc147ade4843a24ULL, 0xc230f522ee0a7fc2ULL, // x 2^-9301 ~= 10^-2800 0xad5d11883cc1302bULL, 0x860a754894b9a0bcULL, 0x4668677d5f46c29bULL, 0xc5fe475d4cd35cffULL, // x 2^-9115 ~= 10^-2744 0x42032f9f971bfc07ULL, 0x9fb576046ab35018ULL, 0x474b3cb1fe1d6a7fULL, 0xc9dea80d6283a34cULL, // x 2^-8929 ~= 10^-2688 0xd3e7fbb72403a4ddULL, 0x8ca223055819af54ULL, 0xd6ea3b733029ef0bULL, 0xcdd276b6e582284fULL, // x 2^-8743 ~= 10^-2632 0xba2431d885f2b7d9ULL, 0xc9879fc42869f610ULL, 0x3736730a9e47fef8ULL, 0xd1da14bc489025eaULL, // x 2^-8557 ~= 10^-2576 0xa11edbcd65dd1844ULL, 0xcb8edae81a295887ULL, 0x3d24e68dc1027246ULL, 0xd5f5e5681a4b9285ULL, // x 2^-8371 ~= 10^-2520 0xa0f076652f69ad08ULL, 0x9d19c341f5f42f2aULL, 0x742ab8f3864562c8ULL, 0xda264df693ac3e30ULL, // x 2^-8185 ~= 10^-2464 0x29f760ef115f2824ULL, 0xe0ee47c041c9de0fULL, 0x8c119f3680212413ULL, 0xde6bb59f56672cdaULL, // x 2^-7999 ~= 10^-2408 0x8b90230b3409c9d3ULL, 0x9d76eef2c1543e65ULL, 0x43190b523f872b9cULL, 0xe2c6859f5c284230ULL, // x 2^-7813 ~= 10^-2352 0xd44ce9993bc6611eULL, 0x777c9b2dfbede079ULL, 0x2a0969bf88679396ULL, 0xe7372943179706fcULL, // x 2^-7627 ~= 10^-2296 0xe8c5f5a63fd0fbd1ULL, 0xccc12293f1d7a58ULL, 0x131565be33dda91aULL, 0xebbe0df0c8201ac5ULL, // x 2^-7441 ~= 10^-2240 0xdb97988dd6b776f4ULL, 0xeb2106f435f7e1d5ULL, 0xccfb1cc2ef1f44deULL, 0xf05ba3330181c750ULL, // x 2^-7255 ~= 10^-2184 0x2fcbc8df94a1d54bULL, 0x796d0a8120801513ULL, 0x5f8385b3a882ff4cULL, 0xf5105ac3681f2716ULL, // x 2^-7069 ~= 10^-2128 0xc8700c11071a40f5ULL, 0x23cb9e9df9331fe4ULL, 0x166c15f456786c27ULL, 0xf9dca895a3226409ULL, // x 2^-6883 ~= 10^-2072 0x9589f4637a50cbb5ULL, 0xea8242b0030e4a51ULL, 0x6c656c3b1f2c9d91ULL, 0xfec102e2857bc1f9ULL, // x 2^-6697 ~= 10^-2016 0xc4be56c83349136cULL, 0x6188db81ac8e775dULL, 0xfa70b9a2ca60b004ULL, 0x81def119b76837c8ULL, // x 2^-6510 ~= 10^-1960 0xb85d39054658b363ULL, 0xe7df06bc613fda21ULL, 0x6a22490e8e9ec98bULL, 0x8469e0b6f2b8bd9bULL, // x 2^-6324 ~= 10^-1904 0x800b1e1349fef248ULL, 0x469cfd2e6ca32a77ULL, 0x69138459b0fa72d4ULL, 0x87018eefb53c6325ULL, // x 2^-6138 ~= 10^-1848 0xb62593291c768919ULL, 0xc098e6ed0bfbd6f6ULL, 0x6c83ad1260ff20f4ULL, 0x89a63ba4c497b50eULL, // x 2^-5952 ~= 10^-1792 0x92ee7fce474479d3ULL, 0xe02017175bf040c6ULL, 0xd82ef2860273de8dULL, 0x8c5827f711735b46ULL, // x 2^-5766 ~= 10^-1736 0x7b0e6375ca8c77d9ULL, 0x5f07e1e10097d47fULL, 0x416d7f9ab1e67580ULL, 0x8f17964dfc3961f2ULL, // x 2^-5580 ~= 10^-1680 0xc8d869ed561af1ceULL, 0x8b6648e941de779bULL, 0x56700866b85d57feULL, 0x91e4ca5db93dbfecULL, // x 2^-5394 ~= 10^-1624 0xfc04df783488a410ULL, 0x64d1f15da2c146b1ULL, 0x43cf71d5c4fd7868ULL, 0x94c0092dd4ef9511ULL, // x 2^-5208 ~= 10^-1568 0xfbaf03b48a965a64ULL, 0x9b6122aa2b72a13cULL, 0x387898a6e22f821bULL, 0x97a9991fd8b3afc0ULL, // x 2^-5022 ~= 10^-1512 0x50f7f7c13119aaddULL, 0xe415d8b25694250aULL, 0x8f8857e875e7774eULL, 0x9aa1c1f6110c0dd0ULL, // x 2^-4836 ~= 10^-1456 0xce214403545fd685ULL, 0xf36d1ad779b90e09ULL, 0xa5c58d5f91a476d7ULL, 0x9da8ccda75b341b5ULL, // x 2^-4650 ~= 10^-1400 0x63ddfb68f971b0c5ULL, 0x2822e38faf74b26eULL, 0x6e1f7f1642ebaac8ULL, 0xa0bf0465b455e921ULL, // x 2^-4464 ~= 10^-1344 0xf0d00cec9daf7444ULL, 0x6bf3eea6f661a32aULL, 0xfad2be1679765f27ULL, 0xa3e4b4a65e97b76aULL, // x 2^-4278 ~= 10^-1288 0x463b4ab4bd478f57ULL, 0x6f6583b5b36d5426ULL, 0x800cfab80c4e2eb1ULL, 0xa71a2b283c14fba6ULL, // x 2^-4092 ~= 10^-1232 0xef163df2fa96e983ULL, 0xa825f32bc8f6b080ULL, 0x850b0c5976b21027ULL, 0xaa5fb6fbc115010bULL, // x 2^-3906 ~= 10^-1176 0x7db1b3f8e100eb43ULL, 0x2862b1f61d64ddc3ULL, 0x61363686961a41e5ULL, 0xadb5a8bdaaa53051ULL, // x 2^-3720 ~= 10^-1120 0xfd349cf00ba1e09aULL, 0x6d282fe1b7112879ULL, 0xc6f075c4b81fc72dULL, 0xb11c529ec0d87268ULL, // x 2^-3534 ~= 10^-1064 0xf7221741b221cf6fULL, 0x3739f15b06ac3c76ULL, 0xb4e4be5b6455ef96ULL, 0xb494086bbfea00c3ULL, // x 2^-3348 ~= 10^-1008 0xc4e5a2f864c403bbULL, 0x6e33cdcda4367276ULL, 0x24d256c540a50309ULL, 0xb81d1f9569068d8eULL, // x 2^-3162 ~= 10^-952 0x276e3f0f67f0553bULL, 0xde73d9d5be6974ULL, 0x6d4aa5b50bb5dc0dULL, 0xbbb7ef38bb827f2dULL, // x 2^-2976 ~= 10^-896 0x51a34a3e674484edULL, 0x1fb6069f8b26f840ULL, 0x925624c0d7d93317ULL, 0xbf64d0275747de70ULL, // x 2^-2790 ~= 10^-840 0xcc775c8cb6de1dbcULL, 0x6d60d02eac6309eeULL, 0x8e5a2e5116baf191ULL, 0xc3241cf0094a8e70ULL, // x 2^-2604 ~= 10^-784 0x6023c8fa17d7b105ULL, 0x69cf8f51d2e5e65ULL, 0xb0560c246f90e9e8ULL, 0xc6f631e782d57096ULL, // x 2^-2418 ~= 10^-728 0x92c17acb2d08d5fdULL, 0xc26ffb8e81532725ULL, 0x2ffff1289a804c5aULL, 0xcadb6d313c8736fcULL, // x 2^-2232 ~= 10^-672 0x47df78ab9e92897aULL, 0xc02b302a892b81dcULL, 0xa855e127113c887bULL, 0xced42ec885d9dbbeULL, // x 2^-2046 ~= 10^-616 0xdaf2dec03ec0c322ULL, 0x72db3bc15b0c7014ULL, 0xe00bad8dfc0d8c8eULL, 0xd2e0d889c213fd60ULL, // x 2^-1860 ~= 10^-560 0xd3a04799e4473ac8ULL, 0xa116409a2fdf1e9eULL, 0xc654d07271e6c39fULL, 0xd701ce3bd387bf47ULL, // x 2^-1674 ~= 10^-504 0x5c8a5dc65d745a24ULL, 0x2726c48a85389fa7ULL, 0x84c663cee6b86e7cULL, 0xdb377599b6074244ULL, // x 2^-1488 ~= 10^-448 0xd7ebc61ba77a9e66ULL, 0x8bf77d4bc59b35b1ULL, 0xcb285ceb2fed040dULL, 0xdf82365c497b5453ULL, // x 2^-1302 ~= 10^-392 0x744ce999bfed213aULL, 0x363b1f2c568dc3e2ULL, 0xfd1b1b2308169b25ULL, 0xe3e27a444d8d98b7ULL, // x 2^-1116 ~= 10^-336 0x6a40608fe10de7e7ULL, 0xf910f9f648232f14ULL, 0xd1b3400f8f9cff68ULL, 0xe858ad248f5c22c9ULL, // x 2^-930 ~= 10^-280 0x9bdbfc21260dd1adULL, 0x4609ac5c7899ca36ULL, 0xa4f8bf5635246428ULL, 0xece53cec4a314ebdULL, // x 2^-744 ~= 10^-224 0xd88181aad19d7454ULL, 0xf80f36174730ca34ULL, 0xdc44e6c3cb279ac1ULL, 0xf18899b1bc3f8ca1ULL, // x 2^-558 ~= 10^-168 0xee19bfa6947f8e02ULL, 0xaa09501d5954a559ULL, 0x4d4617b5ff4a16d5ULL, 0xf64335bcf065d37dULL, // x 2^-372 ~= 10^-112 0xebbc75a03b4d60e6ULL, 0xac2e4f162cfad40aULL, 0xeed6e2f0f0d56712ULL, 0xfb158592be068d2eULL, // x 2^-186 ~= 10^-56 0x0ULL, 0x0ULL, 0x0ULL, 0x8000000000000000ULL, // x 2^1 == 10^0 exactly 0x0ULL, 0x2000000000000000ULL, 0xbff8f10e7a8921a4ULL, 0x82818f1281ed449fULL, // x 2^187 == 10^56 exactly 0x51775f71e92bf2f2ULL, 0x74a7ef0198791097ULL, 0x3e2cf6bc604ddb0ULL, 0x850fadc09923329eULL, // x 2^373 ~= 10^112 0xb204b3d9686f55b5ULL, 0xfb118fc9c217a1d2ULL, 0x90fb44d2f05d0842ULL, 0x87aa9aff79042286ULL, // x 2^559 ~= 10^168 0xd7924bff833149faULL, 0xbc10c5c5cda97c8dULL, 0x82bd6b70d99aaa6fULL, 0x8a5296ffe33cc92fULL, // x 2^745 ~= 10^224 0xa67d072d3c7fa14bULL, 0x7ec63730f500b406ULL, 0xdb0b487b6423e1e8ULL, 0x8d07e33455637eb2ULL, // x 2^931 ~= 10^280 0x546f2a35dc367e47ULL, 0x949063d8a46f0c0eULL, 0x213a4f0aa5e8a7b1ULL, 0x8fcac257558ee4e6ULL, // x 2^1117 ~= 10^336 0x50611a621c0ee3aeULL, 0x202d895116aa96beULL, 0x1c306f5d1b0b5fdfULL, 0x929b7871de7f22b9ULL, // x 2^1303 ~= 10^392 0xffa6738a27dcf7a3ULL, 0x3c11d8430d5c4802ULL, 0xa7ea9c8838ce9437ULL, 0x957a4ae1ebf7f3d3ULL, // x 2^1489 ~= 10^448 0x5bf36c0f40bde99dULL, 0x284ba600ee9f6303ULL, 0xbf1d49cacccd5e68ULL, 0x9867806127ece4f4ULL, // x 2^1675 ~= 10^504 0xa6e937834ed12e58ULL, 0x73f26eb82f6b8066ULL, 0x655494c5c95d77f2ULL, 0x9b63610bb9243e46ULL, // x 2^1861 ~= 10^560 0xcd4b7660adc6930ULL, 0x8f868688f8eb79ebULL, 0x2e008393fd60b55ULL, 0x9e6e366733f85561ULL, // x 2^2047 ~= 10^616 0x3efb9807d86d3c6aULL, 0x84c10a1d22f5adc5ULL, 0x55e04dba4b3bd4ddULL, 0xa1884b69ade24964ULL, // x 2^2233 ~= 10^672 0xf065089401df33b4ULL, 0x1fc02370c451a755ULL, 0x44b222741eb1ebbfULL, 0xa4b1ec80f47c84adULL, // x 2^2419 ~= 10^728 0xa62d0da836fce7d5ULL, 0x75933380ceb5048cULL, 0x1cf4a5c3bc09fa6fULL, 0xa7eb6799e8aec999ULL, // x 2^2605 ~= 10^784 0x7a400df820f096c2ULL, 0x802c4085068d2dd5ULL, 0x3c4a575151b294dcULL, 0xab350c27feb90accULL, // x 2^2791 ~= 10^840 0xf48b51375df06e86ULL, 0x412fe9e72afd355eULL, 0x870a8d87239d8f35ULL, 0xae8f2b2ce3d5dbe9ULL, // x 2^2977 ~= 10^896 0x881883521930127cULL, 0xe53fd3fcb5b4df25ULL, 0xdd929f09c3eff5acULL, 0xb1fa17404a30e5e8ULL, // x 2^3163 ~= 10^952 0x270cd9f1348eb326ULL, 0x37ed82fe9c75fccfULL, 0x1931b583a9431d7eULL, 0xb5762497dbf17a9eULL, // x 2^3349 ~= 10^1008 0x8919b01a5b3d9ec1ULL, 0x6a7669bdfc6f699cULL, 0xe30db03e0f8dd286ULL, 0xb903a90f561d25e2ULL, // x 2^3535 ~= 10^1064 0xf0461526b4201aa5ULL, 0x7fe40defe17e55f5ULL, 0x9eb5cb19647508c5ULL, 0xbca2fc30cc19f090ULL, // x 2^3721 ~= 10^1120 0xd67bf35422978bbfULL, 0xdbb1c416ebe661fULL, 0x24bd4c00042ad125ULL, 0xc054773d149bf26bULL, // x 2^3907 ~= 10^1176 0xdd093192ef5508d0ULL, 0x6eac3085943ccc0fULL, 0x7ea30dbd7ea479e3ULL, 0xc418753460cdcca9ULL, // x 2^4093 ~= 10^1232 0xfe4ff20db6d25dc2ULL, 0x5d5d5a9519e34a42ULL, 0x764f4cf916b4deceULL, 0xc7ef52defe87b751ULL, // x 2^4279 ~= 10^1288 0xd8adfb2e00494c5eULL, 0x72435286baf0e84eULL, 0xbeb7fbdc1cbe8b37ULL, 0xcbd96ed6466cf081ULL, // x 2^4465 ~= 10^1344 0xe07c1e4384f594afULL, 0xc6b90b8874d5189ULL, 0xdce472c619aa3f63ULL, 0xcfd7298db6cb9672ULL, // x 2^4651 ~= 10^1400 0x5dd902c68fa448cfULL, 0xea8d16bd9544e48eULL, 0xe47defc14a406e4fULL, 0xd3e8e55c3c1f43d0ULL, // x 2^4837 ~= 10^1456 0x1223d79357bedca8ULL, 0xeae6c2843752ac35ULL, 0xb7157c60a24a0569ULL, 0xd80f0685a81b2a81ULL, // x 2^5023 ~= 10^1512 0xcff72d64bc79e429ULL, 0xccc52c236decd778ULL, 0xfb0b98f6bbc4f0cbULL, 0xdc49f3445824e360ULL, // x 2^5209 ~= 10^1568 0x3731f76b905dffbbULL, 0x5e2bddd7d12a9e42ULL, 0xc6c6c1764e047e15ULL, 0xe09a13d30c2dba62ULL, // x 2^5395 ~= 10^1624 0xeb58d8ef2ada7c09ULL, 0xbc1a3b726b789947ULL, 0x87e8dcfc09dbc33aULL, 0xe4ffd276eedce658ULL, // x 2^5581 ~= 10^1680 0x249a5c06dc5d5db7ULL, 0xa8f09440be97bfe6ULL, 0xb1a3642a8da3cf4fULL, 0xe97b9b89d001dab3ULL, // x 2^5767 ~= 10^1736 0xbf34ff7963028cd9ULL, 0xc20578fa3851488bULL, 0x2d4070f33b21ab7bULL, 0xee0ddd84924ab88cULL, // x 2^5953 ~= 10^1792 0x2d0511317361d5ULL, 0xd6919e041129a1a7ULL, 0xa2bf0c63a814e04eULL, 0xf2b70909cd3fd35cULL, // x 2^6139 ~= 10^1848 0x1fa87f28acf1dcd2ULL, 0xe7a0a88981d1a0f9ULL, 0x8f13995cf9c2747ULL, 0xf77790f0a48a45ceULL, // x 2^6325 ~= 10^1904 0x1b6ff8afbe589b72ULL, 0xc851bb3f9aeb1211ULL, 0x7a37993eb21444faULL, 0xfc4fea4fd590b40aULL, // x 2^6511 ~= 10^1960 0xef23a4cbc039f0c2ULL, 0xbb3f8498a972f18eULL, 0xb7b1ada9cdeba84dULL, 0x80a046447e3d49f1ULL, // x 2^6698 ~= 10^2016 0x2cc44f2b602b6231ULL, 0xf231f4b7996b7278ULL, 0xcc6866c5d69b2cbULL, 0x8324f8aa08d7d411ULL, // x 2^6884 ~= 10^2072 0x822c97629a3a4c69ULL, 0x8a9afcdbc940e6f9ULL, 0x7fe2b4308dcbf1a3ULL, 0x85b64a659077660eULL, // x 2^7070 ~= 10^2128 0xf66cfcf42d4896b0ULL, 0x1f11852a20ed33c5ULL, 0x1d73ef3eaac3c964ULL, 0x88547abb1d8e5bd9ULL, // x 2^7256 ~= 10^2184 0x63093ad0caadb06cULL, 0x31be1482014cdaf0ULL, 0x1e34291b1ef566c7ULL, 0x8affca2bd1f88549ULL, // x 2^7442 ~= 10^2240 0xab50f69048738e9aULL, 0xa126c32ff4882be8ULL, 0x9e9383d73d486881ULL, 0x8db87a7c1e56d873ULL, // x 2^7628 ~= 10^2296 0xe57e659432b0a73eULL, 0x47a0e15dfc7986b8ULL, 0x9cc5ee51962c011aULL, 0x907eceba168949b3ULL, // x 2^7814 ~= 10^2352 0x8a6ff950599f8ae5ULL, 0xd1cbbb7d005a76d3ULL, 0x413407cfeeac9743ULL, 0x93530b43e5e2c129ULL, // x 2^8000 ~= 10^2408 0xd4e6b6e847550caaULL, 0x56a3106227b87706ULL, 0x7efa7d29c44e11b7ULL, 0x963575ce63b6332dULL, // x 2^8186 ~= 10^2464 0xd835c90b09842263ULL, 0xb69f01a641da2a42ULL, 0x5a848859645d1c6fULL, 0x9926556bc8defe43ULL, // x 2^8372 ~= 10^2520 0x9b0ae73c204ecd61ULL, 0x794fd5e5a51ac2fULL, 0x51edea897b34601fULL, 0x9c25f29286e9ddb6ULL, // x 2^8558 ~= 10^2576 0x3130484fb0a61d89ULL, 0x32b7105223a27365ULL, 0xb50008d92529e91fULL, 0x9f3497244186fca4ULL, // x 2^8744 ~= 10^2632 0x8cd036553f38a1e8ULL, 0x5e997e9f45d7897dULL, 0xf09e780bcc8238d9ULL, 0xa2528e74eaf101fcULL, // x 2^8930 ~= 10^2688 0xe1f8b43b08b5d0efULL, 0xa0eaf3f62dc1777cULL, 0x3a5828869701a165ULL, 0xa580255203f84b47ULL, // x 2^9116 ~= 10^2744 0x3c7f62e3154fa708ULL, 0x5786f3927eb15bd5ULL, 0x8b231a70eb5444ceULL, 0xa8bdaa0a0064fa44ULL, // x 2^9302 ~= 10^2800 0x1ebc24a19cd70a2aULL, 0x843fddd10c7006b8ULL, 0xfa1bde1f473556a4ULL, 0xac0b6c73d065f8ccULL, // x 2^9488 ~= 10^2856 0x46b6aae34cfd26fcULL, 0xdb7d919b136c68ULL, 0x7730e00421da4d55ULL, 0xaf69bdf68fc6a740ULL, // x 2^9674 ~= 10^2912 0x1c4edcb83fc4c49dULL, 0x61c0edd56bbcb3e8ULL, 0x7f959cb702329d14ULL, 0xb2d8f1915ba88ca5ULL, // x 2^9860 ~= 10^2968 0x428c840d247382feULL, 0x9cc3b1569b1325a4ULL, 0x40c3a071220f5567ULL, 0xb6595be34f821493ULL, // x 2^10046 ~= 10^3024 0xbeb82e734787ec63ULL, 0xbeff12280d5a1676ULL, 0x11c48d02b8326bd3ULL, 0xb9eb5333aa272e9bULL, // x 2^10232 ~= 10^3080 0x302349e12f45c73fULL, 0xb494bcc96d53e49cULL, 0x566765461bd2f61bULL, 0xbd8f2f7a1ba47d6dULL, // x 2^10418 ~= 10^3136 0x5704ebf5f16946ceULL, 0x431388ec68ac7a26ULL, 0xb889018e4f6e9a52ULL, 0xc1454a673cb9b1ceULL, // x 2^10604 ~= 10^3192 0x5a30431166af9b23ULL, 0x132d031fc1d1fec0ULL, 0xf85333a94848659fULL, 0xc50dff6d30c3aefcULL, // x 2^10790 ~= 10^3248 0x7573d4b3ffe4ba3bULL, 0xf888498a40220657ULL, 0x1a1aeae7cf8a9d3dULL, 0xc8e9abc872eb2bc1ULL, // x 2^10976 ~= 10^3304 0xb5eaef7441511eb9ULL, 0xc9cf998035a91664ULL, 0x12e29f09d9061609ULL, 0xccd8ae88cf70ad84ULL, // x 2^11162 ~= 10^3360 0x73aed4f1908f4d01ULL, 0x8c53e7beeca4578fULL, 0xdf7601457ca20b35ULL, 0xd0db689a89f2f9b1ULL, // x 2^11348 ~= 10^3416 0x5adbd55696e1cdd9ULL, 0x4949d09424b87626ULL, 0xcbdcd02f23cc7690ULL, 0xd4f23ccfb1916df5ULL, // x 2^11534 ~= 10^3472 0x3f500ccf4ea03593ULL, 0x9b80aac81b50762aULL, 0x44289dd21b589d7aULL, 0xd91d8fe9a3d019ccULL, // x 2^11720 ~= 10^3528 0x134ca67a679b84aeULL, 0x8909e424a112a3cdULL, 0x95aa118ec1d08317ULL, 0xdd5dc8a2bf27f3f7ULL, // x 2^11906 ~= 10^3584 0xe89e3cf733d9ff40ULL, 0x14344660a175c36ULL, 0x72c4d2cad73b0a7bULL, 0xe1b34fb846321d04ULL, // x 2^12092 ~= 10^3640 0x68c0a2c6c02dae9aULL, 0xb11160a6edb5f57ULL, 0xe20a88f1134f906dULL, 0xe61e8ff47461cda9ULL, // x 2^12278 ~= 10^3696 0x47fa54906741561aULL, 0xaa13acba1e5511f5ULL, 0xc7c91d5c341ed39dULL, 0xea9ff638c54554e1ULL, // x 2^12464 ~= 10^3752 0x365460ed91271c24ULL, 0xabe33496aff629b4ULL, 0xf659ede2159a45ecULL, 0xef37f1886f4b6690ULL, // x 2^12650 ~= 10^3808 0xe4cbf4acc7fba37fULL, 0x350e915f7055b1b8ULL, 0x78d946bab954b82fULL, 0xf3e6f313130ef0efULL, // x 2^12836 ~= 10^3864 0xe692accdfa5bd859ULL, 0xf4d4d3202379829eULL, 0xc9b1474d8f89c269ULL, 0xf8ad6e3fa030bd15ULL, // x 2^13022 ~= 10^3920 0xeca0018ea3b8d1b4ULL, 0xe878edb67072c26dULL, 0x6b1d2745340e7b14ULL, 0xfd8bd8b770cb469eULL, // x 2^13208 ~= 10^3976 0xce5fec949ab87cf7ULL, 0x151dcd7a53488c3ULL, 0xf22e502fcdd4bca2ULL, 0x81415538ce493bd5ULL, // x 2^13395 ~= 10^4032 0x5e1731fbff8c032eULL, 0xe752f53c2f8fa6c1ULL, 0x7c1735fc3b813c8cULL, 0x83c92edf425b292dULL, // x 2^13581 ~= 10^4088 0xb552102ea83f47e6ULL, 0xdf0fd2002ff6b3a3ULL, 0x367500a8e9a178fULL, 0x865db7a9ccd2839eULL, // x 2^13767 ~= 10^4144 0x76507bafe00ec873ULL, 0x71b256ecd954434cULL, 0xc9ac50475e25293aULL, 0x88ff2f2bade74531ULL, // x 2^13953 ~= 10^4200 0x5e2075ba289a360bULL, 0xac376f28b45e5accULL, 0x879b2e5f6ee8b1cULL, 0x8badd636cc48b341ULL, // x 2^14139 ~= 10^4256 0xab87d85e6311e801ULL, 0xb7f786d14d58173dULL, 0x2f33c652bd12fab7ULL, 0x8e69eee1f23f2be5ULL, // x 2^14325 ~= 10^4312 0x7fed9b68d77255beULL, 0x35dc241819de7182ULL, 0xad6a6308a8e8b557ULL, 0x9133bc8f2a130fe5ULL, // x 2^14511 ~= 10^4368 0x728ae72899d4bd12ULL, 0xe5413d9414142a55ULL, 0x9dbaa465efe141a0ULL, 0x940b83f23a55842aULL, // x 2^14697 ~= 10^4424 0xf7740145246fb8fULL, 0x186ef2c39acb4103ULL, 0x888c9ab2fc5b3437ULL, 0x96f18b1742aad751ULL, // x 2^14883 ~= 10^4480 0xd8bb0fba2183c6efULL, 0xbf66d66cc34f0197ULL, 0xba00864671d1053fULL, 0x99e6196979b978f1ULL, // x 2^15069 ~= 10^4536 0x9b71ed2ceb790e49ULL, 0x6faac32d59cc1f5dULL, 0x61d59d402aae4feaULL, 0x9ce977ba0ce3a0bdULL, // x 2^15255 ~= 10^4592 0xa0aa6d5e63991cfbULL, 0x19482fa0ac45669cULL, 0x803c1cd864033781ULL, 0x9ffbf04722750449ULL, // x 2^15441 ~= 10^4648 0x95a9949e04b8bff3ULL, 0x900aa3c2f02ac9d4ULL, 0xa28a151725a55e10ULL, 0xa31dcec2fef14b30ULL, // x 2^15627 ~= 10^4704 0x3acf9496dade0ce9ULL, 0xbd8ecf923d23bec0ULL, 0x5b8452af2302fe13ULL, 0xa64f605b4e3352cdULL, // x 2^15813 ~= 10^4760 0x6204425d2b58e822ULL, 0xdee162a8a1248550ULL, 0x82b84cabc828bf93ULL, 0xa990f3c09110c544ULL, // x 2^15999 ~= 10^4816 0x91a2658e0639f32ULL, 0x66fa2184cee0b861ULL, 0x8d29dd5122e4278dULL, 0xace2d92db0390b59ULL, // x 2^16185 ~= 10^4872 0x80acda113324758aULL, 0xded179c26d9ab828ULL, 0x58f8fde02c03a6c6ULL, 0xb045626fb50a35e7ULL, // x 2^16371 ~= 10^4928 0x7128a8aad239ce8fULL, 0x8737bd250290cd5bULL, 0xd950102978dbd0ffULL, 0xb3b8e2eda91a232dULL, // x 2^16557 ~= 10^4984 }; #endif #if SWIFT_DTOA_BINARY32_SUPPORT // Given a power `p`, this returns three values: // * 64-bit fractions `lower` and `upper` // * integer `exponent` // // The returned values satisfy the following: // ``` // lower * 2^exponent <= 10^p <= upper * 2^exponent // ``` // // Note: Max(*upper - *lower) = 3 static void intervalContainingPowerOf10_Binary32(int p, uint64_t *lower, uint64_t *upper, int *exponent) { if (p >= 0) { uint64_t base = powersOf10_Exact128[p * 2 + 1]; *lower = base; if (p < 28) { *upper = base; } else { *upper = base + 1; } } else { uint64_t base = powersOf10_negativeBinary32[p + 40]; *lower = base; *upper = base + 1; } *exponent = binaryExponentFor10ToThe(p); } #endif #if SWIFT_DTOA_BINARY64_SUPPORT // Given a power `p`, this returns three values: // * 128-bit fractions `lower` and `upper` // * integer `exponent` // // Note: This function takes on average about 10% of the total runtime // for formatting a double, as the general case here requires several // multiplications to accurately reconstruct the lower and upper // bounds. // // The returned values satisfy the following: // ``` // lower * 2^exponent <= 10^p <= upper * 2^exponent // ``` // // Note: Max(*upper - *lower) = 3 static void intervalContainingPowerOf10_Binary64(int p, swift_uint128_t *lower, swift_uint128_t *upper, int *exponent) { if (p >= 0 && p <= 55) { // Use one 64-bit exact value swift_uint128_t exact; initialize128WithHighLow64(exact, powersOf10_Exact128[p * 2 + 1], powersOf10_Exact128[p * 2]); *upper = exact; *lower = exact; *exponent = binaryExponentFor10ToThe(p); return; } // Multiply a 128-bit approximate value with a 64-bit exact value int index = p + 400; // Copy a pair of uint64_t into a swift_uint128_t int mainPower = index / 28; const uint64_t *base_p = powersOf10_Binary64 + mainPower * 2; swift_uint128_t base; initialize128WithHighLow64(base, base_p[1], base_p[0]); int extraPower = index - mainPower * 28; int baseExponent = binaryExponentFor10ToThe(p - extraPower); int e = baseExponent; if (extraPower == 0) { // We're using a tightly-rounded lower bound, so +1 gives a tightly-rounded upper bound *lower = base; #if HAVE_UINT128_T *upper = *lower + 1; #else *upper = *lower; upper->low += 1; #endif } else { // We need to multiply two values to get a lower bound int64_t extra = powersOf10_Exact128[extraPower * 2 + 1]; e += binaryExponentFor10ToThe(extraPower); *lower = multiply128x64RoundingDown(base, extra); // +2 is enough to get an upper bound // (Verified through exhaustive testing.) #if HAVE_UINT128_T *upper = *lower + 2; #else *upper = *lower; upper->low += 2; #endif } *exponent = e; } #endif #if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT // As above, but returning 256-bit fractions suitable for // converting float80/binary128. static void intervalContainingPowerOf10_Binary128(int p, swift_uint256_t *lower, swift_uint256_t *upper, int *exponent) { if (p >= 0 && p <= 55) { // We have an exact form, return a zero-width interval // and avoid the multiplication. uint64_t exactLow = powersOf10_Exact128[p * 2]; uint64_t exactHigh = powersOf10_Exact128[p * 2 + 1]; initialize256WithHighMidLow64(*lower, exactHigh, exactLow, 0, 0); *upper = *lower; *exponent = binaryExponentFor10ToThe(p); return; } int index = p + 4984; const uint64_t *base_p = powersOf10_Binary128 + (index / 56) * 4; // The values in the table are always tightly rounded down, so we use that // directly as a lower bound. initialize256WithHighMidLow64(*lower, base_p[3], base_p[2], base_p[1], base_p[0]); int extraPower = index % 56; int e = binaryExponentFor10ToThe(p - extraPower); if (extraPower > 0) { swift_uint128_t extra; initialize128WithHighLow64(extra, powersOf10_Exact128[extraPower * 2 + 1], powersOf10_Exact128[extraPower * 2]); multiply256x128RoundingDown(lower, extra); e += binaryExponentFor10ToThe(extraPower); } // We could compute upper similar to lower using rounding-up // multiplications, but this is faster. // Since there's just one multiplication, we can prove that 2 is // enough to get a true upper bound, and we've verified (through // exhaustive testing) that the least-significant component never // wraps. *upper = *lower; #if HAVE_UINT128_T upper->low += 2; #else upper->elt[0] += 2; #endif *exponent = e; } #endif