diff --git a/README.md b/README.md index 3080c14..00a81c1 100644 --- a/README.md +++ b/README.md @@ -118,10 +118,6 @@ bool isok = fast_double_parser::parse_number(string, &x); You must check the value of the boolean (`isok`): if it is false, then the function refused to parse. - -There is also a (slightly slower) version of the function called `parse_numer_inplace` that takes -three parameters: the initial pointer, the end-of-string pointer and the reference to the result. - ## Users The library has been reimplemented in [Google wuffs](https://github.com/google/wuffs/). diff --git a/include/fast_double_parser.h b/include/fast_double_parser.h index 6776383..d53c641 100644 --- a/include/fast_double_parser.h +++ b/include/fast_double_parser.h @@ -1073,71 +1073,9 @@ static bool parse_float_strtod(const char *ptr, double *outDouble) { return true; } -// same as above but makes a temporary copy so as not to read past the end of the buffer when parsing in-place -static bool parse_float_strtod_copy(const char *ptr, const char *pe, double *outDouble) { - static constexpr size_t TEMP_STRING_MAX_LEN = 64; - char temp_stack[TEMP_STRING_MAX_LEN]; - char * temp = temp_stack; - if ((size_t)(pe-ptr) >= TEMP_STRING_MAX_LEN) { - temp = (char*)malloc((size_t)(pe-ptr+1)); - if(temp == nullptr) return false; // couldn't parse due to memory allocation failure. - } - std::memcpy(temp, ptr, (pe - ptr)); - temp[(pe - ptr)] = 0; - char *endptr; -#ifdef __CYGWIN__ - // workround for cygwin - static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL); - *outDouble = cygwin_strtod_l(ptr, &endptr, c_locale); -#elif defined(_WIN32) - static _locale_t c_locale = _create_locale(LC_ALL, "C"); - *outDouble = _strtod_l(temp, &endptr, c_locale); -#else - static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL); - *outDouble = strtod_l(temp, &endptr, c_locale); -#endif - if(temp != temp_stack) free(temp); - if ((endptr == temp) || (!std::isfinite(*outDouble))) { - return false; - } - return true; -} - - -#if ( __cplusplus < 201703L ) -template -struct one_of_impl -{ - really_inline static bool call(char v) - { - return First == v || one_of_impl::call(v); - } -}; -template -struct one_of_impl -{ - really_inline static bool call(char v) - { - return First == v; - } -}; -template -really_inline bool is_one_of(char v) -{ - return one_of_impl::call(v); -} -#else -template -bool is_one_of(char v) -{ - return ((v == Values) || ...); -} -#endif - // parse the number at p -template WARN_UNUSED -really_inline bool parse_number_base(const char *p, double *outDouble) { +really_inline bool parse_number(const char *p, double *outDouble) { const char *pinit = p; bool found_minus = (*p == '-'); bool negative = false; @@ -1176,7 +1114,7 @@ really_inline bool parse_number_base(const char *p, double *outDouble) { } int64_t exponent = 0; const char *first_after_period = NULL; - if (is_one_of(*p)) { + if (*p == '.') { ++p; first_after_period = p; if (is_integer(*p)) { @@ -1240,7 +1178,7 @@ really_inline bool parse_number_base(const char *p, double *outDouble) { // It is possible that the integer had an overflow. // We have to handle the case where we have 0.0000somenumber. const char *start = start_digits; - while (*start == '0' || is_one_of(*start)) { + while (*start == '0' || (*start == '.')) { start++; } // we over-decrement by one when there is a decimal separator @@ -1272,167 +1210,6 @@ really_inline bool parse_number_base(const char *p, double *outDouble) { return true; } -// this version should be used when parsing data in place from streams. It wont read past pe -// it is mostly a copy-paste from above -template -WARN_UNUSED -really_inline bool parse_number_inplace_base(const char *p, const char *pe, double *outDouble) { - if (!p || !pe || (p == pe)) - return false; - const char *pinit = p; - bool found_minus = (*p == '-'); - bool negative = false; - if (found_minus) { - ++p; - if (p == pe) - return false; - - negative = true; - if (!is_integer(*p)) { // a negative sign must be followed by an integer - return false; - } - } - const char *const start_digits = p; - - uint64_t i; // an unsigned int avoids signed overflows (which are bad) - if (*p == '0') { // 0 cannot be followed by an integer - ++p; - if (p == pe) - return false; - if (is_integer(*p)) { - return false; - } - i = 0; - } - else { - if (!(is_integer(*p))) { // must start with an integer - return false; - } - unsigned char digit = *p - '0'; - i = digit; - p++; - // the is_made_of_eight_digits_fast routine is unlikely to help here because - // we rarely see large integer parts like 123456789 - while ((p != pe) && is_integer(*p)) { - digit = *p - '0'; - // a multiplication by 10 is cheaper than an arbitrary integer - // multiplication - i = 10 * i + digit; // might overflow, we will handle the overflow later - ++p; - } - } - int64_t exponent = 0; - const char *first_after_period = NULL; - if ((p != pe) && is_one_of(*p)) { - ++p; - if (p == pe) - return false; // no digits after separator - first_after_period = p; - if (is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - i = i * 10 + digit; // might overflow + multiplication by 10 is likely - // cheaper than arbitrary mult. - // we will handle the overflow later - } - else { - return false; - } - while ((p != pe) && is_integer(*p)) { - unsigned char digit = *p - '0'; - ++p; - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok - // because we have parse_highprecision_float later. - } - exponent = first_after_period - p; - } - int digit_count = - int(p - start_digits - 1); // used later to guard against overflows - int64_t exp_number = 0; // exponential part - if ((p != pe) && (('e' == *p) || ('E' == *p))) { - ++p; - if (p == pe) - return false; // ill formed scientific notation - bool neg_exp = false; - if ('-' == *p) { - neg_exp = true; - ++p; - } - else if ('+' == *p) { - ++p; - } - if ((p == pe) || !is_integer(*p)) { - return false; // ill formed scientific notation - } - unsigned char digit = *p - '0'; - exp_number = digit; - p++; - if ((p != pe) && is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - if ((p != pe) && is_integer(*p)) { - digit = *p - '0'; - exp_number = 10 * exp_number + digit; - ++p; - } - while ((p != pe) && is_integer(*p)) { - digit = *p - '0'; - if (exp_number < 0x100000000) { // we need to check for overflows - exp_number = 10 * exp_number + digit; - } - ++p; - } - exponent += (neg_exp ? -exp_number : exp_number); - } - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon. - if (unlikely((digit_count >= 19))) { // this is uncommon - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const char *start = start_digits; - while (*start == '0' || is_one_of(*start)) { - start++; - } - // we over-decrement by one when there is a decimal separator - digit_count -= int(start - start_digits); - if (digit_count >= 19) { - // Chances are good that we had an overflow! - // We start anew. - // This will happen in the following examples: - // 10000000000000000000000000000000000000000000e+308 - // 3.1415926535897932384626433832795028841971693993751 - // - return parse_float_strtod_copy(pinit, pe, outDouble); - } - } - if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) || - (exponent > FASTFLOAT_LARGEST_POWER)) { - // this is almost never going to get called!!! - // exponent could be as low as 325 - return parse_float_strtod_copy(pinit, pe, outDouble); - } - // from this point forward, exponent >= FASTFLOAT_SMALLEST_POWER and - // exponent <= FASTFLOAT_LARGEST_POWER - bool success = true; - *outDouble = compute_float_64(exponent, i, negative, &success); - if (!success) { - // we are almost never going to get here. - return parse_float_strtod_copy(pinit, pe, outDouble); - } - return true; -} - -typedef bool (*parser_function_t)(const char *p, double *outDouble); - -typedef bool(*inplace_parser_function_t)(const char *p, const char *pe, double *outDouble); - -constexpr parser_function_t parse_number WARN_UNUSED = parse_number_base<'.'>; - -constexpr inplace_parser_function_t parse_number_inplace WARN_UNUSED = parse_number_inplace_base<'.'>; - } // namespace fast_double_parser #endif diff --git a/tests/unit.cpp b/tests/unit.cpp index 7d51a83..775e577 100644 --- a/tests/unit.cpp +++ b/tests/unit.cpp @@ -66,25 +66,6 @@ void check(double d) { } } -void check_inplace(double d) { - std::string s(64, '\0'); - auto written = std::snprintf(&s[0], s.size(), "%.*e", DBL_DIG + 1, d); - s.resize(written); - double x; - bool isok = fast_double_parser::parse_number_inplace(s.data(), s.data()+written, &x); - if (!isok) { - printf("fast_double_parser_inplace refused to parse %s\n", s.c_str()); - throw std::runtime_error("fast_double_parser refused to parse"); - } - if (d != x) { - std::cerr << "fast_double_parser disagrees" << std::endl; - printf("fast_double_parser: %.*e\n", DBL_DIG + 1, x); - printf("reference: %.*e\n", DBL_DIG + 1, d); - printf("string: %s\n", s.c_str()); - printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(x, d)); - throw std::runtime_error("fast_double_parser disagrees"); - } -} void check_string(std::string s) { double x; @@ -110,41 +91,15 @@ void check_string(std::string s) { } } -void check_string_inplace(std::string s) { - double x; - bool isok = fast_double_parser::parse_number_inplace(s.data(),s.data()+s.size(), &x); - if (!isok) { - printf("fast_double_parser refused to parse %s\n", s.c_str()); - throw std::runtime_error("fast_double_parser refused to parse"); - } -#ifdef _WIN32 - static _locale_t c_locale = _create_locale(LC_ALL, "C"); - double d = _strtod_l(s.data(), nullptr, c_locale); -#else - static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL); - double d = strtod_l(s.data(), nullptr, c_locale); -#endif - if (d != x) { - std::cerr << "fast_double_parser disagrees" << std::endl; - printf("fast_double_parser: %.*e\n", DBL_DIG + 1, x); - printf("reference: %.*e\n", DBL_DIG + 1, d); - printf("string: %s\n", s.c_str()); - printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(x, d)); - throw std::runtime_error("fast_double_parser disagrees"); - } -} - void unit_tests() { for (std::string s : {"7.3177701707893310e+15","1e23", "9007199254740995","7e23"}) { check_string(s); - check_string_inplace(s); } for (double d : {-65.613616999999977, 7.2057594037927933e+16, 1.0e-308, 0.1e-308, 0.01e-307, 1.79769e+308, 2.22507e-308, -1.79769e+308, -2.22507e-308, 1e-308}) { check(d); - check_inplace(d); } uint64_t offset = 1190; size_t howmany = 10000000; @@ -163,7 +118,6 @@ void unit_tests() { ::memcpy(&d, &x, sizeof(double)); } check(d); - check_inplace(d); } printf("Unit tests ok\n"); @@ -249,6 +203,14 @@ void issue13() { std::cout << "zero maps to zero" << std::endl; } +void issue32() { + std::string a = "-0"; + double x; + bool ok = fast_double_parser::parse_number(a.c_str(), &x); + if(!ok) throw std::runtime_error("could not parse -zero."); + if(x != 0) throw std::runtime_error("-zero does not map to zero."); + std::cout << "0zero maps to zero" << std::endl; +} void issue23() { std::string a = "0e+42949672970"; @@ -312,6 +274,7 @@ int main() { Assert(basic_test_64bit("0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072008890245868760858598876504231122409594654935248025624400092282356951787758888037591552642309780950434312085877387158357291821993020294379224223559819827501242041788969571311791082261043971979604000454897391938079198936081525613113376149842043271751033627391549782731594143828136275113838604094249464942286316695429105080201815926642134996606517803095075913058719846423906068637102005108723282784678843631944515866135041223479014792369585208321597621066375401613736583044193603714778355306682834535634005074073040135602968046375918583163124224521599262546494300836851861719422417646455137135420132217031370496583210154654068035397417906022589503023501937519773030945763173210852507299305089761582519159720757232455434770912461317493580281734466552734375", 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072008890245868760858598876504231122409594654935248025624400092282356951787758888037591552642309780950434312085877387158357291821993020294379224223559819827501242041788969571311791082261043971979604000454897391938079198936081525613113376149842043271751033627391549782731594143828136275113838604094249464942286316695429105080201815926642134996606517803095075913058719846423906068637102005108723282784678843631944515866135041223479014792369585208321597621066375401613736583044193603714778355306682834535634005074073040135602968046375918583163124224521599262546494300836851861719422417646455137135420132217031370496583210154654068035397417906022589503023501937519773030945763173210852507299305089761582519159720757232455434770912461317493580281734466552734375)); issue23(); + issue32(); issue23_2(); unit_tests(); for (int p = -306; p <= 308; p++) {