mirror of
https://github.com/lemire/fast_double_parser.git
synced 2025-04-03 20:04:57 +00:00
Saving...
This commit is contained in:
parent
0619359d97
commit
31f4039d6d
9 changed files with 111759 additions and 51 deletions
6
.gitmodules
vendored
Normal file
6
.gitmodules
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
[submodule "benchmark/dependencies/abseil-cpp"]
|
||||
path = benchmarks/dependencies/abseil-cpp
|
||||
url = https://github.com/abseil/abseil-cpp.git
|
||||
[submodule "benchmark/dependencies/double-conversion"]
|
||||
path = benchmarks/dependencies/double-conversion
|
||||
url = https://github.com/google/double-conversion.git
|
36
Makefile
Normal file
36
Makefile
Normal file
|
@ -0,0 +1,36 @@
|
|||
all: benchmark unit
|
||||
|
||||
LIBABSEIL:=benchmarks/dependencies/abseil-cpp/build/absl/strings/libabsl_strings.a
|
||||
LIBABSEIL_INCLUDE:=-Ibenchmarks/dependencies/abseil-cpp
|
||||
LIBABSEIL_LIBS:=-Lbenchmarks/dependencies/abseil-cpp/build/absl/base -Lbenchmarks/dependencies/abseil-cpp/build/absl/strings -Lbenchmarks/dependencies/abseil-cpp/build/absl/numeric/ -labsl_strings -labsl_raw_logging_internal -labsl_throw_delegate -labsl_int128
|
||||
|
||||
|
||||
LIBDOUBLE:=benchmarks/dependencies/double-conversion/libdouble-conversion.a
|
||||
LIBDOUBLE_INCLUDE:=-Ibenchmarks/dependencies/double-conversion
|
||||
LIBDOUBLE_LIBS:=-Lbenchmarks/dependencies/double-conversion -ldouble-conversion
|
||||
|
||||
|
||||
headers:= include/fast_double_parser.h
|
||||
|
||||
benchmark: ./benchmarks/benchmark.cpp $(headers) $(LIBABSEIL) $(LIBDOUBLE)
|
||||
$(CXX) -O2 -std=c++17 -march=haswell -o benchmark ./benchmarks/benchmark.cpp -Wall -Iinclude $(LIBABSEIL_INCLUDE) $(LIBDOUBLE_INCLUDE) $(LIBDOUBLE_LIBS) $(LIBABSEIL_LIBS) -lm
|
||||
|
||||
|
||||
unit: ./tests/unit.cpp $(headers)
|
||||
$(CXX) -O2 -std=c++17 -march=native -o unit ./tests/unit.cpp -Wall -Iinclude
|
||||
|
||||
|
||||
|
||||
submodules:
|
||||
-git submodule update --init --recursive
|
||||
-touch submodules
|
||||
|
||||
|
||||
$(LIBABSEIL):submodules
|
||||
rm -r -f benchmarks/dependencies/abseil-cpp/build && cd benchmarks/dependencies/abseil-cpp && mkdir build && cd build && cmake .. -DABSL_RUN_TESTS=OFF -DABSL_USE_GOOGLETEST_HEAD=OFF -DCMAKE_CXX_STANDARD=17 -DCMAKE_BUILD_TYPE=Release && cmake --build . --target base && cmake --build . --target strings
|
||||
|
||||
$(LIBDOUBLE):submodules
|
||||
cd benchmarks/dependencies/double-conversion && cmake . -DCMAKE_BUILD_TYPE=Release && make
|
||||
|
||||
clean:
|
||||
rm -r -f benchmark unit benchmarks/dependencies/abseil-cpp/build
|
69
README.md
69
README.md
|
@ -3,7 +3,7 @@ Fast function to parse strings containing decimal numbers into double-precision
|
|||
|
||||
## Why should I expect this function to be faster?
|
||||
|
||||
Parsing strings into binary numbers (IEEE 754) is surprisingly difficult. Parsing a single number can take hundreds of instructions and CPU cycles, if not thousands.
|
||||
Parsing strings into binary numbers (IEEE 754) is surprisingly difficult. Parsing a single number can take hundreds of instructions and CPU cycles, if not thousands. It is relatively easy to parse numbers faster if you sacrifice accuracy (e.g., tolerate 1 ULP errors), but we are interested in "perfect" parsing.
|
||||
|
||||
Instead of trying to solve the general problem, we cover what we believe are the most common scenarios, providing really fast parsing. We fall back on the standard library for the difficult cases.
|
||||
|
||||
|
@ -16,16 +16,75 @@ You should be able to just drop the header file into your project, it is a head
|
|||
If you want to run our benchmarks, you should have
|
||||
|
||||
- Linux or macOS
|
||||
- A recent cmake (one dependency requires cmake 3.5 or better)
|
||||
- A recent compiler
|
||||
- git
|
||||
- A recent C++ compiler with make
|
||||
- A recent cmake (one dependency requires cmake 3.5 or better) is necessary for the benchmarks
|
||||
|
||||
## Usage (benchmarks)
|
||||
|
||||
```
|
||||
make
|
||||
./unit
|
||||
./benchmark
|
||||
./benchmark data/canada.txt
|
||||
./benchmark benchmarks/data/canada.txt
|
||||
```
|
||||
## Sample results
|
||||
|
||||
|
||||
```
|
||||
$ ./benchmark
|
||||
parsing random integers in the range [0,1)
|
||||
|
||||
|
||||
=== trial 1 ===
|
||||
fast_double_parser 460.64 MB/s
|
||||
strtod 186.90 MB/s
|
||||
abslfromch 168.61 MB/s
|
||||
absl 140.62 MB/s
|
||||
double-conv 206.15 MB/s
|
||||
|
||||
|
||||
=== trial 2 ===
|
||||
fast_double_parser 449.76 MB/s
|
||||
strtod 174.59 MB/s
|
||||
abslfromch 152.68 MB/s
|
||||
absl 157.52 MB/s
|
||||
double-conv 193.97 MB/s
|
||||
|
||||
|
||||
```
|
||||
|
||||
```
|
||||
$ ./benchmark benchmarks/data/canada.txt
|
||||
read 111126 lines
|
||||
|
||||
|
||||
=== trial 1 ===
|
||||
fast_double_parser 662.01 MB/s
|
||||
strtod 69.73 MB/s
|
||||
abslfromch 341.74 MB/s
|
||||
absl 325.23 MB/s
|
||||
double-conv 249.68 MB/s
|
||||
|
||||
|
||||
=== trial 2 ===
|
||||
fast_double_parser 611.56 MB/s
|
||||
strtod 69.53 MB/s
|
||||
abslfromch 330.00 MB/s
|
||||
absl 328.45 MB/s
|
||||
double-conv 243.90 MB/s
|
||||
```
|
||||
|
||||
## API
|
||||
|
||||
The current API is simple enough:
|
||||
|
||||
```C++
|
||||
#include "fast_double_parser.h" // the file is in the include directory
|
||||
|
||||
|
||||
double x;
|
||||
char * string = ...
|
||||
bool isok = fast_double_parser::parse_number(string, &x);
|
||||
```
|
||||
|
||||
## Credit
|
||||
|
|
274
benchmarks/benchmark.cpp
Normal file
274
benchmarks/benchmark.cpp
Normal file
|
@ -0,0 +1,274 @@
|
|||
|
||||
#include "absl/strings/charconv.h"
|
||||
#include "absl/strings/numbers.h"
|
||||
#include "fast_double_parser.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <climits>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <ctype.h>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
#include "double-conversion/ieee.h"
|
||||
#include "double-conversion/string-to-double.h"
|
||||
|
||||
double findmax_fast_double_parser(std::vector<std::string> s) {
|
||||
double answer = 0;
|
||||
double x;
|
||||
for (std::string st : s) {
|
||||
bool isok = fast_double_parser::parse_number(st.c_str(), &x);
|
||||
if (!isok)
|
||||
throw std::runtime_error("bug in findmax_fast_double_parser");
|
||||
answer = answer > x ? answer : x;
|
||||
}
|
||||
return answer;
|
||||
}
|
||||
|
||||
// Parses every string in `s` with strtod and returns the largest parsed value.
// The running maximum starts at 0, so the result is never negative.
// Throws std::runtime_error when strtod consumes no characters of a string.
// The input is taken by const reference and iterated by reference so that a
// caller timing this function measures parsing, not vector/string copies.
double findmax_strtod(const std::vector<std::string> &s) {
  double answer = 0;
  for (const std::string &st : s) {
    const char *begin = st.c_str();
    char *end = nullptr;
    const double x = strtod(begin, &end);
    // end == begin means no conversion was performed.
    if ((end == nullptr) || (end == begin)) {
      throw std::runtime_error("bug in findmax_strtod");
    }
    answer = answer > x ? answer : x;
  }
  return answer;
}
|
||||
|
||||
double findmax_absl(std::vector<std::string> s) {
|
||||
double answer = 0;
|
||||
double x = 0;
|
||||
for (std::string st : s) {
|
||||
bool isok = absl::SimpleAtod(st, &x);
|
||||
if (!isok) {
|
||||
throw std::runtime_error("bug in findmax_absl");
|
||||
}
|
||||
answer = answer > x ? answer : x;
|
||||
}
|
||||
return answer;
|
||||
}
|
||||
|
||||
double findmax_absl_from_chars(std::vector<std::string> s) {
|
||||
double answer = 0;
|
||||
double x = 0;
|
||||
for (std::string st : s) {
|
||||
auto[p, ec] = absl::from_chars(st.data(), st.data() + st.size(), x);
|
||||
if (p == st.data()) {
|
||||
throw std::runtime_error("bug in findmax_absl_from_chars");
|
||||
}
|
||||
answer = answer > x ? answer : x;
|
||||
}
|
||||
return answer;
|
||||
}
|
||||
|
||||
double findmax_doubleconversion(std::vector<std::string> s) {
|
||||
double answer = 0;
|
||||
double x;
|
||||
int flags = double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
|
||||
double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK |
|
||||
double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES;
|
||||
double empty_string_value = 0.0;
|
||||
uc16 separator = double_conversion::StringToDoubleConverter::kNoSeparator;
|
||||
double_conversion::StringToDoubleConverter converter(
|
||||
flags, empty_string_value, double_conversion::Double::NaN(), NULL, NULL,
|
||||
separator);
|
||||
int processed_characters_count;
|
||||
for (std::string st : s) {
|
||||
x = converter.StringToDouble(st.data(), st.size(),
|
||||
&processed_characters_count);
|
||||
if (processed_characters_count == 0) {
|
||||
throw std::runtime_error("bug in findmax_doubleconversion");
|
||||
}
|
||||
answer = answer > x ? answer : x;
|
||||
}
|
||||
return answer;
|
||||
}
|
||||
|
||||
// ulp distance
|
||||
// Marc B. Reynolds, 2016-2019
|
||||
// Public Domain under http://unlicense.org, see link for details.
|
||||
// adapted by D. Lemire
|
||||
// Returns the distance between a and b measured in units in the last place,
// computed on the raw 64-bit patterns of the two doubles.
// - Same sign bit: the distance is the absolute difference of the patterns.
// - Different sign bits: the distance is the sum of the two magnitudes. Adding
//   2^63 (mod 2^64) to ua + ub cancels the single sign bit that is set,
//   leaving magnitude(a) + magnitude(b). The constant must therefore be the
//   64-bit sign bit; the 32-bit value 0x80000000 gives a wrong result.
inline uint64_t f64_ulp_dist(double a, double b) {
  uint64_t ua, ub;
  memcpy(&ua, &a, sizeof(ua));
  memcpy(&ub, &b, sizeof(ub));
  if (static_cast<int64_t>(ub ^ ua) >= 0) {
    return static_cast<int64_t>(ua - ub) >= 0 ? (ua - ub) : (ub - ua);
  }
  return ua + ub + 0x8000000000000000ULL;
}
|
||||
|
||||
void validate(std::vector<std::string> s) {
|
||||
|
||||
double x, xref;
|
||||
int flags = double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
|
||||
double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK |
|
||||
double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES;
|
||||
double empty_string_value = 0.0;
|
||||
uc16 separator = double_conversion::StringToDoubleConverter::kNoSeparator;
|
||||
double_conversion::StringToDoubleConverter converter(
|
||||
flags, empty_string_value, double_conversion::Double::NaN(), NULL, NULL,
|
||||
separator);
|
||||
int processed_characters_count;
|
||||
for (std::string st : s) {
|
||||
xref = strtod(st.data(), NULL);
|
||||
x = converter.StringToDouble(st.data(), st.size(),
|
||||
&processed_characters_count);
|
||||
if (xref != x) {
|
||||
std::cerr << "double conversion disagrees" << std::endl;
|
||||
printf("double conversion: %.*e\n", DBL_DIG + 1, x);
|
||||
printf("reference: %.*e\n", DBL_DIG + 1, xref);
|
||||
printf("string: %s\n", st.c_str());
|
||||
printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(x, xref));
|
||||
throw std::runtime_error("double conversion disagrees");
|
||||
}
|
||||
absl::from_chars(st.data(), st.data() + st.size(), x);
|
||||
if (xref != x) {
|
||||
std::cerr << "abseil from_chars disagrees" << std::endl;
|
||||
printf("abseil from_chars: %.*e\n", DBL_DIG + 1, x);
|
||||
printf("reference: %.*e\n", DBL_DIG + 1, xref);
|
||||
printf("string: %s\n", st.c_str());
|
||||
printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(x, xref));
|
||||
throw std::runtime_error("abseil from_chars disagrees");
|
||||
}
|
||||
bool isok = absl::SimpleAtod(st, &x);
|
||||
if (!isok) {
|
||||
throw std::runtime_error("bug in absl::SimpleAtod");
|
||||
}
|
||||
if (xref != x) {
|
||||
std::cerr << "abseil disagrees" << std::endl;
|
||||
printf("abseil: %.*e\n", DBL_DIG + 1, x);
|
||||
printf("reference: %.*e\n", DBL_DIG + 1, xref);
|
||||
printf("string: %s\n", st.c_str());
|
||||
printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(x, xref));
|
||||
throw std::runtime_error("abseil disagrees");
|
||||
}
|
||||
isok = fast_double_parser::parse_number(st.c_str(), &x);
|
||||
if (!isok) {
|
||||
printf("fast_double_parser refused to parse %s\n", st.c_str());
|
||||
throw std::runtime_error("fast_double_parser refused to parse");
|
||||
}
|
||||
if (xref != x) {
|
||||
std::cerr << "fast_double_parser disagrees" << std::endl;
|
||||
printf("fast_double_parser: %.*e\n", DBL_DIG + 1, x);
|
||||
printf("reference: %.*e\n", DBL_DIG + 1, xref);
|
||||
printf("string: %s\n", st.c_str());
|
||||
printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(x, xref));
|
||||
throw std::runtime_error("fast_double_parser disagrees");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Prints five per-unit averages taken from evts[0..4], labelled cycles, instr,
// branch miss, cache ref and cache miss, each divided by `volume`.
// If fewer than five values are supplied, a message is written to stderr and
// nothing else is printed, instead of indexing past the end of the vector.
void printvec(const std::vector<unsigned long long> &evts, size_t volume) {
  constexpr size_t kCounters = 5;
  if (evts.size() < kCounters) {
    fprintf(stderr, "printvec: expected %zu counters, got %zu\n", kCounters,
            evts.size());
    return;
  }
  printf("%.2f cycles %.2f instr %.4f branch miss %.2f cache ref %.2f cache "
         "miss \n",
         evts[0] * 1.0 / volume, evts[1] * 1.0 / volume, evts[2] * 1.0 / volume,
         evts[3] * 1.0 / volume, evts[4] * 1.0 / volume);
}
|
||||
|
||||
void process(std::vector<std::string> lines, size_t volume) {
|
||||
double volumeMB = volume / (1024. * 1024.);
|
||||
// size_t howmany = lines.size();
|
||||
std::chrono::high_resolution_clock::time_point t1, t2;
|
||||
double dif, ts;
|
||||
for(size_t i = 0; i < 3; i++) {
|
||||
if(i > 0) printf("=== trial %zu ===\n", i);
|
||||
|
||||
|
||||
|
||||
t1 = std::chrono::high_resolution_clock::now();
|
||||
ts = findmax_fast_double_parser(lines);
|
||||
t2 = std::chrono::high_resolution_clock::now();
|
||||
if (ts == 0)
|
||||
printf("bug\n");
|
||||
dif = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
|
||||
if(i > 0 ) printf("fast_double_parser %.2f MB/s\n", volumeMB * 1000000000 / dif);
|
||||
t1 = std::chrono::high_resolution_clock::now();
|
||||
ts = findmax_strtod(lines);
|
||||
t2 = std::chrono::high_resolution_clock::now();
|
||||
if (ts == 0)
|
||||
printf("bug\n");
|
||||
dif = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
|
||||
if(i > 0 ) printf("strtod %.2f MB/s\n", volumeMB * 1000000000 / dif);
|
||||
t1 = std::chrono::high_resolution_clock::now();
|
||||
ts = findmax_absl_from_chars(lines);
|
||||
t2 = std::chrono::high_resolution_clock::now();
|
||||
if (ts == 0)
|
||||
printf("bug\n");
|
||||
dif = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
|
||||
if(i > 0 ) printf("abslfromch %.2f MB/s\n", volumeMB * 1000000000 / dif);
|
||||
t1 = std::chrono::high_resolution_clock::now();
|
||||
ts = findmax_absl(lines);
|
||||
t2 = std::chrono::high_resolution_clock::now();
|
||||
if (ts == 0)
|
||||
printf("bug\n");
|
||||
dif = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
|
||||
if(i > 0 ) printf("absl %.2f MB/s\n", volumeMB * 1000000000 / dif);
|
||||
t1 = std::chrono::high_resolution_clock::now();
|
||||
ts = findmax_doubleconversion(lines);
|
||||
t2 = std::chrono::high_resolution_clock::now();
|
||||
if (ts == 0)
|
||||
printf("bug\n");
|
||||
dif = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
|
||||
if(i > 0 ) printf("double-conv %.2f MB/s\n", volumeMB * 1000000000 / dif);
|
||||
printf("\n\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Reads `filename` line by line, then validates and benchmarks the lines.
// Each line is treated as one number string; the byte volume passed to
// process() is the sum of the line lengths (line terminators excluded).
// If the file cannot be opened, a message is written to stderr and the
// function returns without doing anything else.
void fileload(char *filename) {
  std::ifstream in(filename);
  if (in.fail()) {
    std::cerr << "can't open " << filename << std::endl;
    return;
  }

  std::vector<std::string> rows;
  rows.reserve(10000); // initial capacity; the vector grows as needed
  size_t total_bytes = 0;
  for (std::string row; std::getline(in, row);) {
    total_bytes += row.size();
    rows.push_back(row);
  }

  std::cout << "read " << rows.size() << " lines " << std::endl;
  validate(rows);
  process(rows, total_bytes);
}
|
||||
|
||||
// Builds `howmany` number strings from rand() / RAND_MAX formatted with
// std::to_string, then validates and benchmarks them. The byte volume passed
// to process() is the sum of the string lengths.
void demo(size_t howmany) {
  std::cout << "parsing random integers in the range [0,1)" << std::endl;

  std::vector<std::string> samples;
  samples.reserve(howmany);
  size_t total_bytes = 0;
  for (size_t remaining = howmany; remaining > 0; --remaining) {
    const double value = static_cast<double>(rand()) / RAND_MAX;
    std::string text = std::to_string(value);
    total_bytes += text.size();
    samples.push_back(text);
  }

  validate(samples);
  process(samples, total_bytes);
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc == 1) {
|
||||
demo(100 * 1000);
|
||||
std::cout << "You can also provide a filename: it should contain one "
|
||||
"string per line corresponding to a number"
|
||||
<< std::endl;
|
||||
} else {
|
||||
fileload(argv[1]);
|
||||
}
|
||||
}
|
111126
benchmarks/data/canada.txt
Normal file
111126
benchmarks/data/canada.txt
Normal file
File diff suppressed because it is too large
Load diff
1
benchmarks/dependencies/abseil-cpp
Submodule
1
benchmarks/dependencies/abseil-cpp
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit d936052d32a5b7ca08b0199a6724724aea432309
|
1
benchmarks/dependencies/double-conversion
Submodule
1
benchmarks/dependencies/double-conversion
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit f4cb2384efa55dee0e6652f8674b05763441ab09
|
|
@ -18,17 +18,18 @@ namespace fast_double_parser {
|
|||
#ifdef _MSC_VER
|
||||
#ifndef really_inline
|
||||
#define really_inline __forceinline
|
||||
#endif
|
||||
#endif // really_inline
|
||||
#ifndef unlikely
|
||||
#define unlikely(x) x
|
||||
#endif
|
||||
#else
|
||||
#endif // unlikely
|
||||
#else // _MSC_VER
|
||||
#ifndef unlikely
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#endif
|
||||
#endif // unlikely
|
||||
#ifndef really_inline
|
||||
#define really_inline __attribute__((always_inline))
|
||||
#endif
|
||||
#endif // really_inline
|
||||
#endif // _MSC_VER
|
||||
|
||||
/* result might be undefined when input_num is zero */
|
||||
int leading_zeroes(uint64_t input_num) {
|
||||
|
@ -45,23 +46,17 @@ int leading_zeroes(uint64_t input_num) {
|
|||
#endif // _MSC_VER
|
||||
}
|
||||
|
||||
|
||||
// Precomputed powers of ten from 10^0 to 10^22. These
|
||||
// can be represented exactly using the double type.
|
||||
static const double power_of_ten[] = {
|
||||
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11,
|
||||
1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
|
||||
|
||||
|
||||
}
|
||||
|
||||
// Returns true when c is one of the ASCII decimal digits '0' through '9'.
static inline bool is_integer(char c) {
|
||||
return (c >= '0' && c <= '9');
|
||||
// this gets compiled to (uint8_t)(c - '0') <= 9 on all decent compilers
|
||||
}
|
||||
|
||||
|
||||
|
||||
// the mantissas of powers of ten from -308 to 308, extended out to sixty four
|
||||
// bits
|
||||
// This struct will likely get padded to 16 bytes.
|
||||
|
@ -1039,7 +1034,7 @@ const uint64_t mantissa_128[] = {0x419ea3bd35385e2d,
|
|||
// We assume that power is in the [FASTFLOAT_SMALLEST_POWER,
|
||||
// FASTFLOAT_LARGEST_POWER] interval: the caller is responsible for this check.
|
||||
// We assume that i is non-zero: the caller is responsible for this check.
|
||||
static really_inline double compute_float_64(int64_t power, uint64_t i,
|
||||
really_inline double compute_float_64(int64_t power, uint64_t i,
|
||||
bool negative, bool *success) {
|
||||
// we start with a fast path
|
||||
if (-22 <= power && power <= 22 && i <= 9007199254740991) {
|
||||
|
@ -1161,10 +1156,8 @@ static really_inline double compute_float_64(int64_t power, uint64_t i,
|
|||
return d;
|
||||
}
|
||||
|
||||
static bool parse_float_strtod(const uint8_t *const buf, const uint32_t offset,
|
||||
const char *float_end, double *outDouble) {
|
||||
static bool parse_float_strtod(const char *ptr, double *outDouble) {
|
||||
char *endptr;
|
||||
char *ptr = (char *)(buf + offset);
|
||||
*outDouble = strtod(ptr, &endptr);
|
||||
// Some libraries will set errno = ERANGE when the value is subnormal,
|
||||
// yet we may want to be able to parse subnormal values.
|
||||
|
@ -1196,10 +1189,9 @@ is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) {
|
|||
return structural_or_whitespace_or_exponent_or_decimal_negated[c];
|
||||
}
|
||||
|
||||
|
||||
// parse the number at p
|
||||
really_inline bool parse_number(const char *p, double *outDouble) {
|
||||
const char *p = reinterpret_cast<const char *>(buf + offset);
|
||||
really_inline bool parse_number(const char *p, double *outDouble) {
|
||||
const char *pinit = p;
|
||||
bool found_minus = (*p == '-');
|
||||
bool negative = false;
|
||||
if (found_minus) {
|
||||
|
@ -1296,38 +1288,40 @@ really_inline bool parse_number(const char *p, double *outDouble) {
|
|||
}
|
||||
exponent += (neg_exp ? -exp_number : exp_number);
|
||||
}
|
||||
if (unlikely((digit_count >= 19))) { // this is uncommon
|
||||
// It is possible that the integer had an overflow.
|
||||
// We have to handle the case where we have 0.0000somenumber.
|
||||
const char *start = start_digits;
|
||||
while ((*start == '0') || (*start == '.')) {
|
||||
start++;
|
||||
}
|
||||
// we over-decrement by one when there is a '.'
|
||||
digit_count -= (start - start_digits);
|
||||
if (digit_count >= 19) {
|
||||
return parse_float_strtod(buf, offset, p, outDouble);
|
||||
}
|
||||
if (unlikely((digit_count >= 19))) { // this is uncommon
|
||||
// It is possible that the integer had an overflow.
|
||||
// We have to handle the case where we have 0.0000somenumber.
|
||||
const char *start = start_digits;
|
||||
while ((*start == '0') || (*start == '.')) {
|
||||
start++;
|
||||
}
|
||||
if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) ||
|
||||
(exponent > FASTFLOAT_LARGEST_POWER)) {
|
||||
// this is almost never going to get called!!!
|
||||
// exponent could be as low as 325
|
||||
return parse_float_strtod(buf, offset, p, outDouble);
|
||||
// we over-decrement by one when there is a '.'
|
||||
digit_count -= (start - start_digits);
|
||||
if (digit_count >= 19) {
|
||||
return parse_float_strtod(pinit, outDouble);
|
||||
}
|
||||
// from this point forward, exponent >= FASTFLOAT_SMALLEST_POWER and
|
||||
// exponent <= FASTFLOAT_LARGEST_POWER
|
||||
double d = 0;
|
||||
if (likely(i != 0)) {
|
||||
bool success = true;
|
||||
d = compute_float_64(exponent, i, negative, &success);
|
||||
if (!success) {
|
||||
// we are almost never going to get here.
|
||||
return parse_float_strtod(buf, offset, p, outDouble);
|
||||
}
|
||||
}
|
||||
if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) ||
|
||||
(exponent > FASTFLOAT_LARGEST_POWER)) {
|
||||
// this is almost never going to get called!!!
|
||||
// exponent could be as low as 325
|
||||
return parse_float_strtod(pinit, outDouble);
|
||||
}
|
||||
// from this point forward, exponent >= FASTFLOAT_SMALLEST_POWER and
|
||||
// exponent <= FASTFLOAT_LARGEST_POWER
|
||||
double d = 0;
|
||||
if (i != 0) {
|
||||
bool success = true;
|
||||
d = compute_float_64(exponent, i, negative, &success);
|
||||
if (!success) {
|
||||
// we are almost never going to get here.
|
||||
return parse_float_strtod(pinit, outDouble);
|
||||
}
|
||||
*outDouble = d;
|
||||
}
|
||||
*outDouble = d;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace fast_double_parser
|
||||
|
||||
#endif
|
211
tests/unit.cpp
Normal file
211
tests/unit.cpp
Normal file
|
@ -0,0 +1,211 @@
|
|||
#include "fast_double_parser.h"
|
||||
|
||||
//#include <algorithm>
|
||||
/*#include <chrono>
|
||||
#include <climits>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <ctype.h>
|
||||
|
||||
#include <iomanip>
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <vector>*/
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <sstream>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <stdexcept>
|
||||
// ulp distance
|
||||
// Marc B. Reynolds, 2016-2019
|
||||
// Public Domain under http://unlicense.org, see link for details.
|
||||
// adapted by D. Lemire
|
||||
// Returns the distance between a and b measured in units in the last place,
// computed on the raw 64-bit patterns of the two doubles.
// - Same sign bit: the distance is the absolute difference of the patterns.
// - Different sign bits: the distance is the sum of the two magnitudes. Adding
//   2^63 (mod 2^64) to ua + ub cancels the single sign bit that is set,
//   leaving magnitude(a) + magnitude(b). The constant must therefore be the
//   64-bit sign bit; the 32-bit value 0x80000000 gives a wrong result.
inline uint64_t f64_ulp_dist(double a, double b) {
  uint64_t ua, ub;
  memcpy(&ua, &a, sizeof(ua));
  memcpy(&ub, &b, sizeof(ub));
  if (static_cast<int64_t>(ub ^ ua) >= 0) {
    return static_cast<int64_t>(ua - ub) >= 0 ? (ua - ub) : (ub - ua);
  }
  return ua + ub + 0x8000000000000000ULL;
}
|
||||
|
||||
// Deterministic 64-bit mixing function used to derive test inputs from a
// counter: two rounds of "xor with self shifted right by 33, then multiply by
// an odd constant", followed by one final xor-shift.
static inline uint64_t rng(uint64_t h) {
  constexpr uint64_t kMultipliers[2] = {UINT64_C(0xff51afd7ed558ccd),
                                        UINT64_C(0xc4ceb9fe1a85ec53)};
  for (const uint64_t multiplier : kMultipliers) {
    h = (h ^ (h >> 33)) * multiplier;
  }
  return h ^ (h >> 33);
}
|
||||
|
||||
std::string randomfloats(uint64_t howmany) {
|
||||
std::stringstream out;
|
||||
uint64_t offset = 1190;
|
||||
for (size_t i = 1; i <= howmany; i++) {
|
||||
uint64_t x = rng(i + offset);
|
||||
double d;
|
||||
::memcpy(&d, &x, sizeof(double));
|
||||
// paranoid
|
||||
while (!std::isfinite(d)) {
|
||||
offset++;
|
||||
x = rng(i + offset);
|
||||
::memcpy(&d, &x, sizeof(double));
|
||||
}
|
||||
out << std::setprecision(DBL_DIG) << d << " ";
|
||||
}
|
||||
return out.str();
|
||||
}
|
||||
|
||||
void check(double d) {
|
||||
std::string s(64, '\0');
|
||||
auto written = std::snprintf(&s[0], s.size(), "%.*e", DBL_DIG + 1, d);
|
||||
s.resize(written);
|
||||
double x;
|
||||
bool isok = fast_double_parser::parse_number(s.data(), &x);
|
||||
if (!isok) {
|
||||
printf("fast_double_parser refused to parse %s\n", s.c_str());
|
||||
throw std::runtime_error("fast_double_parser refused to parse");
|
||||
}
|
||||
if (d != x) {
|
||||
std::cerr << "fast_double_parser disagrees" << std::endl;
|
||||
printf("fast_double_parser: %.*e\n", DBL_DIG + 1, x);
|
||||
printf("reference: %.*e\n", DBL_DIG + 1, d);
|
||||
printf("string: %s\n", s.c_str());
|
||||
printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(x, d));
|
||||
throw std::runtime_error("fast_double_parser disagrees");
|
||||
}
|
||||
}
|
||||
|
||||
void unit_tests() {
|
||||
printf("Running unit tests\n");
|
||||
for (double d :
|
||||
{-65.613616999999977, 7.2057594037927933e+16, 1.0e-308, 0.1e-308, 0.01e-307, 1.79769e+308,
|
||||
2.22507e-308, -1.79769e+308, -2.22507e-308, 1e-308}) {
|
||||
check(d);
|
||||
}
|
||||
uint64_t offset = 1190;
|
||||
size_t howmany = 10000000;
|
||||
for (size_t i = 1; i <= howmany; i++) {
|
||||
if ((i % 10000) == 0) {
|
||||
printf(".");
|
||||
fflush(NULL);
|
||||
}
|
||||
uint64_t x = rng(i + offset);
|
||||
double d;
|
||||
::memcpy(&d, &x, sizeof(double));
|
||||
// paranoid
|
||||
while ((!std::isnormal(d)) || std::isnan(d) || std::isinf(d)) {
|
||||
offset++;
|
||||
x = rng(i + offset);
|
||||
::memcpy(&d, &x, sizeof(double));
|
||||
}
|
||||
check(d);
|
||||
}
|
||||
|
||||
printf("Unit tests ok\n");
|
||||
}
|
||||
// Reference powers of ten written as double literals, in increasing order
// from 1e-307 to 1e308. main() reads entry [p + 307] as the expected value
// of 10^p.
static const double testing_power_of_ten[] = {
|
||||
1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300, 1e-299,
|
||||
1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, 1e-290,
|
||||
1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281,
|
||||
1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272,
|
||||
1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263,
|
||||
1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254,
|
||||
1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245,
|
||||
1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237, 1e-236,
|
||||
1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228, 1e-227,
|
||||
1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219, 1e-218,
|
||||
1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210, 1e-209,
|
||||
1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, 1e-200,
|
||||
1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191,
|
||||
1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182,
|
||||
1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173,
|
||||
1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164,
|
||||
1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155,
|
||||
1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147, 1e-146,
|
||||
1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138, 1e-137,
|
||||
1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129, 1e-128,
|
||||
1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, 1e-119,
|
||||
1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 1e-110,
|
||||
1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101,
|
||||
1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92,
|
||||
1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83,
|
||||
1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74,
|
||||
1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65,
|
||||
1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56,
|
||||
1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47,
|
||||
1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38,
|
||||
1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29,
|
||||
1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20,
|
||||
1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11,
|
||||
1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2,
|
||||
1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
|
||||
1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
|
||||
1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25,
|
||||
1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 1e32, 1e33, 1e34,
|
||||
1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, 1e43,
|
||||
1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, 1e52,
|
||||
1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 1e60, 1e61,
|
||||
1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, 1e70,
|
||||
1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
|
||||
1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88,
|
||||
1e89, 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97,
|
||||
1e98, 1e99, 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106,
|
||||
1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114, 1e115,
|
||||
1e116, 1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123, 1e124,
|
||||
1e125, 1e126, 1e127, 1e128, 1e129, 1e130, 1e131, 1e132, 1e133,
|
||||
1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141, 1e142,
|
||||
1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150, 1e151,
|
||||
1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, 1e160,
|
||||
1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169,
|
||||
1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178,
|
||||
1e179, 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187,
|
||||
1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196,
|
||||
1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204, 1e205,
|
||||
1e206, 1e207, 1e208, 1e209, 1e210, 1e211, 1e212, 1e213, 1e214,
|
||||
1e215, 1e216, 1e217, 1e218, 1e219, 1e220, 1e221, 1e222, 1e223,
|
||||
1e224, 1e225, 1e226, 1e227, 1e228, 1e229, 1e230, 1e231, 1e232,
|
||||
1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240, 1e241,
|
||||
1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, 1e250,
|
||||
1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259,
|
||||
1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268,
|
||||
1e269, 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277,
|
||||
1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286,
|
||||
1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294, 1e295,
|
||||
1e296, 1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303, 1e304,
|
||||
1e305, 1e306, 1e307, 1e308};
|
||||
|
||||
int main() {
|
||||
unit_tests();
|
||||
for (int p = -306; p <= 308; p++) {
|
||||
if (p == 23)
|
||||
p++;
|
||||
printf(".");
|
||||
fflush(NULL);
|
||||
bool success;
|
||||
double d = fast_double_parser::compute_float_64(p, 1, false, &success);
|
||||
if (!success) {
|
||||
printf("failed to parse\n");
|
||||
printf(" 10 ^ %d ", p);
|
||||
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
if (d != testing_power_of_ten[p + 307]) {
|
||||
printf(" 10 ^ %d ", p);
|
||||
|
||||
printf("bad parsing\n");
|
||||
printf("got: %.*e\n", DBL_DIG + 1, d);
|
||||
printf("reference: %.*e\n", DBL_DIG + 1, testing_power_of_ten[p + 307]);
|
||||
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
// double last_normal =std::numeric_limits<double>::min();// 2.22507e-308;
|
||||
|
||||
printf("Good!\n");
|
||||
return EXIT_SUCCESS;
|
||||
}
|
Loading…
Add table
Reference in a new issue