mirror of
https://github.com/lemire/fast_double_parser.git
synced 2025-04-03 20:04:57 +00:00
Removing stats
This commit is contained in:
parent
4ee538be1b
commit
aae457d607
3 changed files with 0 additions and 378 deletions
|
@ -12,7 +12,6 @@ option(FAST_DOUBLE_PARSER_SANITIZE "Sanitize addresses" OFF)
|
|||
|
||||
set(headers include/fast_double_parser.h)
|
||||
set(unit_src tests/unit.cpp)
|
||||
set(stats_src tests/stats.cpp)
|
||||
|
||||
set(benchmark_src benchmarks/benchmark.cpp)
|
||||
|
||||
|
@ -30,8 +29,6 @@ target_include_directories(unit PUBLIC include)
|
|||
enable_testing()
|
||||
add_test(unit unit)
|
||||
|
||||
add_executable(stats ${stats_src})
|
||||
target_include_directories(stats PUBLIC include)
|
||||
|
||||
|
||||
|
||||
|
|
2
Makefile
2
Makefile
|
@ -19,8 +19,6 @@ benchmark: ./benchmarks/benchmark.cpp $(headers) $(LIBABSEIL) $(LIBDOUBLE) $(he
|
|||
unit: ./tests/unit.cpp $(headers)
|
||||
$(CXX) -O2 -std=c++14 -march=native -o unit ./tests/unit.cpp -Wall -Iinclude
|
||||
|
||||
stats: ./tests/stats.cpp $(headers)
|
||||
$(CXX) -O2 -std=c++14 -march=native -o stats ./tests/stats.cpp -Wall -Iinclude
|
||||
|
||||
bench: benchmark
|
||||
./benchmark
|
||||
|
|
373
tests/stats.cpp
373
tests/stats.cpp
|
@ -1,373 +0,0 @@
|
|||
#include "fast_double_parser.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
static inline uint64_t rng(uint64_t h) {
|
||||
h ^= h >> 33;
|
||||
h *= UINT64_C(0xff51afd7ed558ccd);
|
||||
h ^= h >> 33;
|
||||
h *= UINT64_C(0xc4ceb9fe1a85ec53);
|
||||
h ^= h >> 33;
|
||||
return h;
|
||||
}
|
||||
enum {
|
||||
FAST_PATH = 0,
|
||||
ZERO_PATH = 1,
|
||||
SLOW_PATH = 2,
|
||||
SLOWER_PATH = 3,
|
||||
FAILURE = 4,
|
||||
COULD_NOT_ROUND = 5,
|
||||
EXPONENT_FAILURE = 6,
|
||||
EARLY_STRTOD = 7
|
||||
};
|
||||
|
||||
size_t compute_float_64_stats(int64_t power, uint64_t i) {
|
||||
|
||||
// we start with a fast path
|
||||
// It was described in
|
||||
// Clinger WD. How to read floating point numbers accurately.
|
||||
// ACM SIGPLAN Notices. 1990
|
||||
if (-22 <= power && power <= 22 && i <= 9007199254740991) {
|
||||
return FAST_PATH;
|
||||
}
|
||||
if (i == 0) {
|
||||
return ZERO_PATH;
|
||||
}
|
||||
|
||||
// We are going to need to do some 64-bit arithmetic to get a more precise
|
||||
// product. We use a table lookup approach.
|
||||
fast_double_parser::components c = fast_double_parser::power_of_ten_components
|
||||
[power -
|
||||
FASTFLOAT_SMALLEST_POWER]; // safe because
|
||||
// power >= FASTFLOAT_SMALLEST_POWER
|
||||
// and power <= FASTFLOAT_LARGEST_POWER
|
||||
// we recover the mantissa of the power, it has a leading 1. It is always
|
||||
// rounded down.
|
||||
uint64_t factor_mantissa = c.mantissa;
|
||||
// We want the most significant bit of i to be 1. Shift if needed.
|
||||
int lz = fast_double_parser::leading_zeroes(i);
|
||||
i <<= lz;
|
||||
// We want the most significant 64 bits of the product. We know
|
||||
// this will be non-zero because the most significant bit of i is
|
||||
// 1.
|
||||
fast_double_parser::value128 product =
|
||||
fast_double_parser::full_multiplication(i, factor_mantissa);
|
||||
uint64_t lower = product.low;
|
||||
uint64_t upper = product.high;
|
||||
// We know that upper has at most one leading zero because
|
||||
// both i and factor_mantissa have a leading one. This means
|
||||
// that the result is at least as large as ((1<<63)*(1<<63))/(1<<64).
|
||||
|
||||
// As long as the first 9 bits of "upper" are not "1", then we
|
||||
// know that we have an exact computed value for the leading
|
||||
// 55 bits because any imprecision would play out as a +1, in
|
||||
// the worst case.
|
||||
// We expect this next branch to be rarely taken (say 1% of the time).
|
||||
// When (upper & 0x1FF) == 0x1FF, it can be common for
|
||||
// lower + i < lower to be true (proba. much higher than 1%).
|
||||
int answer = SLOW_PATH;
|
||||
if (unlikely((upper & 0x1FF) == 0x1FF) && (lower + i < lower)) {
|
||||
uint64_t factor_mantissa_low =
|
||||
fast_double_parser::mantissa_128[power - FASTFLOAT_SMALLEST_POWER];
|
||||
// next, we compute the 64-bit x 128-bit multiplication, getting a 192-bit
|
||||
// result (three 64-bit values)
|
||||
product = fast_double_parser::full_multiplication(i, factor_mantissa_low);
|
||||
uint64_t product_low = product.low;
|
||||
uint64_t product_middle2 = product.high;
|
||||
uint64_t product_middle1 = lower;
|
||||
uint64_t product_high = upper;
|
||||
uint64_t product_middle = product_middle1 + product_middle2;
|
||||
if (product_middle < product_middle1) {
|
||||
product_high++; // overflow carry
|
||||
}
|
||||
// we want to check whether mantissa *i + i would affect our result
|
||||
// This does happen, e.g. with 7.3177701707893310e+15
|
||||
if (((product_middle + 1 == 0) && ((product_high & 0x1FF) == 0x1FF) &&
|
||||
(product_low + i < product_low))) { // let us be prudent and bail out.
|
||||
return FAILURE;
|
||||
}
|
||||
upper = product_high;
|
||||
lower = product_middle;
|
||||
answer = SLOWER_PATH;
|
||||
}
|
||||
// The final mantissa should be 53 bits with a leading 1.
|
||||
// We shift it so that it occupies 54 bits with a leading 1.
|
||||
///////
|
||||
uint64_t upperbit = upper >> 63;
|
||||
uint64_t mantissa = upper >> (upperbit + 9);
|
||||
lz += int(1 ^ upperbit);
|
||||
// Here we have mantissa < (1<<54).
|
||||
|
||||
// We have to round to even. The "to even" part
|
||||
// is only a problem when we are right in between two floats
|
||||
// which we guard against.
|
||||
// If we have lots of trailing zeros, we may fall right between two
|
||||
// floating-point values.
|
||||
if (unlikely((lower == 0) && ((upper & 0x1FF) == 0) &&
|
||||
((mantissa & 3) == 1))) {
|
||||
return COULD_NOT_ROUND;
|
||||
}
|
||||
mantissa += mantissa & 1;
|
||||
mantissa >>= 1;
|
||||
// Here we have mantissa < (1<<53), unless there was an overflow
|
||||
if (mantissa >= (1ULL << 53)) {
|
||||
//////////
|
||||
// This will happen when parsing values such as 7.2057594037927933e+16
|
||||
////////
|
||||
mantissa = (1ULL << 52);
|
||||
lz--; // undo previous addition
|
||||
}
|
||||
mantissa &= ~(1ULL << 52);
|
||||
uint64_t real_exponent = c.exp - lz;
|
||||
// we have to check that real_exponent is in range, otherwise we bail out
|
||||
if (unlikely((real_exponent < 1) || (real_exponent > 2046))) {
|
||||
return EXPONENT_FAILURE;
|
||||
}
|
||||
mantissa |= real_exponent << 52;
|
||||
double d;
|
||||
memcpy(&d, &mantissa, sizeof(d));
|
||||
// printf("answer = %zu\n", answer);
|
||||
return answer;
|
||||
}
|
||||
|
||||
// parse the number at p
|
||||
size_t parse_number_stats(const char *p) {
|
||||
bool found_minus = (*p == '-');
|
||||
bool negative = false;
|
||||
if (found_minus) {
|
||||
++p;
|
||||
negative = true;
|
||||
if (!fast_double_parser::is_integer(
|
||||
*p)) { // a negative sign must be followed by an integer
|
||||
return false;
|
||||
}
|
||||
}
|
||||
const char *const start_digits = p;
|
||||
|
||||
uint64_t i; // an unsigned int avoids signed overflows (which are bad)
|
||||
if (*p == '0') { // 0 cannot be followed by an integer
|
||||
++p;
|
||||
if (fast_double_parser::
|
||||
is_integer(*p)) {
|
||||
return false;
|
||||
}
|
||||
i = 0;
|
||||
} else {
|
||||
if (!(fast_double_parser::is_integer(*p))) { // must start with an integer
|
||||
return false;
|
||||
}
|
||||
unsigned char digit = *p - '0';
|
||||
i = digit;
|
||||
p++;
|
||||
// the is_made_of_eight_digits_fast routine is unlikely to help here because
|
||||
// we rarely see large integer parts like 123456789
|
||||
while (fast_double_parser::is_integer(*p)) {
|
||||
digit = *p - '0';
|
||||
// a multiplication by 10 is cheaper than an arbitrary integer
|
||||
// multiplication
|
||||
i = 10 * i + digit; // might overflow, we will handle the overflow later
|
||||
++p;
|
||||
}
|
||||
}
|
||||
int64_t exponent = 0;
|
||||
const char *first_after_period = NULL;
|
||||
if ('.' == *p) {
|
||||
++p;
|
||||
first_after_period = p;
|
||||
if (fast_double_parser::is_integer(*p)) {
|
||||
unsigned char digit = *p - '0';
|
||||
++p;
|
||||
i = i * 10 + digit; // might overflow + multiplication by 10 is likely
|
||||
// cheaper than arbitrary mult.
|
||||
// we will handle the overflow later
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
while (fast_double_parser::is_integer(*p)) {
|
||||
unsigned char digit = *p - '0';
|
||||
++p;
|
||||
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
|
||||
// because we have parse_highprecision_float later.
|
||||
}
|
||||
exponent = first_after_period - p;
|
||||
}
|
||||
int digit_count =
|
||||
int(p - start_digits - 1); // used later to guard against overflows
|
||||
int64_t exp_number = 0; // exponential part
|
||||
if (('e' == *p) || ('E' == *p)) {
|
||||
++p;
|
||||
bool neg_exp = false;
|
||||
if ('-' == *p) {
|
||||
neg_exp = true;
|
||||
++p;
|
||||
} else if ('+' == *p) {
|
||||
++p;
|
||||
}
|
||||
if (!fast_double_parser::is_integer(*p)) {
|
||||
return false;
|
||||
}
|
||||
unsigned char digit = *p - '0';
|
||||
exp_number = digit;
|
||||
p++;
|
||||
if (fast_double_parser::is_integer(*p)) {
|
||||
digit = *p - '0';
|
||||
exp_number = 10 * exp_number + digit;
|
||||
++p;
|
||||
}
|
||||
if (fast_double_parser::is_integer(*p)) {
|
||||
digit = *p - '0';
|
||||
exp_number = 10 * exp_number + digit;
|
||||
++p;
|
||||
}
|
||||
while (fast_double_parser::is_integer(*p)) {
|
||||
if (exp_number > 0x100000000) { // we need to check for overflows
|
||||
// we refuse to parse this
|
||||
return false;
|
||||
}
|
||||
digit = *p - '0';
|
||||
exp_number = 10 * exp_number + digit;
|
||||
++p;
|
||||
}
|
||||
exponent += (neg_exp ? -exp_number : exp_number);
|
||||
}
|
||||
// If we frequently had to deal with long strings of digits,
|
||||
// we could extend our code by using a 128-bit integer instead
|
||||
// of a 64-bit integer. However, this is uncommon.
|
||||
if (unlikely((digit_count >= 19))) { // this is uncommon
|
||||
// It is possible that the integer had an overflow.
|
||||
// We have to handle the case where we have 0.0000somenumber.
|
||||
const char *start = start_digits;
|
||||
while ((*start == '0') || (*start == '.')) {
|
||||
start++;
|
||||
}
|
||||
// we over-decrement by one when there is a '.'
|
||||
digit_count -= int(start - start_digits);
|
||||
if (digit_count >= 19) {
|
||||
// Chances are good that we had an overflow!
|
||||
// We start anew.
|
||||
// This will happen in the following examples:
|
||||
// 10000000000000000000000000000000000000000000e+308
|
||||
// 3.1415926535897932384626433832795028841971693993751
|
||||
//
|
||||
return EARLY_STRTOD;
|
||||
}
|
||||
}
|
||||
if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) ||
|
||||
(exponent > FASTFLOAT_LARGEST_POWER)) {
|
||||
// this is almost never going to get called!!!
|
||||
// exponent could be as low as 325
|
||||
return EARLY_STRTOD;
|
||||
}
|
||||
// from this point forward, exponent >= FASTFLOAT_SMALLEST_POWER and
|
||||
// exponent <= FASTFLOAT_LARGEST_POWER
|
||||
return compute_float_64_stats(exponent, i);
|
||||
}
|
||||
|
||||
void random_floats(bool ininterval) {
|
||||
printf("** Generating random floats ");
|
||||
if (ininterval)
|
||||
printf("in interval [0,1]\n");
|
||||
else
|
||||
printf(" (all normals)\n");
|
||||
size_t counters[] = {0, 0, 0, 0, 0, 0, 0, 0};
|
||||
uint64_t offset = 1190;
|
||||
size_t howmany = 10000000;
|
||||
for (size_t i = 1; i <= howmany; i++) {
|
||||
if ((i % 100000) == 0) {
|
||||
printf(".");
|
||||
fflush(NULL);
|
||||
}
|
||||
uint64_t x = rng(i + offset);
|
||||
double d;
|
||||
|
||||
if (ininterval) {
|
||||
x &= 9007199254740991;
|
||||
d = (double)x / 9007199254740992.;
|
||||
} else {
|
||||
::memcpy(&d, &x, sizeof(double));
|
||||
// paranoid
|
||||
while ((!std::isnormal(d)) || std::isnan(d) || std::isinf(d)) {
|
||||
offset++;
|
||||
x = rng(i + offset);
|
||||
::memcpy(&d, &x, sizeof(double));
|
||||
}
|
||||
}
|
||||
std::string s(64, '\0');
|
||||
auto written = std::snprintf(&s[0], s.size(), "%.*e", DBL_DIG + 1, d);
|
||||
s.resize(written);
|
||||
size_t state = parse_number_stats(s.data());
|
||||
counters[state] += 1;
|
||||
}
|
||||
size_t count = howmany;
|
||||
printf("==========\n");
|
||||
printf("fast path %zu (%.5f %%) \n", counters[FAST_PATH],
|
||||
counters[FAST_PATH] * 100. / count);
|
||||
printf("zero path %zu (%.5f %%) \n", counters[ZERO_PATH],
|
||||
counters[ZERO_PATH] * 100. / count);
|
||||
printf("slow path %zu (%.5f %%) \n", counters[SLOW_PATH],
|
||||
counters[SLOW_PATH] * 100. / count);
|
||||
printf("slower path %zu (%.5f %%) \n", counters[SLOWER_PATH],
|
||||
counters[SLOWER_PATH] * 100. / count);
|
||||
printf("failure %zu (%.5f %%) \n", counters[FAILURE],
|
||||
counters[FAILURE] * 100. / count);
|
||||
printf("could not round %zu (%.5f %%) \n", counters[COULD_NOT_ROUND],
|
||||
counters[COULD_NOT_ROUND] * 100. / count);
|
||||
printf("exponent failure %zu (%.5f %%) \n", counters[EXPONENT_FAILURE],
|
||||
counters[EXPONENT_FAILURE] * 100. / count);
|
||||
printf("early bail %zu (%.5f %%) \n", counters[EARLY_STRTOD],
|
||||
counters[EARLY_STRTOD] * 100. / count);
|
||||
}
|
||||
|
||||
void fileload(char *filename) {
|
||||
size_t counters[] = {0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
std::ifstream inputfile(filename);
|
||||
if (!inputfile) {
|
||||
std::cerr << "can't open " << filename << std::endl;
|
||||
return;
|
||||
}
|
||||
std::string line;
|
||||
size_t count = 0;
|
||||
while (getline(inputfile, line)) {
|
||||
size_t state = parse_number_stats(line.data());
|
||||
counters[state] += 1;
|
||||
count++;
|
||||
}
|
||||
std::cout << "read " << count << " lines " << std::endl;
|
||||
|
||||
printf("==========\n");
|
||||
printf("fast path %zu (%.5f %%) \n", counters[FAST_PATH],
|
||||
counters[FAST_PATH] * 100. / count);
|
||||
printf("zero path %zu (%.5f %%) \n", counters[ZERO_PATH],
|
||||
counters[ZERO_PATH] * 100. / count);
|
||||
printf("slow path %zu (%.5f %%) \n", counters[SLOW_PATH],
|
||||
counters[SLOW_PATH] * 100. / count);
|
||||
printf("slower path %zu (%.5f %%) \n", counters[SLOWER_PATH],
|
||||
counters[SLOWER_PATH] * 100. / count);
|
||||
printf("failure %zu (%.5f %%) \n", counters[FAILURE],
|
||||
counters[FAILURE] * 100. / count);
|
||||
printf("could not round %zu (%.5f %%) \n", counters[COULD_NOT_ROUND],
|
||||
counters[COULD_NOT_ROUND] * 100. / count);
|
||||
printf("exponent failure %zu (%.5f %%) \n", counters[EXPONENT_FAILURE],
|
||||
counters[EXPONENT_FAILURE] * 100. / count);
|
||||
printf("early bail %zu (%.5f %%) \n", counters[EARLY_STRTOD],
|
||||
counters[EARLY_STRTOD] * 100. / count);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc == 1) {
|
||||
random_floats(false);
|
||||
random_floats(true);
|
||||
std::cout << "You can also provide a filename: it should contain one "
|
||||
"string per line corresponding to a number"
|
||||
<< std::endl;
|
||||
} else {
|
||||
fileload(argv[1]);
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
Loading…
Add table
Reference in a new issue