Removing mistakenly merged contribution.

This commit is contained in:
Daniel Lemire 2020-10-02 13:01:14 -04:00
parent 0a1e601d18
commit 3224a04edb
3 changed files with 12 additions and 276 deletions

View file

@ -118,10 +118,6 @@ bool isok = fast_double_parser::parse_number(string, &x);
You must check the value of the boolean (`isok`): if it is false, then the function refused to parse.
There is also a (slightly slower) version of the function called `parse_number_inplace` that takes
three parameters: the initial pointer, the end-of-string pointer and a pointer to the result.
## Users
The library has been reimplemented in [Google wuffs](https://github.com/google/wuffs/).

View file

@ -1073,71 +1073,9 @@ static bool parse_float_strtod(const char *ptr, double *outDouble) {
return true;
}
// Same as parse_float_strtod, but restricted to the range [ptr, pe): the
// bytes are first copied into a NUL-terminated temporary so that strtod never
// reads past the end of the buffer when parsing in place.
//
//   ptr        first character of the number
//   pe         one past the last character that may be read
//   outDouble  receives the parsed value
//
// Returns false when the range is invalid, when the temporary copy cannot be
// allocated, when no characters could be parsed, or when the parsed value is
// not finite. Returns true otherwise.
static bool parse_float_strtod_copy(const char *ptr, const char *pe, double *outDouble) {
  static constexpr size_t TEMP_STRING_MAX_LEN = 64;
  if ((ptr == nullptr) || (pe == nullptr) || (pe < ptr)) {
    return false; // not a valid range, nothing to copy
  }
  const size_t len = static_cast<size_t>(pe - ptr);
  char temp_stack[TEMP_STRING_MAX_LEN];
  char *temp = temp_stack;
  // Short inputs use the stack buffer; anything that would not fit together
  // with its terminating NUL goes to the heap.
  if (len >= TEMP_STRING_MAX_LEN) {
    temp = static_cast<char *>(malloc(len + 1));
    if (temp == nullptr) return false; // couldn't parse due to memory allocation failure.
  }
  std::memcpy(temp, ptr, len);
  temp[len] = 0;
  char *endptr;
#ifdef __CYGWIN__
  // workaround for cygwin
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
  // Parse the bounded copy, not the original buffer: endptr must point into
  // temp for the check below to be meaningful.
  *outDouble = cygwin_strtod_l(temp, &endptr, c_locale);
#elif defined(_WIN32)
  static _locale_t c_locale = _create_locale(LC_ALL, "C");
  *outDouble = _strtod_l(temp, &endptr, c_locale);
#else
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
  *outDouble = strtod_l(temp, &endptr, c_locale);
#endif
  // Evaluate the pointer comparison before the buffer is released.
  const bool consumed_input = (endptr != temp);
  if (temp != temp_stack) free(temp);
  return consumed_input && std::isfinite(*outDouble);
}
#if ( __cplusplus < 201703L )
// Pre-C++17 fallback: fold expressions are unavailable, so the character
// pack is unrolled recursively, one candidate per instantiation.
template <char Head, char... Tail>
struct one_of_impl
{
  really_inline static bool call(char v)
  {
    if (v == Head) {
      return true;
    }
    return one_of_impl<Tail...>::call(v);
  }
};
// End of the recursion: exactly one candidate is left.
template<char Last>
struct one_of_impl<Last>
{
  really_inline static bool call(char v)
  {
    return v == Last;
  }
};
// Returns true when v equals any of the characters listed in Values.
template <char... Values>
really_inline bool is_one_of(char v)
{
  using matcher = one_of_impl<Values...>;
  return matcher::call(v);
}
#else
// Returns true when v equals any of the characters listed in Values
// (written as a C++17 fold expression over the pack).
template <char... Values>
bool is_one_of(char v)
{
  return (... || (Values == v));
}
#endif
// parse the number at p
template <char... DecSeparators>
WARN_UNUSED
really_inline bool parse_number_base(const char *p, double *outDouble) {
really_inline bool parse_number(const char *p, double *outDouble) {
const char *pinit = p;
bool found_minus = (*p == '-');
bool negative = false;
@ -1176,7 +1114,7 @@ really_inline bool parse_number_base(const char *p, double *outDouble) {
}
int64_t exponent = 0;
const char *first_after_period = NULL;
if (is_one_of<DecSeparators...>(*p)) {
if (*p == '.') {
++p;
first_after_period = p;
if (is_integer(*p)) {
@ -1240,7 +1178,7 @@ really_inline bool parse_number_base(const char *p, double *outDouble) {
// It is possible that the integer had an overflow.
// We have to handle the case where we have 0.0000somenumber.
const char *start = start_digits;
while (*start == '0' || is_one_of<DecSeparators...>(*start)) {
while (*start == '0' || (*start == '.')) {
start++;
}
// we over-decrement by one when there is a decimal separator
@ -1272,167 +1210,6 @@ really_inline bool parse_number_base(const char *p, double *outDouble) {
return true;
}
// Bounded variant of the number parser, meant for data parsed in place from
// streams: it parses the number that starts at p and never dereferences pe or
// anything beyond it. The logic mirrors the NUL-terminated parser above.
//
//   DecSeparators  characters accepted as the decimal separator
//   p              first character of the number
//   pe             one past the last character that may be read
//   outDouble      receives the parsed value
//
// Returns false when the input is refused (null or empty range, malformed
// number). Inputs with 19 or more significant digits, with an exponent outside
// [FASTFLOAT_SMALLEST_POWER, FASTFLOAT_LARGEST_POWER], or for which
// compute_float_64 reports failure are handed to parse_float_strtod_copy.
template <char... DecSeparators>
WARN_UNUSED
really_inline bool parse_number_inplace_base(const char *p, const char *pe, double *outDouble) {
  if (!p || !pe || (p == pe))
    return false;
  const char *pinit = p;
  bool found_minus = (*p == '-');
  bool negative = false;
  if (found_minus) {
    ++p;
    if (p == pe)
      return false;
    negative = true;
    if (!is_integer(*p)) { // a negative sign must be followed by an integer
      return false;
    }
  }
  const char *const start_digits = p;
  uint64_t i; // an unsigned int avoids signed overflows (which are bad)
  if (*p == '0') { // 0 cannot be followed by an integer
    ++p;
    // A lone "0" (or "-0") that ends exactly at pe is a valid number, so
    // reaching pe here is not an error; only look at *p when it is in range.
    if ((p != pe) && is_integer(*p)) {
      return false;
    }
    i = 0;
  }
  else {
    if (!(is_integer(*p))) { // must start with an integer
      return false;
    }
    unsigned char digit = *p - '0';
    i = digit;
    p++;
    // the is_made_of_eight_digits_fast routine is unlikely to help here because
    // we rarely see large integer parts like 123456789
    while ((p != pe) && is_integer(*p)) {
      digit = *p - '0';
      // a multiplication by 10 is cheaper than an arbitrary integer
      // multiplication
      i = 10 * i + digit; // might overflow, we will handle the overflow later
      ++p;
    }
  }
  int64_t exponent = 0;
  const char *first_after_period = NULL;
  if ((p != pe) && is_one_of<DecSeparators...>(*p)) {
    ++p;
    if (p == pe)
      return false; // no digits after separator
    first_after_period = p;
    if (is_integer(*p)) {
      unsigned char digit = *p - '0';
      ++p;
      i = i * 10 + digit; // might overflow + multiplication by 10 is likely
                          // cheaper than arbitrary mult.
      // we will handle the overflow later
    }
    else {
      return false;
    }
    while ((p != pe) && is_integer(*p)) {
      unsigned char digit = *p - '0';
      ++p;
      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
                          // because we have parse_highprecision_float later.
    }
    // each fractional digit lowers the decimal exponent by one
    exponent = first_after_period - p;
  }
  int digit_count =
      int(p - start_digits - 1); // used later to guard against overflows
  int64_t exp_number = 0;        // exponential part
  if ((p != pe) && (('e' == *p) || ('E' == *p))) {
    ++p;
    if (p == pe)
      return false; // ill formed scientific notation
    bool neg_exp = false;
    if ('-' == *p) {
      neg_exp = true;
      ++p;
    }
    else if ('+' == *p) {
      ++p;
    }
    if ((p == pe) || !is_integer(*p)) {
      return false; // ill formed scientific notation
    }
    unsigned char digit = *p - '0';
    exp_number = digit;
    p++;
    if ((p != pe) && is_integer(*p)) {
      digit = *p - '0';
      exp_number = 10 * exp_number + digit;
      ++p;
    }
    if ((p != pe) && is_integer(*p)) {
      digit = *p - '0';
      exp_number = 10 * exp_number + digit;
      ++p;
    }
    while ((p != pe) && is_integer(*p)) {
      digit = *p - '0';
      if (exp_number < 0x100000000) { // we need to check for overflows
        exp_number = 10 * exp_number + digit;
      }
      ++p;
    }
    exponent += (neg_exp ? -exp_number : exp_number);
  }
  // If we frequently had to deal with long strings of digits,
  // we could extend our code by using a 128-bit integer instead
  // of a 64-bit integer. However, this is uncommon.
  if (unlikely((digit_count >= 19))) { // this is uncommon
    // It is possible that the integer had an overflow.
    // We have to handle the case where we have 0.0000somenumber.
    const char *start = start_digits;
    // Stop at pe: an input made only of zeros and separators would otherwise
    // make this scan run off the end of the buffer.
    while ((start != pe) &&
           (*start == '0' || is_one_of<DecSeparators...>(*start))) {
      start++;
    }
    // we over-decrement by one when there is a decimal separator
    digit_count -= int(start - start_digits);
    if (digit_count >= 19) {
      // Chances are good that we had an overflow!
      // We start anew.
      // This will happen in the following examples:
      // 10000000000000000000000000000000000000000000e+308
      // 3.1415926535897932384626433832795028841971693993751
      //
      return parse_float_strtod_copy(pinit, pe, outDouble);
    }
  }
  if (unlikely(exponent < FASTFLOAT_SMALLEST_POWER) ||
      (exponent > FASTFLOAT_LARGEST_POWER)) {
    // this is almost never going to get called!!!
    // exponent could be as low as 325
    return parse_float_strtod_copy(pinit, pe, outDouble);
  }
  // from this point forward, exponent >= FASTFLOAT_SMALLEST_POWER and
  // exponent <= FASTFLOAT_LARGEST_POWER
  bool success = true;
  *outDouble = compute_float_64(exponent, i, negative, &success);
  if (!success) {
    // we are almost never going to get here.
    return parse_float_strtod_copy(pinit, pe, outDouble);
  }
  return true;
}
// Signature of the parsers that read from p without an explicit end pointer
// and store the result in *outDouble.
using parser_function_t = bool (*)(const char *p, double *outDouble);
// Signature of the bounded parsers, which take the additional end pointer pe.
using inplace_parser_function_t = bool (*)(const char *p, const char *pe, double *outDouble);
// Default entry points: both are the '.'-separator instantiations.
constexpr parser_function_t parse_number WARN_UNUSED = &parse_number_base<'.'>;
constexpr inplace_parser_function_t parse_number_inplace WARN_UNUSED = &parse_number_inplace_base<'.'>;
} // namespace fast_double_parser
#endif

View file

@ -66,25 +66,6 @@ void check(double d) {
}
}
// Round-trip check for the bounded parser: prints d in "%.*e" form with
// DBL_DIG + 1 digits of precision, parses exactly the written characters with
// parse_number_inplace, and throws std::runtime_error when the parser refuses
// the text or returns a value different from d.
void check_inplace(double d) {
  std::string text(64, '\0');
  auto length = std::snprintf(&text[0], text.size(), "%.*e", DBL_DIG + 1, d);
  text.resize(length);
  const char *const first = text.data();
  const char *const last = first + length;
  double parsed;
  if (!fast_double_parser::parse_number_inplace(first, last, &parsed)) {
    printf("fast_double_parser_inplace refused to parse %s\n", text.c_str());
    throw std::runtime_error("fast_double_parser refused to parse");
  }
  if (d == parsed) {
    return; // exact round trip, nothing to report
  }
  std::cerr << "fast_double_parser disagrees" << std::endl;
  printf("fast_double_parser: %.*e\n", DBL_DIG + 1, parsed);
  printf("reference: %.*e\n", DBL_DIG + 1, d);
  printf("string: %s\n", text.c_str());
  printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(parsed, d));
  throw std::runtime_error("fast_double_parser disagrees");
}
void check_string(std::string s) {
double x;
@ -110,41 +91,15 @@ void check_string(std::string s) {
}
}
// Parses s with parse_number_inplace over exactly [s.data(), s.data() + s.size())
// and compares the result with strtod run in the "C" locale. Throws
// std::runtime_error when the parser refuses s or the two values differ.
void check_string_inplace(std::string s) {
  const char *const first = s.data();
  const char *const last = first + s.size();
  double parsed;
  const bool accepted = fast_double_parser::parse_number_inplace(first, last, &parsed);
  if (!accepted) {
    printf("fast_double_parser refused to parse %s\n", s.c_str());
    throw std::runtime_error("fast_double_parser refused to parse");
  }
  // Reference value from the C library, always using the "C" locale.
#ifdef _WIN32
  static _locale_t c_locale = _create_locale(LC_ALL, "C");
  const double expected = _strtod_l(first, nullptr, c_locale);
#else
  static locale_t c_locale = newlocale(LC_ALL_MASK, "C", NULL);
  const double expected = strtod_l(first, nullptr, c_locale);
#endif
  if (expected == parsed) {
    return; // both parsers agree
  }
  std::cerr << "fast_double_parser disagrees" << std::endl;
  printf("fast_double_parser: %.*e\n", DBL_DIG + 1, parsed);
  printf("reference: %.*e\n", DBL_DIG + 1, expected);
  printf("string: %s\n", s.c_str());
  printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(parsed, expected));
  throw std::runtime_error("fast_double_parser disagrees");
}
void unit_tests() {
for (std::string s : {"7.3177701707893310e+15","1e23", "9007199254740995","7e23"}) {
check_string(s);
check_string_inplace(s);
}
for (double d : {-65.613616999999977, 7.2057594037927933e+16, 1.0e-308,
0.1e-308, 0.01e-307, 1.79769e+308, 2.22507e-308,
-1.79769e+308, -2.22507e-308, 1e-308}) {
check(d);
check_inplace(d);
}
uint64_t offset = 1190;
size_t howmany = 10000000;
@ -163,7 +118,6 @@ void unit_tests() {
::memcpy(&d, &x, sizeof(double));
}
check(d);
check_inplace(d);
}
printf("Unit tests ok\n");
@ -249,6 +203,14 @@ void issue13() {
std::cout << "zero maps to zero" << std::endl;
}
// Regression test for issue 32: the string "-0" must be accepted by
// parse_number and the parsed value must compare equal to zero.
// Throws std::runtime_error when either condition fails.
void issue32() {
  std::string a = "-0";
  double x;
  bool ok = fast_double_parser::parse_number(a.c_str(), &x);
  if(!ok) throw std::runtime_error("could not parse -zero.");
  // Note: x != 0 is false for both +0.0 and -0.0, so the sign is not checked.
  if(x != 0) throw std::runtime_error("-zero does not map to zero.");
  // Message names the input the same way as the error messages above.
  std::cout << "-zero maps to zero" << std::endl;
}
void issue23() {
std::string a = "0e+42949672970";
@ -312,6 +274,7 @@ int main() {
Assert(basic_test_64bit("0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072008890245868760858598876504231122409594654935248025624400092282356951787758888037591552642309780950434312085877387158357291821993020294379224223559819827501242041788969571311791082261043971979604000454897391938079198936081525613113376149842043271751033627391549782731594143828136275113838604094249464942286316695429105080201815926642134996606517803095075913058719846423906068637102005108723282784678843631944515866135041223479014792369585208321597621066375401613736583044193603714778355306682834535634005074073040135602968046375918583163124224521599262546494300836851861719422417646455137135420132217031370496583210154654068035397417906022589503023501937519773030945763173210852507299305089761582519159720757232455434770912461317493580281734466552734375", 
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072008890245868760858598876504231122409594654935248025624400092282356951787758888037591552642309780950434312085877387158357291821993020294379224223559819827501242041788969571311791082261043971979604000454897391938079198936081525613113376149842043271751033627391549782731594143828136275113838604094249464942286316695429105080201815926642134996606517803095075913058719846423906068637102005108723282784678843631944515866135041223479014792369585208321597621066375401613736583044193603714778355306682834535634005074073040135602968046375918583163124224521599262546494300836851861719422417646455137135420132217031370496583210154654068035397417906022589503023501937519773030945763173210852507299305089761582519159720757232455434770912461317493580281734466552734375));
issue23();
issue32();
issue23_2();
unit_tests();
for (int p = -306; p <= 308; p++) {