// fast_double_parser/benchmarks/benchmark.cpp


#include "absl/strings/charconv.h"
#include "absl/strings/numbers.h"
#include "fast_double_parser.h"

#include <algorithm>
#include <chrono>
#include <climits>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctype.h>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdio.h>
#include <vector>

#include "double-conversion/ieee.h"
#include "double-conversion/string-to-double.h"

// Parses every string in `s` with fast_double_parser::parse_number and returns
// the largest value seen. The running maximum starts at 0, so the result is
// never below 0 and an empty input yields 0.
//
// The input is taken by const reference and iterated by reference so that no
// vector or string copies happen while the caller is timing this function.
//
// Throws std::runtime_error if parse_number reports failure for any string.
double findmax_fast_double_parser(const std::vector<std::string> &s) {
  double answer = 0;
  for (const std::string &st : s) {
    double x;
    const bool isok = fast_double_parser::parse_number(st.c_str(), &x);
    if (!isok)
      throw std::runtime_error("bug in findmax_fast_double_parser");
    // Written as a ternary (not std::max) to keep the original comparison.
    answer = answer > x ? answer : x;
  }
  return answer;
}

// Parses every string in `s` with strtod and returns the largest value seen.
// The running maximum starts at 0, so the result is never below 0 and an
// empty input yields 0.
//
// The input is taken by const reference and iterated by reference so that no
// vector or string copies happen while the caller is timing this function.
//
// Throws std::runtime_error if strtod consumes no characters of a string.
double findmax_strtod(const std::vector<std::string> &s) {
  double answer = 0;
  for (const std::string &st : s) {
    const char *begin = st.c_str();
    char *end = nullptr;
    const double x = std::strtod(begin, &end);
    // strtod sets `end` to `begin` when no conversion could be performed.
    if (end == begin) {
      throw std::runtime_error("bug in findmax_strtod");
    }
    // Written as a ternary (not std::max) to keep the original comparison.
    answer = answer > x ? answer : x;
  }
  return answer;
}

// Parses every string in `s` with absl::SimpleAtod and returns the largest
// value seen. The running maximum starts at 0, so the result is never below 0
// and an empty input yields 0.
//
// The input is taken by const reference and iterated by reference so that no
// vector or string copies happen while the caller is timing this function.
//
// Throws std::runtime_error if SimpleAtod reports failure for any string.
double findmax_absl(const std::vector<std::string> &s) {
  double answer = 0;
  for (const std::string &st : s) {
    double x = 0;
    const bool isok = absl::SimpleAtod(st, &x);
    if (!isok) {
      throw std::runtime_error("bug in findmax_absl");
    }
    // Written as a ternary (not std::max) to keep the original comparison.
    answer = answer > x ? answer : x;
  }
  return answer;
}

// Parses every string in `s` with absl::from_chars and returns the largest
// value seen. The running maximum starts at 0, so the result is never below 0
// and an empty input yields 0.
//
// The input is taken by const reference and iterated by reference so that no
// vector or string copies happen while the caller is timing this function.
//
// Throws std::runtime_error if from_chars consumes no characters of a string.
double findmax_absl_from_chars(const std::vector<std::string> &s) {
  double answer = 0;
  for (const std::string &st : s) {
    double x = 0;
    const char *begin = st.data();
    // Only the end pointer is inspected; naming the result (instead of a
    // structured binding) avoids an unused `ec` variable.
    const auto result = absl::from_chars(begin, begin + st.size(), x);
    if (result.ptr == begin) {
      throw std::runtime_error("bug in findmax_absl_from_chars");
    }
    // Written as a ternary (not std::max) to keep the original comparison.
    answer = answer > x ? answer : x;
  }
  return answer;
}

double findmax_doubleconversion(std::vector<std::string> s) {
  double answer = 0;
  double x;
  int flags = double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
              double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK |
              double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES;
  double empty_string_value = 0.0;
  uc16 separator = double_conversion::StringToDoubleConverter::kNoSeparator;
  double_conversion::StringToDoubleConverter converter(
      flags, empty_string_value, double_conversion::Double::NaN(), NULL, NULL,
      separator);
  int processed_characters_count;
  for (std::string st : s) {
    x = converter.StringToDouble(st.data(), st.size(),
                                 &processed_characters_count);
    if (processed_characters_count == 0) {
      throw std::runtime_error("bug in findmax_doubleconversion");
    }
    answer = answer > x ? answer : x;
  }
  return answer;
}

// ulp distance
// Marc B. Reynolds, 2016-2019
// Public Domain under http://unlicense.org, see link for details.
// adapted by D. Lemire
//
// Returns the distance between `a` and `b` measured in units in the last
// place, computed from their raw 64-bit patterns.
//
// - Same sign bit: the distance is the absolute difference of the patterns.
// - Different sign bits: the distance is the sum of the two magnitudes, i.e.
//   the distance from each value to its own zero (+0.0 and -0.0 are treated
//   as the same point, so f64_ulp_dist(0.0, -0.0) == 0).
inline uint64_t f64_ulp_dist(double a, double b) {
  uint64_t ua, ub;
  std::memcpy(&ua, &a, sizeof(ua));
  std::memcpy(&ub, &b, sizeof(ub));
  // The top bit of (ua ^ ub) is clear exactly when both sign bits agree.
  if ((int64_t)(ub ^ ua) >= 0)
    return (int64_t)(ua - ub) >= 0 ? (ua - ub) : (ub - ua);
  // Exactly one operand carries the sign bit (2^63). Adding 2^63 once more
  // wraps it away modulo 2^64, leaving the sum of the two magnitudes. The
  // constant must be the 64-bit sign mask, not the 32-bit one used by the
  // single-precision version of this routine.
  const uint64_t sign_mask = uint64_t{1} << 63;
  return ua + ub + sign_mask;
}

void validate(std::vector<std::string> s) {

  double x, xref;
  int flags = double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
              double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK |
              double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES;
  double empty_string_value = 0.0;
  uc16 separator = double_conversion::StringToDoubleConverter::kNoSeparator;
  double_conversion::StringToDoubleConverter converter(
      flags, empty_string_value, double_conversion::Double::NaN(), NULL, NULL,
      separator);
  int processed_characters_count;
  for (std::string st : s) {
    xref = strtod(st.data(), NULL);
    x = converter.StringToDouble(st.data(), st.size(),
                                 &processed_characters_count);
    if (xref != x) {
      std::cerr << "double conversion disagrees" << std::endl;
      printf("double conversion: %.*e\n", DBL_DIG + 1, x);
      printf("reference: %.*e\n", DBL_DIG + 1, xref);
      printf("string: %s\n", st.c_str());
      printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(x, xref));
      throw std::runtime_error("double conversion disagrees");
    }
    absl::from_chars(st.data(), st.data() + st.size(), x);
    if (xref != x) {
      std::cerr << "abseil from_chars disagrees" << std::endl;
      printf("abseil from_chars: %.*e\n", DBL_DIG + 1, x);
      printf("reference: %.*e\n", DBL_DIG + 1, xref);
      printf("string: %s\n", st.c_str());
      printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(x, xref));
      throw std::runtime_error("abseil from_chars disagrees");
    }
    bool isok = absl::SimpleAtod(st, &x);
    if (!isok) {
      throw std::runtime_error("bug in absl::SimpleAtod");
    }
    if (xref != x) {
      std::cerr << "abseil disagrees" << std::endl;
      printf("abseil: %.*e\n", DBL_DIG + 1, x);
      printf("reference: %.*e\n", DBL_DIG + 1, xref);
      printf("string: %s\n", st.c_str());
      printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(x, xref));
      throw std::runtime_error("abseil disagrees");
    }
    isok = fast_double_parser::parse_number(st.c_str(), &x);
    if (!isok) {
      printf("fast_double_parser refused to parse %s\n", st.c_str());
      throw std::runtime_error("fast_double_parser refused to parse");
    }
    if (xref != x) {
      std::cerr << "fast_double_parser disagrees" << std::endl;
      printf("fast_double_parser: %.*e\n", DBL_DIG + 1, x);
      printf("reference: %.*e\n", DBL_DIG + 1, xref);
      printf("string: %s\n", st.c_str());
      printf("f64_ulp_dist = %d\n", (int)f64_ulp_dist(x, xref));
      throw std::runtime_error("fast_double_parser disagrees");
    }
  }
}

// Prints the first five entries of `evts`, each divided by `volume`, on one
// line labelled as cycles, instructions, branch misses, cache references and
// cache misses. `evts` must hold at least five elements.
void printvec(std::vector<unsigned long long> evts, size_t volume) {
  // Converts a raw counter to a per-unit-of-volume figure in double precision.
  const auto per_unit = [volume](unsigned long long counter) {
    return counter * 1.0 / volume;
  };
  const double cycles = per_unit(evts[0]);
  const double instructions = per_unit(evts[1]);
  const double branch_misses = per_unit(evts[2]);
  const double cache_refs = per_unit(evts[3]);
  const double cache_misses = per_unit(evts[4]);
  printf("%.2f cycles  %.2f instr  %.4f branch miss  "
         "%.2f cache ref %.2f cache miss \n",
         cycles, instructions, branch_misses, cache_refs, cache_misses);
}

// Runs `run()` once between two high_resolution_clock readings and, when
// `report` is true, prints `label` followed by the throughput in MB/s
// (`volumeMB` divided by the elapsed time). Prints "bug" whenever `run()`
// returns exactly 0, regardless of `report`.
template <typename Run>
static void time_parser(const char *label, Run run, double volumeMB,
                        bool report) {
  const auto t1 = std::chrono::high_resolution_clock::now();
  const double ts = run();
  const auto t2 = std::chrono::high_resolution_clock::now();
  if (ts == 0)
    printf("bug\n");
  // Elapsed time in nanoseconds, hence the 1e9 factor below.
  const double dif = static_cast<double>(
      std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count());
  if (report)
    printf("%s%.2f MB/s\n", label, volumeMB * 1000000000 / dif);
}

// Benchmarks each parser over `lines` and prints its throughput.
//
// `volume` is the total input size in bytes used to compute MB/s. Three
// passes are made over all parsers; the first pass (index 0) is run but not
// reported, and the remaining passes are printed under "=== trial N ===".
//
// `lines` is taken by const reference so the data set is not copied on entry.
void process(const std::vector<std::string> &lines, size_t volume) {
  const double volumeMB = volume / (1024. * 1024.);
  const size_t trials = 3;
  for (size_t i = 0; i < trials; i++) {
    const bool report = i > 0;
    if (report)
      printf("=== trial %zu ===\n", i);

    // Labels carry their own padding so the printed columns line up.
    time_parser("fast_double_parser  ",
                [&lines] { return findmax_fast_double_parser(lines); },
                volumeMB, report);
    time_parser("strtod         ",
                [&lines] { return findmax_strtod(lines); }, volumeMB, report);
    time_parser("abslfromch     ",
                [&lines] { return findmax_absl_from_chars(lines); }, volumeMB,
                report);
    time_parser("absl           ", [&lines] { return findmax_absl(lines); },
                volumeMB, report);
    time_parser("double-conv    ",
                [&lines] { return findmax_doubleconversion(lines); }, volumeMB,
                report);
    printf("\n\n");
  }
}

// Reads `filename` line by line, then validates and benchmarks the parsers on
// those lines. The volume passed to process() is the sum of the line lengths.
// If the file cannot be opened, a message is written to std::cerr and the
// function returns without doing anything else.
void fileload(char *filename) {
  std::ifstream in(filename);
  if (in.fail()) {
    std::cerr << "can't open " << filename << std::endl;
    return;
  }

  std::vector<std::string> rows;
  rows.reserve(10000); // let us reserve plenty of memory.
  size_t total_bytes = 0;
  for (std::string row; std::getline(in, row);) {
    total_bytes += row.size();
    rows.push_back(row);
  }

  std::cout << "read " << rows.size() << " lines " << std::endl;
  validate(rows);
  process(rows, total_bytes);
}

// Generates `howmany` pseudo-random values as rand() / RAND_MAX, formats each
// with std::to_string, then validates and benchmarks the parsers on those
// strings. The volume passed to process() is the sum of the string lengths.
// rand() is not seeded here.
void demo(size_t howmany) {
  // The values are floating-point numbers (not integers), and rand() may
  // return RAND_MAX, so 1 is included in the range.
  std::cout << "parsing random numbers in the range [0,1]" << std::endl;
  std::vector<std::string> lines;
  lines.reserve(howmany); // let us reserve plenty of memory.
  size_t volume = 0;
  for (size_t i = 0; i < howmany; i++) {
    const double x = static_cast<double>(rand()) / RAND_MAX;
    std::string line = std::to_string(x);
    volume += line.size();
    lines.push_back(line);
  }
  validate(lines);
  process(lines, volume);
}

// Entry point. With no command-line argument, runs the built-in demo on
// 100000 generated numbers and then prints a usage hint; otherwise treats
// argv[1] as the path of a file to load and benchmark.
int main(int argc, char **argv) {
  if (argc != 1) {
    fileload(argv[1]);
    return 0;
  }

  demo(100 * 1000);
  std::cout << "You can also provide a filename: it should contain one "
               "string per line corresponding to a number"
            << std::endl;
  return 0;
}