Compare commits

...

27 commits

Author SHA1 Message Date
Daniel Lemire
bc93aee338
Merge pull request from BYVoid/bench
Add benchmark binary to Bazel BUILD.
2024-10-31 11:14:09 -04:00
Carbo Kuo
f5096c7a94 Add benchmark binary to Bazel BUILD.
To build it, run:
bazel build --compilation_mode=opt //:benchmark

To run the benchmark, run:
bazel-bin/benchmark benchmarks/data/canada.txt

Result from my computer:

read 111126 lines

=== trial 1 ===
fast_double_parser  1315.70 MB/s
strtod         722.10 MB/s
abslfromch     810.26 MB/s
absl           799.89 MB/s
double-conv    396.05 MB/s

=== trial 2 ===
fast_double_parser  1416.58 MB/s
strtod         751.43 MB/s
abslfromch     841.83 MB/s
absl           838.71 MB/s
double-conv    415.33 MB/s
2024-10-31 10:39:29 -04:00
Daniel Lemire
22ac46158c
Merge pull request from BYVoid/bazel-build
Add a simple Bazel module and build file.
2024-10-31 09:48:55 -04:00
Carbo Kuo
3aaddfe336 Add a simple Bazel module and build file. 2024-10-31 08:44:43 -04:00
Daniel Lemire
252029ddac
Update README.md 2024-05-07 10:01:17 -04:00
Daniel Lemire
d971fa76ee update 2024-05-07 09:59:45 -04:00
Daniel Lemire
305e6c8170 better cmake 2024-03-18 10:17:36 -04:00
Daniel Lemire
d61b589f65 tuning. 2024-03-18 10:03:14 -04:00
Daniel Lemire
b92d89b2c4 updating 2024-03-18 10:01:21 -04:00
Daniel Lemire
ca05d13e26
Merge pull request from Osyotr/cmake-exports
Export cmake config and add option to build unit tests
2024-03-18 09:03:29 -04:00
Osyotr
2a3319d698 Export cmake config and add option to build unit tests 2024-03-17 22:45:44 +03:00
Osyotr
468ee515f4 Add visual studio files to gitignore 2024-03-17 22:18:48 +03:00
Daniel Lemire
172f42bccf
Merge pull request from barracuda156/apple
CMakeLists: do not use Linux linker flag on macOS
2024-03-12 13:15:34 -04:00
Sergey Fedorov
da4ba02ea7
CMakeLists: do not use Linux linker flag on macOS 2024-03-10 01:17:32 +07:00
Daniel Lemire
4f5e530b02
Update README.md 2023-12-26 16:06:21 -05:00
Daniel Lemire
15f93a61bb
Update README.md 2023-12-26 16:05:15 -05:00
Daniel Lemire
3489ead54c
Merge pull request from BebeSparkelSparkel/master
Deprecation notice at top
2023-12-26 16:04:07 -05:00
William Rusnack
b94e499b22
Deprecation notice at top 2023-12-26 11:16:07 -05:00
Daniel Lemire
d2e8f223e7
Merge pull request from DavidKorczynski/clusterfuzzlite-integration
Add ClusterFuzzLite integration
2023-12-21 09:48:50 -05:00
David Korczynski
06bc1ea3ed Add ClusterFuzzLite integration
Signed-off-by: David Korczynski <david@adalogics.com>
2023-12-21 03:48:47 -08:00
Daniel Lemire
38c6d6da54 removing old ci 2023-12-06 21:15:13 -05:00
Daniel Lemire
56a813c54f added test. 2023-12-06 21:14:43 -05:00
Daniel Lemire
cee3272a5c
Update README.md 2023-12-02 18:54:05 -05:00
Daniel Lemire
07d9189a8f
Merge pull request from striezel-stash/update-actions-setup-cmake
Update jwlawson/actions-setup-cmake in GitHub Actions to v1.13
2023-01-10 10:24:35 -05:00
Dirk Stolle
8a55baef01 Update jwlawson/actions-setup-cmake in GitHub Actions to v1.13 2023-01-09 22:56:33 +01:00
Daniel Lemire
d039d6fe5e
Merge pull request from nxtn-staged/patch-1
Use _BitScanReverse on Win32
2023-01-03 09:50:49 -05:00
nxtn-staged
5a8c7606df
Use _BitScanReverse on Win32 2022-12-27 11:23:19 +08:00
18 changed files with 192 additions and 109 deletions

View file

@ -0,0 +1,6 @@
# Build image for the ClusterFuzzLite fuzzers, based on the OSS-Fuzz base builder.
FROM gcr.io/oss-fuzz-base/base-builder
# Install generic build tooling (make and the autotools).
RUN apt-get update && apt-get install -y make autoconf automake libtool
# Copy the whole repository into $SRC/fast_double_parser.
COPY . $SRC/fast_double_parser
# Place the fuzzer build script at $SRC/build.sh.
# NOTE(review): presumably this is where the fuzzer build step looks for it - confirm.
COPY .clusterfuzzlite/build.sh $SRC/build.sh
WORKDIR $SRC/fast_double_parser

View file

@ -0,0 +1,3 @@
# ClusterFuzzLite set up
This folder contains a fuzzing setup for [ClusterFuzzLite](https://google.github.io/clusterfuzzlite).

View file

@ -0,0 +1,7 @@
#!/bin/bash -eu
# Build the parse_number fuzzer and write the executable to $OUT/.
#
# $CXX, $CXXFLAGS, $LIB_FUZZING_ENGINE, $SRC and $OUT are expected to be set by
# the fuzzing build environment (the OSS-Fuzz base-builder image).
# The fuzzer is C++ source, so it is compiled with $CXXFLAGS rather than $CFLAGS.
$CXX $CXXFLAGS $LIB_FUZZING_ENGINE \
  "$SRC/fast_double_parser/.clusterfuzzlite/parse_number_fuzzer.cpp" \
  -o "$OUT/parse_number_fuzzer" \
  -I"$SRC/fast_double_parser/include"

View file

@ -0,0 +1,11 @@
#include <fast_double_parser.h>
#include <string>
// libFuzzer entry point: hands the raw fuzz bytes to
// fast_double_parser::parse_number and discards the result.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  // The parser is called with a C string, so the bytes are copied into a
  // std::string first; c_str() then yields a NUL-terminated buffer.
  const char *const raw_bytes = reinterpret_cast<const char *>(data);
  const std::string terminated_copy(raw_bytes, size);

  // Only crashes and sanitizer reports matter here, so neither the returned
  // pointer nor the parsed value is inspected.
  double parsed_value;
  fast_double_parser::parse_number(terminated_copy.c_str(), &parsed_value);
  return 0;
}

View file

@ -0,0 +1 @@
language: c++

30
.github/workflows/cflite_pr.yml vendored Normal file
View file

@ -0,0 +1,30 @@
# Fuzzes pull requests with ClusterFuzzLite: builds the fuzzers, then runs them
# for a fixed time budget.
name: ClusterFuzzLite PR fuzzing
# Triggered manually (workflow_dispatch) and by pull requests targeting main.
on:
workflow_dispatch:
pull_request:
branches: [ main ]
permissions: read-all
jobs:
PR:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
# Only the address sanitizer is exercised.
sanitizer: [address]
steps:
- name: Build Fuzzers (${{ matrix.sanitizer }})
id: build
uses: google/clusterfuzzlite/actions/build_fuzzers@v1
with:
sanitizer: ${{ matrix.sanitizer }}
language: c++
bad-build-check: false
- name: Run Fuzzers (${{ matrix.sanitizer }})
id: run
uses: google/clusterfuzzlite/actions/run_fuzzers@v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
# Fuzzing time budget, in seconds.
fuzz-seconds: 100
mode: 'code-change'
report-unreproducible-crashes: false
sanitizer: ${{ matrix.sanitizer }}

View file

@ -1,16 +1,15 @@
name: Ubuntu 20.04 CI (GCC 9)
name: Ubuntu 22.04
on: [push, pull_request]
jobs:
ubuntu-build:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
strategy:
matrix:
cxx: [g++-12, clang++-14]
steps:
- uses: actions/checkout@v3
- name: Setup cmake
uses: jwlawson/actions-setup-cmake@v1.4
with:
cmake-version: '3.9.x'
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4
- name: Use cmake
run: |
mkdir build &&

View file

@ -1,20 +0,0 @@
name: Ubuntu 18.04 CI (GCC 7)
on: [push, pull_request]
jobs:
ubuntu-build:
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v3
- name: Setup cmake
uses: jwlawson/actions-setup-cmake@v1.4
with:
cmake-version: '3.9.x'
- name: Use cmake
run: |
mkdir build &&
cd build &&
cmake -DFAST_DOUBLE_BENCHMARKS=ON .. &&
cmake --build . &&
ctest -j --output-on-failure

View file

@ -1,25 +0,0 @@
name: VS16-Ninja-CI
on: [push, pull_request]
jobs:
ci:
name: windows-vs16
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- name: 'Run CMake with VS16'
uses: lukka/run-cmake@v2
with:
cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
cmakeListsTxtPath: '${{ github.workspace }}/CMakeLists.txt'
buildDirectory: "${{ github.workspace }}/../../_temp/windows"
cmakeBuildType: Release
buildWithCMake: true
cmakeGenerator: VS16Win64
cmakeAppendedArgs: -G Ninja
buildWithCMakeArgs: --config Release
- name: 'Run CTest'
run: ctest -C Release --output-on-failure
working-directory: "${{ github.workspace }}/../../_temp/windows"

4
.gitignore vendored
View file

@ -44,3 +44,7 @@
/fast_double_parser.cxxflags
/fast_double_parser.files
/fast_double_parser.includes
# Visual Studio
/.vs
/out

22
BUILD.bazel Normal file
View file

@ -0,0 +1,22 @@
# Header-only library target: it lists a single header and no sources.
cc_library(
name = "fast_double_parser",
hdrs = ["include/fast_double_parser.h"],
# Strip the leading "include/" so dependents include "fast_double_parser.h" directly.
strip_include_prefix = "include",
visibility = ["//visibility:public"],
)
# Unit test target built from tests/unit.cpp against the library above.
cc_test(
name = "unit",
srcs = ["tests/unit.cpp"],
deps = [":fast_double_parser"],
)
# Benchmark binary; links abseil strings and double-conversion alongside this library.
# Build with:  bazel build --compilation_mode=opt //:benchmark
# Run with:    bazel-bin/benchmark benchmarks/data/canada.txt
cc_binary(
name = "benchmark",
srcs = ["benchmarks/benchmark.cpp"],
deps = [
":fast_double_parser",
"@abseil-cpp//absl/strings",
"@double-conversion",
],
)

View file

@ -1,6 +1,7 @@
cmake_minimum_required(VERSION 3.11)
cmake_policy(SET CMP0048 NEW)
project(fast_double_parser LANGUAGES CXX VERSION 0.0.0.0)
cmake_minimum_required(VERSION 3.9)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
if (NOT CMAKE_BUILD_TYPE)
@ -19,53 +20,82 @@ set(benchmark_src benchmarks/benchmark.cpp)
add_library(fast_double_parser INTERFACE)
target_include_directories(fast_double_parser INTERFACE include/)
target_include_directories(fast_double_parser
INTERFACE
$<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/include>
$<INSTALL_INTERFACE:include>
)
include(GNUInstallDirs)
install(FILES ${headers} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
install(TARGETS fast_double_parser EXPORT fast_double_parser-targets)
install(
EXPORT fast_double_parser-targets
DESTINATION "share/fast_double_parser"
NAMESPACE fast_double_parser::
)
add_executable(unit ${unit_src} ${bogus_src} ${rebogus_src})
if(FAST_DOUBLE_PARSER_SANITIZE)
target_compile_options(unit PUBLIC -fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all)
target_link_options(unit PUBLIC -fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all)
# Ubuntu bug for GCC 5.0+ (safe for all versions)
if (CMAKE_COMPILER_IS_GNUCC)
target_link_libraries(unit PUBLIC -fuse-ld=gold)
endif()
include(CMakePackageConfigHelpers)
configure_package_config_file(
"${CMAKE_CURRENT_SOURCE_DIR}/fast_double_parser-config.cmake.in"
"${CMAKE_CURRENT_BINARY_DIR}/fast_double_parser-config.cmake"
INSTALL_DESTINATION "share/fast_double_parser"
)
install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/fast_double_parser-config.cmake"
DESTINATION "share/fast_double_parser"
)
option(BUILD_TESTING "Build unit tests" ON)
if(BUILD_TESTING)
add_executable(unit ${unit_src} ${bogus_src} ${rebogus_src})
if(FAST_DOUBLE_PARSER_SANITIZE)
target_compile_options(unit PUBLIC -fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all)
target_link_options(unit PUBLIC -fsanitize=address -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all)
# Ubuntu bug for GCC 5.0+ (safe for all versions)
if (CMAKE_COMPILER_IS_GNUCC AND NOT APPLE)
target_link_libraries(unit PUBLIC -fuse-ld=gold)
endif()
endif()
target_link_libraries(unit PRIVATE fast_double_parser)
enable_testing()
add_test(unit unit)
endif()
target_link_libraries(unit PUBLIC fast_double_parser)
enable_testing()
add_test(unit unit)
option(FAST_DOUBLE_BENCHMARKS "include benchmarks" OFF)
function(initialize_submodule DIRECTORY)
if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${DIRECTORY}/.git)
find_package(Git QUIET REQUIRED)
message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}/${DIRECTORY}/.git does not exist. Initializing ${DIRECTORY} submodule ...")
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init ${CMAKE_CURRENT_SOURCE_DIR}/${DIRECTORY}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
RESULT_VARIABLE GIT_EXIT_CODE)
if(NOT GIT_EXIT_CODE EQUAL "0")
message(FATAL_ERROR "${GIT_EXECUTABLE} submodule update --init dependencies/${DIRECTORY} failed with exit code ${GIT_EXIT_CODE}, please checkout submodules")
endif()
endif()
endfunction(initialize_submodule)
if(FAST_DOUBLE_BENCHMARKS)
initialize_submodule(benchmarks/dependencies/abseil-cpp)
initialize_submodule(benchmarks/dependencies/double-conversion)
add_subdirectory(benchmarks/dependencies/abseil-cpp)
add_subdirectory(benchmarks/dependencies/double-conversion)
include(FetchContent)
include(ExternalProject)
set(ABSL_ENABLE_INSTALL ON)
set(ABSL_RUN_TEST OFF CACHE INTERNAL "")
set(ABSL_USE_GOOGLETEST_HEAD OFF CACHE INTERNAL "")
FetchContent_Declare(abseil
GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
GIT_TAG "20210324.2")
FetchContent_GetProperties(abseil)
if(NOT abseil_POPULATED)
set(BUILD_TESTING OFF)
FetchContent_Populate(abseil)
add_subdirectory(${abseil_SOURCE_DIR} ${abseil_BINARY_DIR})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${abseil_SOURCE_DIR}/absl/copts)
include(${abseil_SOURCE_DIR}/absl/copts/AbseilConfigureCopts.cmake)
endif()
add_executable(benchmark ${benchmark_src})
target_link_libraries(benchmark PUBLIC double-conversion absl_strings)
target_include_directories(benchmark PUBLIC include)
FetchContent_Declare(doubleconversion
GIT_REPOSITORY https://github.com/google/double-conversion.git
GIT_TAG "v3.1.5")
FetchContent_GetProperties(doubleconversion)
FetchContent_MakeAvailable(doubleconversion)
add_executable(benchmark ${benchmark_src})
target_link_libraries(benchmark PUBLIC double-conversion absl::strings)
target_include_directories(benchmark PUBLIC include)
endif(FAST_DOUBLE_BENCHMARKS)

10
MODULE.bazel Normal file
View file

@ -0,0 +1,10 @@
"""fast_double_parser: 4x faster than strtod."""
# Bazel module declaration for fast_double_parser.
module(
name = "fast_double_parser",
version = "0.8.0",
compatibility_level = 0,
)
# Development-only dependencies (dev_dependency = True); the benchmark target in
# BUILD.bazel depends on both.
bazel_dep(name = "abseil-cpp", version = "20240722.0", dev_dependency = True)
bazel_dep(name = "double-conversion", version = "3.3.0", dev_dependency = True)

View file

@ -1,5 +1,7 @@
# fast_double_parser: 4x faster than strtod
![VS16-CI](https://github.com/lemire/fast_double_parser/workflows/VS16-CI/badge.svg)![Ubuntu 18.04 CI (GCC 7)](https://github.com/lemire/fast_double_parser/workflows/Ubuntu%2018.04%20CI%20(GCC%207)/badge.svg)![VS16-Ninja-CI](https://github.com/lemire/fast_double_parser/workflows/VS16-Ninja-CI/badge.svg)![MSYS2-CI](https://github.com/lemire/fast_double_parser/workflows/MSYS2-CI/badge.svg)![VS16-CLANG-CI](https://github.com/lemire/fast_double_parser/workflows/VS16-CLANG-CI/badge.svg)![Ubuntu 20.04 CI (GCC 9)](https://github.com/lemire/fast_double_parser/workflows/Ubuntu%2020.04%20CI%20(GCC%209)/badge.svg)[![Build Status](https://api.cirrus-ci.com/github/lemire/fast_double_parser.svg)](https://cirrus-ci.com/github/lemire/fast_double_parser)
![MSYS2-CI](https://github.com/lemire/fast_double_parser/workflows/MSYS2-CI/badge.svg)[![Ubuntu 22.04](https://github.com/lemire/fast_double_parser/actions/workflows/ubuntu.yml/badge.svg)](https://github.com/lemire/fast_double_parser/actions/workflows/ubuntu.yml)
Unless you need support for [RFC 7159](https://tools.ietf.org/html/rfc7159) (JSON standard), we encourage users to adopt the [fast_float](https://github.com/fastfloat/fast_float) library instead. It has more functionality.
Fast function to parse ASCII strings containing decimal numbers into double-precision (binary64) floating-point values. That is, given the string "1.0e10", it should return a 64-bit floating-point value equal to 10000000000. We do not sacrifice accuracy. The function will match exactly (down to the smallest bit) the result of a standard function like `strtod`.
@ -9,11 +11,6 @@ We support all major compilers: Visual Studio, GNU GCC, LLVM Clang. We require C
The core of this library was ported to Go by Nigel Tao and is now a standard float-parsing routine in Go (`strconv.ParseFloat`).
**We encourage users to adopt [fast_float](https://github.com/fastfloat/fast_float) library instead. It has more functionality and greater speed in some cases.**
## Reference
- Daniel Lemire, [Number Parsing at a Gigabyte per Second](https://arxiv.org/abs/2101.11408), Software: Practice and Experience 51 (8), 2021.
@ -56,7 +53,7 @@ We assume that the rounding mode is set to nearest, the default setting (`std::f
## What if I prefer another API?
The [fast_float](https://github.com/lemire/fast_float) library offers an API resembling that of the C++17 `std::from_chars` functions. In particular, you can specify the beginning and the end of the string.
Furthermore [fast_float](https://github.com/lemire/fast_float) supports both 32-bit and 64-bit floating-point numbers. The [fast_float](https://github.com/lemire/fast_float) library is part of Apache Arrow.
Furthermore [fast_float](https://github.com/lemire/fast_float) supports both 32-bit and 64-bit floating-point numbers. The [fast_float](https://github.com/lemire/fast_float) library is part of Apache Arrow, GCC 12, Safari/WebKit and other important systems.
## Why should I expect this function to be faster?
@ -172,6 +169,7 @@ double-conv 243.90 MB/s
- [There is a Rust port](https://github.com/ezrosent/frawk/tree/master/src/runtime/float_parse).
- [There is a Java port](https://github.com/wrandelshofer/FastDoubleParser).
- [There is a C# port](https://github.com/CarlVerret/csFastFloat).
- [Bazel Central Registry](https://registry.bazel.build/modules/fast_double_parser).
## Credit

View file

@ -20,7 +20,7 @@
#include <vector>
#include "double-conversion/ieee.h"
#include "double-conversion/string-to-double.h"
#include "double-conversion/double-conversion.h"
double findmax_fast_double_parser(const std::vector<std::string>& s) {
double answer = 0;

View file

@ -0,0 +1,3 @@
@PACKAGE_INIT@
include("${CMAKE_CURRENT_LIST_DIR}/fast_double_parser-targets.cmake")

View file

@ -193,24 +193,18 @@ really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) {
/* result might be undefined when input_num is zero */
inline int leading_zeroes(uint64_t input_num) {
#ifdef _MSC_VER
#if defined(_M_X64) || defined(_M_ARM64) || defined (_M_IA64)
unsigned long leading_zero = 0;
// Search the mask data from most significant bit (MSB)
// to least significant bit (LSB) for a set bit (1).
#ifdef _WIN64
if (_BitScanReverse64(&leading_zero, input_num))
return (int)(63 - leading_zero);
else
return 64;
#else
int last_bit = 0;
if(input_num & uint64_t(0xffffffff00000000)) input_num >>= 32, last_bit |= 32;
if(input_num & uint64_t( 0xffff0000)) input_num >>= 16, last_bit |= 16;
if(input_num & uint64_t( 0xff00)) input_num >>= 8, last_bit |= 8;
if(input_num & uint64_t( 0xf0)) input_num >>= 4, last_bit |= 4;
if(input_num & uint64_t( 0xc)) input_num >>= 2, last_bit |= 2;
if(input_num & uint64_t( 0x2)) input_num >>= 1, last_bit |= 1;
return 63 - last_bit;
#endif // defined(_M_X64) || defined(_M_ARM64) || defined (_M_IA64)
if (_BitScanReverse(&leading_zero, (uint32_t)(input_num >> 32)))
return (int)(63 - (leading_zero + 32));
if (_BitScanReverse(&leading_zero, (uint32_t)input_num))
return (int)(63 - leading_zero);
#endif // _WIN64
#else
return __builtin_clzll(input_num);
#endif // _MSC_VER

View file

@ -284,6 +284,15 @@ void issue63() {
std::cout << "1-4-abc" << std::endl;
}
// Regression check for issue 2093: a decimal string padded with trailing
// zeros must be accepted by parse_number and must compare equal to 0.95.
// Throws std::runtime_error if parsing fails or the value differs; prints the
// input string on success.
void issue2093() {
  const std::string input = "0.95000000000000000000";
  double parsed;
  const char *end = fast_double_parser::parse_number(input.c_str(), &parsed);
  // A null return pointer is treated as a parse failure.
  if (end == nullptr) {
    throw std::runtime_error("0.95000000000000000000");
  }
  // Exact comparison is intentional: the test demands the exact double 0.95.
  if (parsed != 0.95) {
    throw std::runtime_error("cannot parse 0.95000000000000000000.");
  }
  std::cout << "0.95000000000000000000" << std::endl;
}
inline void Assert(bool Assertion) {
@ -330,6 +339,7 @@ int main() {
printf("Aborting further tests.");
return EXIT_SUCCESS;
}
issue2093();
Assert(basic_test_64bit("1090544144181609348835077142190",0x1.b8779f2474dfbp+99));
Assert(basic_test_64bit("4503599627370496.5", 4503599627370496.5));
Assert(basic_test_64bit("4503599627370497.5", 4503599627370497.5));