Merge pull request #680 from akheron/dtoa

Use `dtoa()` for optimal encoding of reals
This commit is contained in:
Petri Lehtinen 2024-03-31 22:02:09 +03:00 committed by GitHub
commit 50953fb1fa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 6489 additions and 29 deletions

View file

@ -18,8 +18,9 @@ jobs:
matrix:
os: ["ubuntu-latest", "macos-latest"]
cc: ["gcc", "clang"]
dtoa: ["yes", "no"]
runs-on: ${{matrix.os}}
runs-on: ${{ matrix.os }}
steps:
- if: ${{runner.os == 'macOS'}}
@ -27,9 +28,9 @@ jobs:
- uses: actions/checkout@v4
- run: autoreconf -fi
- env:
CC: ${{matrix.cc}}
CC: ${{ matrix.cc }}
CFLAGS: -Werror
run: ./configure
run: ./configure --enable-dtoa=${{ matrix.dtoa }}
- run: make check
cmake:

View file

@ -9,6 +9,12 @@ Work in progress
is used to switch locales inside the threads (#674, #675, #677. Thanks to
Bruno Haible the report and help with fixing.)
- Use David M. Gay's `dtoa()` algorithm to avoid misprinting issues of real
numbers that are not exactly representable as a `double` (#680).
If this is not desirable, use `./configure --disable-dtoa` or `cmake
-DUSE_DTOA=OFF .`
* Build:
- Make test output nicer in CMake based builds (#683)

View file

@ -5,6 +5,7 @@ project(jansson C)
option(JANSSON_BUILD_SHARED_LIBS "Build shared libraries." OFF)
option(USE_URANDOM "Use /dev/urandom to seed the hash function." ON)
option(USE_WINDOWS_CRYPTOAPI "Use CryptGenRandom to seed the hash function." ON)
option(USE_DTOA "Use dtoa for optimal floating-point to string conversions." ON)
if (MSVC)
# This option must match the settings used in your program, in particular if you
@ -93,6 +94,9 @@ check_function_exists (sched_yield HAVE_SCHED_YIELD)
# Check for the int-type includes
check_include_files (stdint.h HAVE_STDINT_H)
include (TestBigEndian)
TEST_BIG_ENDIAN(WORDS_BIGENDIAN)
# Check our 64 bit integer sizes
check_type_size (__int64 __INT64)
check_type_size (int64_t INT64_T)
@ -193,6 +197,8 @@ endif ()
# detect what to use for the 64 bit type.
# Note: I will prefer long long if I can get it, as that is what the automake system aimed for.
if (NOT DEFINED JSON_INT_T)
set (JSON_INTEGER_IS_LONG_LONG 1)
if (HAVE_LONG_LONG_INT AND (LONG_LONG_INT EQUAL 8))
set (JSON_INT_T "long long")
elseif (HAVE_INT64_T)
@ -274,6 +280,9 @@ include_directories (${CMAKE_CURRENT_BINARY_DIR}/private_include)
# Add the lib sources.
file(GLOB JANSSON_SRC src/*.c)
if (NOT USE_DTOA)
list(FILTER JANSSON_SRC EXCLUDE REGEX ".*dtoa\\.c$")
endif()
set(JANSSON_HDR_PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src/hashtable.h
@ -526,6 +535,11 @@ if (NOT JANSSON_WITHOUT_TESTS)
if (IS_DIRECTORY ${TESTDIR})
get_filename_component(TNAME ${TESTDIR} NAME)
if ((USE_DTOA AND EXISTS ${TESTDIR}/skip_if_dtoa) OR
(NOT USE_DTOA AND EXISTS ${TESTDIR}/skip_unless_dtoa))
continue()
endif()
if (JANSSON_TEST_WITH_VALGRIND)
add_test(memcheck__${SUITE}__${TNAME}
${MEMCHECK_COMMAND} ${SUITE_TEST_CMD} ${TESTDIR})

26
LICENSE
View file

@ -1,4 +1,11 @@
Copyright (c) 2009-2020 Petri Lehtinen <petri@digip.org>
# License
This project is licensed under the MIT license, except where otherwise noted.
The full text of the MIT license is included below.
## MIT License
Copyright (c) 2009-2024 Petri Lehtinen <petri@digip.org>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -17,3 +24,20 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
## Exceptions
### `src/dtoa.c`
Copyright (c) 1991, 2000, 2001 by Lucent Technologies.
Permission to use, copy, modify, and distribute this software for any
purpose without fee is hereby granted, provided that this entire notice
is included in all copies of any software which is or includes a copy
or modification of this software and in all copies of the supporting
documentation for such software.
THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY
REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.

View file

@ -21,9 +21,10 @@
#define JANSSON_USING_CMAKE
#endif
/* Note: when using cmake, JSON_INTEGER_IS_LONG_LONG is not defined nor used,
* as we will also check for __int64 etc types.
* (the definition was used in the automake system) */
/* If your compiler supports the `long long` type and the strtoll()
library function, JSON_INTEGER_IS_LONG_LONG is defined to 1,
otherwise to 0. */
#cmakedefine JSON_INTEGER_IS_LONG_LONG 1
/* Bring in the cmake-detected defines */
#cmakedefine HAVE_STDINT_H 1

View file

@ -21,6 +21,8 @@
#cmakedefine HAVE_LOCALE_H 1
#cmakedefine HAVE_SETLOCALE 1
#cmakedefine WORDS_BIGENDIAN 1
#cmakedefine HAVE_INT32_T 1
#ifndef HAVE_INT32_T
# define int32_t @JSON_INT32@
@ -50,4 +52,11 @@
#cmakedefine USE_URANDOM 1
#cmakedefine USE_WINDOWS_CRYPTOAPI 1
#cmakedefine USE_DTOA 1
#if USE_DTOA
# define DTOA_ENABLED 1
#else
# define DTOA_ENABLED 0
#endif
#define INITIAL_HASHTABLE_ORDER @JANSSON_INITIAL_HASHTABLE_ORDER@

View file

@ -25,6 +25,8 @@ AC_TYPE_UINT16_T
AC_TYPE_UINT8_T
AC_TYPE_LONG_LONG_INT
AC_C_BIGENDIAN
AC_C_INLINE
case $ac_cv_c_inline in
yes) json_inline=inline;;
@ -136,6 +138,19 @@ JSON_SYMVER_LDFLAGS=
AC_CHECK_DECL([__GLIBC__], [JSON_SYMVER_LDFLAGS=-Wl,--default-symver])
AC_SUBST([JSON_SYMVER_LDFLAGS])
AC_ARG_ENABLE([dtoa],
[AS_HELP_STRING([--enable-dtoa], [Use dtoa for optimal floating point to string conversion])],
[case "$enableval" in
yes) dtoa=yes ;;
no) dtoa=no ;;
*) AC_MSG_ERROR([bad value ${enableval} for --enable-dtoa]) ;;
esac], [dtoa=yes])
if test "$dtoa" = "yes"; then
AC_DEFINE([DTOA_ENABLED], [1],
[Define to 1 to use dtoa to convert floating points to strings])
fi
AM_CONDITIONAL([DTOA_ENABLED], [test "$dtoa" = "yes"])
AC_ARG_ENABLE([ossfuzzers],
[AS_HELP_STRING([--enable-ossfuzzers],
[Whether to generate the fuzzers for OSS-Fuzz])],

View file

@ -1,4 +1,4 @@
EXTRA_DIST = jansson.def
EXTRA_DIST = jansson.def dtoa.c
include_HEADERS = jansson.h
nodist_include_HEADERS = jansson_config.h
@ -22,6 +22,11 @@ libjansson_la_SOURCES = \
utf.h \
value.c \
version.c
if DTOA_ENABLED
libjansson_la_SOURCES += dtoa.c
endif
libjansson_la_LDFLAGS = \
-no-undefined \
-export-symbols-regex '^json_|^jansson_' \

6265
src/dtoa.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -23,8 +23,8 @@
#include "strbuffer.h"
#include "utf.h"
#define MAX_INTEGER_STR_LENGTH 100
#define MAX_REAL_STR_LENGTH 100
#define MAX_INTEGER_STR_LENGTH 25
#define MAX_REAL_STR_LENGTH 25
#define FLAGS_TO_INDENT(f) ((f)&0x1F)
#define FLAGS_TO_PRECISION(f) (((f) >> 11) & 0x1F)

View file

@ -42,21 +42,6 @@ static void to_locale(strbuffer_t *strbuffer) {
*pos = point;
}
static void from_locale(char *buffer) {
char point;
char *pos;
point = get_decimal_point();
if (point == '.') {
/* No conversion needed */
return;
}
pos = strchr(buffer, point);
if (pos)
*pos = '.';
}
int jsonp_strtod(strbuffer_t *strbuffer, double *out) {
double value;
char *end;
@ -76,6 +61,127 @@ int jsonp_strtod(strbuffer_t *strbuffer, double *out) {
return 0;
}
#if DTOA_ENABLED
/* see dtoa.c */
char *dtoa_r(double dd, int mode, int ndigits, int *decpt, int *sign, char **rve,
char *buf, size_t blen);
int jsonp_dtostr(char *buffer, size_t size, double value, int precision) {
/* adapted from `format_float_short()` in
* https://github.com/python/cpython/blob/2cf18a44303b6d84faa8ecffaecc427b53ae121e/Python/pystrtod.c#L969
*/
char digits[25];
char *digits_end;
int mode = precision == 0 ? 0 : 2;
int decpt, sign, exp_len, exp = 0, use_exp = 0;
int digits_len, vdigits_start, vdigits_end;
char *p;
if (dtoa_r(value, mode, precision, &decpt, &sign, &digits_end, digits, 25) == NULL) {
// digits is too short => should not happen
return -1;
}
digits_len = digits_end - digits;
if (decpt <= -4 || decpt > 16) {
use_exp = 1;
exp = decpt - 1;
decpt = 1;
}
vdigits_start = decpt <= 0 ? decpt - 1 : 0;
vdigits_end = digits_len;
if (!use_exp) {
/* decpt + 1 to add ".0" if value is an integer */
vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
} else {
vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
}
if (
/* sign, decimal point and trailing 0 byte */
(size_t)(3 +
/* total digit count (including zero padding on both sides) */
(vdigits_end - vdigits_start) +
/* exponent "e+100", max 3 numerical digits */
(use_exp ? 5 : 0)) > size) {
/* buffer is too short */
return -1;
}
p = buffer;
if (sign == 1) {
*p++ = '-';
}
/* note that exactly one of the three 'if' conditions is true,
so we include exactly one decimal point */
/* Zero padding on left of digit string */
if (decpt <= 0) {
memset(p, '0', decpt - vdigits_start);
p += decpt - vdigits_start;
*p++ = '.';
memset(p, '0', 0 - decpt);
p += 0 - decpt;
} else {
memset(p, '0', 0 - vdigits_start);
p += 0 - vdigits_start;
}
/* Digits, with included decimal point */
if (0 < decpt && decpt <= digits_len) {
strncpy(p, digits, decpt - 0);
p += decpt - 0;
*p++ = '.';
strncpy(p, digits + decpt, digits_len - decpt);
p += digits_len - decpt;
} else {
strncpy(p, digits, digits_len);
p += digits_len;
}
/* And zeros on the right */
if (digits_len < decpt) {
memset(p, '0', decpt - digits_len);
p += decpt - digits_len;
*p++ = '.';
memset(p, '0', vdigits_end - decpt);
p += vdigits_end - decpt;
} else {
memset(p, '0', vdigits_end - digits_len);
p += vdigits_end - digits_len;
}
if (p[-1] == '.')
p--;
if (use_exp) {
*p++ = 'e';
exp_len = sprintf(p, "%d", exp);
p += exp_len;
}
*p = '\0';
return (int)(p - buffer);
}
#else /* DTOA_ENABLED == 0 */
static void from_locale(char *buffer) {
char point;
char *pos;
point = get_decimal_point();
if (point == '.') {
/* No conversion needed */
return;
}
pos = strchr(buffer, point);
if (pos)
*pos = '.';
}
int jsonp_dtostr(char *buffer, size_t size, double value, int precision) {
int ret;
char *start, *end;
@ -128,3 +234,4 @@ int jsonp_dtostr(char *buffer, size_t size, double value, int precision) {
return (int)length;
}
#endif

View file

@ -1 +1 @@
[1.23456789, 1.0, 1.0000000000000002]
[1.23456789, 1.0, 1.0000000000000002, 1.23456e99, 1.23456e-99, 0.0000000000012345]

View file

@ -1 +1 @@
[1.235, 1.0, 1.0]
[1.235, 1.0, 1.0, 1.235e99, 1.235e-99, 1.235e-12]

View file

@ -0,0 +1 @@
[1.23e47, 0.1, 0.3, 9.99]

View file

@ -0,0 +1 @@
[1.2299999999999999e47, 0.10000000000000001, 0.29999999999999999, 9.9900000000000002]

View file

@ -1 +1 @@
[123e45]
[1.23e47, 0.1, 0.3, 9.99]

View file

@ -1 +1 @@
[1.2299999999999999e47]
[1.23e47, 0.1, 0.3, 9.99]

View file

@ -5,11 +5,22 @@
# Jansson is free software; you can redistribute it and/or modify
# it under the terms of the MIT license. See LICENSE for details.
dtoa_enabled() {
grep -q "DTOA_ENABLED 1" $top_builddir/jansson_private_config.h
}
is_test() {
test -d $test_path
}
do_run() {
if [ -f $test_path/skip_unless_dtoa ]; then
dtoa_enabled || return 77
fi
if [ -f $test_path/skip_if_dtoa ]; then
dtoa_enabled && return 77
fi
variant=$1
s=".$1"