tests: Run both with and without partial token heuristic

If we always run with the heuristic enabled, it may hide some bugs by
grouping up input into bigger parse attempts.

CI-fighting-assistance-by: Sebastian Pipping <sebastian@pipping.org>
This commit is contained in:
Snild Dolkow 2023-09-18 20:32:55 +02:00
parent 1b9d398517
commit 9fe3672459
4 changed files with 26 additions and 6 deletions

View file

@ -31,6 +31,7 @@
Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2018 Yury Gribov <tetra2005@gmail.com>
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@ -160,6 +161,8 @@ unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser);
const char *unsignedCharToPrintable(unsigned char c);
#endif
extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
#ifdef __cplusplus
}
#endif

View file

@ -625,6 +625,8 @@ static unsigned long getDebugLevel(const char *variableName,
? 0 \
: ((*((pool)->ptr)++ = c), 1))
XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // written ONLY in runtests.c
struct XML_ParserStruct {
/* The first member must be m_userData so that the XML_GetUserData
macro works. */
@ -985,7 +987,8 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
const char **endPtr) {
const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
if (! parser->m_parsingStatus.finalBuffer) {
if (g_reparseDeferralEnabledDefault
&& ! parser->m_parsingStatus.finalBuffer) {
// Heuristic: don't try to parse a partial token again until the amount of
// available data has increased significantly.
const size_t had_before = parser->m_partialTokenBytesBefore;

View file

@ -4641,6 +4641,12 @@ START_TEST(test_utf8_in_start_tags) {
char doc[1024];
size_t failCount = 0;
// we need all the bytes to be parsed, but we don't want the errors that can
// trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
if (g_reparseDeferralEnabledDefault) {
return;
}
for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
size_t j = 0;
for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
@ -5204,6 +5210,9 @@ START_TEST(test_big_tokens_take_linear_time) {
memset(aaaaaa, 'a', fillsize);
if (! g_reparseDeferralEnabledDefault) {
return; // heuristic is disabled; we would get O(n^2) and fail.
}
#if defined(_WIN32)
if (CLOCKS_PER_SEC < 100000) {
// Skip this test if clock() doesn't have reasonably good resolution.
@ -5219,7 +5228,8 @@ START_TEST(test_big_tokens_take_linear_time) {
XML_Parser parser = XML_ParserCreate(NULL);
assert_true(parser != NULL);
enum XML_Status status;
set_subtest("%saaaaaa%s", text[i].pre, text[i].post);
set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown,
text[i].pre, text[i].post);
const clock_t start = clock();
// parse the start text

View file

@ -98,10 +98,14 @@ main(int argc, char *argv[]) {
printf("Expat version: %" XML_FMT_STR "\n", XML_ExpatVersion());
for (g_chunkSize = 0; g_chunkSize <= 5; g_chunkSize++) {
char context[100];
snprintf(context, sizeof(context), "chunksize=%d", g_chunkSize);
context[sizeof(context) - 1] = '\0';
srunner_run_all(sr, context, verbosity);
for (int enabled = 0; enabled <= 1; ++enabled) {
char context[100];
g_reparseDeferralEnabledDefault = enabled;
snprintf(context, sizeof(context), "chunksize=%d deferral=%d",
g_chunkSize, enabled);
context[sizeof(context) - 1] = '\0';
srunner_run_all(sr, context, verbosity);
}
}
srunner_summarize(sr, verbosity);
nf = srunner_ntests_failed(sr);