From 9fe3672459c1bf10926b85f013aa1b623d855545 Mon Sep 17 00:00:00 2001 From: Snild Dolkow Date: Mon, 18 Sep 2023 20:32:55 +0200 Subject: [PATCH] tests: Run both with and without partial token heuristic If we always run with the heuristic enabled, it may hide some bugs by grouping up input into bigger parse attempts. CI-fighting-assistance-by: Sebastian Pipping --- expat/lib/internal.h | 3 +++ expat/lib/xmlparse.c | 5 ++++- expat/tests/basic_tests.c | 12 +++++++++++- expat/tests/runtests.c | 12 ++++++++---- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/expat/lib/internal.h b/expat/lib/internal.h index 03c8fdec..47bc6129 100644 --- a/expat/lib/internal.h +++ b/expat/lib/internal.h @@ -31,6 +31,7 @@ Copyright (c) 2016-2022 Sebastian Pipping Copyright (c) 2018 Yury Gribov Copyright (c) 2019 David Loffredo + Copyright (c) 2023 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -160,6 +161,8 @@ unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); const char *unsignedCharToPrintable(unsigned char c); #endif +extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c + #ifdef __cplusplus } #endif diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c index 71968522..717eca69 100644 --- a/expat/lib/xmlparse.c +++ b/expat/lib/xmlparse.c @@ -625,6 +625,8 @@ static unsigned long getDebugLevel(const char *variableName, ? 0 \ : ((*((pool)->ptr)++ = c), 1)) +XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c + struct XML_ParserStruct { /* The first member must be m_userData so that the XML_GetUserData macro works. */ @@ -985,7 +987,8 @@ callProcessor(XML_Parser parser, const char *start, const char *end, const char **endPtr) { const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); - if (! parser->m_parsingStatus.finalBuffer) { + if (g_reparseDeferralEnabledDefault + && ! parser->m_parsingStatus.finalBuffer) { // Heuristic: don't try to parse a partial token again until the amount of // available data has increased significantly. const size_t had_before = parser->m_partialTokenBytesBefore; diff --git a/expat/tests/basic_tests.c b/expat/tests/basic_tests.c index acfdc806..b309b782 100644 --- a/expat/tests/basic_tests.c +++ b/expat/tests/basic_tests.c @@ -4641,6 +4641,12 @@ START_TEST(test_utf8_in_start_tags) { char doc[1024]; size_t failCount = 0; + // we need all the bytes to be parsed, but we don't want the errors that can + // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on. + if (g_reparseDeferralEnabledDefault) { + return; + } + for (; i < sizeof(cases) / sizeof(cases[0]); i++) { size_t j = 0; for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { @@ -5204,6 +5210,9 @@ START_TEST(test_big_tokens_take_linear_time) { memset(aaaaaa, 'a', fillsize); + if (! g_reparseDeferralEnabledDefault) { + return; // heuristic is disabled; we would get O(n^2) and fail. + } #if defined(_WIN32) if (CLOCKS_PER_SEC < 100000) { // Skip this test if clock() doesn't have reasonably good resolution. @@ -5219,7 +5228,8 @@ START_TEST(test_big_tokens_take_linear_time) { XML_Parser parser = XML_ParserCreate(NULL); assert_true(parser != NULL); enum XML_Status status; - set_subtest("%saaaaaa%s", text[i].pre, text[i].post); + set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown, + text[i].pre, text[i].post); const clock_t start = clock(); // parse the start text diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c index db983dce..7f4a438d 100644 --- a/expat/tests/runtests.c +++ b/expat/tests/runtests.c @@ -98,10 +98,14 @@ main(int argc, char *argv[]) { printf("Expat version: %" XML_FMT_STR "\n", XML_ExpatVersion()); for (g_chunkSize = 0; g_chunkSize <= 5; g_chunkSize++) { - char context[100]; - snprintf(context, sizeof(context), "chunksize=%d", g_chunkSize); - context[sizeof(context) - 1] = '\0'; - srunner_run_all(sr, context, verbosity); + for (int enabled = 0; enabled <= 1; ++enabled) { + char context[100]; + g_reparseDeferralEnabledDefault = enabled; + snprintf(context, sizeof(context), "chunksize=%d deferral=%d", + g_chunkSize, enabled); + context[sizeof(context) - 1] = '\0'; + srunner_run_all(sr, context, verbosity); + } } srunner_summarize(sr, verbosity); nf = srunner_ntests_failed(sr);