From c803b93e8736ed255ff1a6db5ab6add7ccea736c Mon Sep 17 00:00:00 2001 From: Snild Dolkow Date: Fri, 25 Aug 2023 14:49:29 +0200 Subject: [PATCH 1/2] minicheck: Add simple subtest support This will be useful when a test runs through several examples and fails somewhere in the middle. The subtest string replaces the phase_info string (i.e. "during actual test") in the failure output. Added subtest info to various tests where I found for loops. --- expat/tests/acc_tests.c | 4 ++++ expat/tests/basic_tests.c | 7 +++++++ expat/tests/minicheck.c | 25 +++++++++++++++++++++++++ expat/tests/minicheck.h | 16 ++++++++++++++++ expat/tests/misc_tests.c | 2 ++ expat/tests/ns_tests.c | 1 + 6 files changed, 55 insertions(+) diff --git a/expat/tests/acc_tests.c b/expat/tests/acc_tests.c index 9c6fdcc5..0cc3edee 100644 --- a/expat/tests/acc_tests.c +++ b/expat/tests/acc_tests.c @@ -18,6 +18,7 @@ Copyright (c) 2019 David Loffredo Copyright (c) 2020 Tim Gates Copyright (c) 2021 Dong-hee Na + Copyright (c) 2023 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -384,6 +385,7 @@ START_TEST(test_helper_unsigned_char_to_printable) { // Smoke test unsigned char uc = 0; for (; uc < (unsigned char)-1; uc++) { + set_subtest("char %u", (unsigned)uc); const char *const printable = unsignedCharToPrintable(uc); if (printable == NULL) fail("unsignedCharToPrintable returned NULL"); @@ -392,8 +394,10 @@ START_TEST(test_helper_unsigned_char_to_printable) { } // Two concrete samples + set_subtest("char 'A'"); if (strcmp(unsignedCharToPrintable('A'), "A") != 0) fail("unsignedCharToPrintable result mistaken"); + set_subtest("char '\\'"); if (strcmp(unsignedCharToPrintable('\\'), "\\\\") != 0) fail("unsignedCharToPrintable result mistaken"); } diff --git a/expat/tests/basic_tests.c b/expat/tests/basic_tests.c index 31e02335..86045c21 100644 --- a/expat/tests/basic_tests.c +++ b/expat/tests/basic_tests.c @@ -1212,6 +1212,7 @@ START_TEST(test_ext_entity_invalid_parse) { const ExtFaults *fault = faults; for (; fault->parse_text != NULL; fault++) { + set_subtest("\"%s\"", fault->parse_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); XML_SetUserData(g_parser, (void *)fault); @@ -1282,6 +1283,7 @@ START_TEST(test_dtd_attr_handling) { AttTest *test; for (test = attr_data; test->definition != NULL; test++) { + set_subtest("%s", test->definition); XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler); XML_SetUserData(g_parser, test); if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog), @@ -1670,6 +1672,7 @@ START_TEST(test_bad_cdata) { size_t i = 0; for (; i < sizeof(cases) / sizeof(struct CaseData); i++) { + set_subtest("%s", cases[i].text); const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES( g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE); const enum XML_Error actualError = XML_GetErrorCode(g_parser); @@ -1737,6 +1740,7 @@ START_TEST(test_bad_cdata_utf16) { size_t i; for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) { + set_subtest("case %lu", (long unsigned)(i + 1)); enum XML_Status actual_status; enum XML_Error actual_error; @@ -2336,6 +2340,7 @@ START_TEST(test_ext_entity_invalid_suspended_parse) { ExtFaults *fault; for (fault = &faults[0]; fault->parse_text != NULL; fault++) { + set_subtest("%s", fault->parse_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_suspending_faulter); @@ -2939,6 +2944,7 @@ START_TEST(test_bad_ignore_section) { ExtFaults *fault; for (fault = &faults[0]; fault->parse_text != NULL; fault++) { + set_subtest("%s", fault->parse_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); XML_SetUserData(g_parser, fault); @@ -2982,6 +2988,7 @@ START_TEST(test_external_entity_values) { int i; for (i = 0; data_004_2[i].parse_text != NULL; i++) { + set_subtest("%s", data_004_2[i].parse_text); XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer); XML_SetUserData(g_parser, &data_004_2[i]); diff --git a/expat/tests/minicheck.c b/expat/tests/minicheck.c index 3a833bb6..283c40c4 100644 --- a/expat/tests/minicheck.c +++ b/expat/tests/minicheck.c @@ -15,6 +15,7 @@ Copyright (c) 2017 Rhodri James Copyright (c) 2018 Marco Maggi Copyright (c) 2019 David Loffredo + Copyright (c) 2023 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -41,6 +42,7 @@ # undef NDEBUG /* because test suite relies on assert(...) at the moment */ #endif +#include #include #include #include @@ -136,17 +138,35 @@ srunner_create(Suite *suite) { static jmp_buf env; +#define SUBTEST_LEN (50) // informative, but not too long static char const *_check_current_function = NULL; +static char _check_current_subtest[SUBTEST_LEN]; static int _check_current_lineno = -1; static char const *_check_current_filename = NULL; void _check_set_test_info(char const *function, char const *filename, int lineno) { _check_current_function = function; + set_subtest("%s", ""); _check_current_lineno = lineno; _check_current_filename = filename; } +void +set_subtest(char const *fmt, ...) { + va_list ap; + va_start(ap, fmt); + vsnprintf(_check_current_subtest, SUBTEST_LEN, fmt, ap); + va_end(ap); + // replace line feeds with spaces, for nicer error logs + for (size_t i = 0; i < SUBTEST_LEN; ++i) { + if (_check_current_subtest[i] == '\n') { + _check_current_subtest[i] = ' '; + } + } + _check_current_subtest[SUBTEST_LEN - 1] = '\0'; // ensure termination +} + static void handle_success(int verbosity) { if (verbosity >= CK_VERBOSE) { @@ -158,6 +178,9 @@ static void handle_failure(SRunner *runner, int verbosity, const char *phase_info) { runner->nfailures++; if (verbosity != CK_SILENT) { + if (strlen(_check_current_subtest) != 0) { + phase_info = _check_current_subtest; + } printf("FAIL: %s (%s at %s:%d)\n", _check_current_function, phase_info, _check_current_filename, _check_current_lineno); } @@ -174,6 +197,7 @@ srunner_run_all(SRunner *runner, int verbosity) { volatile int i; for (i = 0; i < tc->ntests; ++i) { runner->nchecks++; + set_subtest("%s", ""); if (tc->setup != NULL) { /* setup */ @@ -189,6 +213,7 @@ srunner_run_all(SRunner *runner, int verbosity) { continue; } (tc->tests[i])(); + set_subtest("%s", ""); /* teardown */ if (tc->teardown != NULL) { diff --git a/expat/tests/minicheck.h b/expat/tests/minicheck.h index 8a189f88..28e1a60d 100644 --- a/expat/tests/minicheck.h +++ b/expat/tests/minicheck.h @@ -15,6 +15,7 @@ Copyright (c) 2004-2006 Fred L. Drake, Jr. Copyright (c) 2006-2012 Karl Waclawek Copyright (c) 2016-2017 Sebastian Pipping + Copyright (c) 2023 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -59,6 +60,19 @@ extern "C" { # define __func__ __FUNCTION__ # endif +/* PRINTF_LIKE has two effects: + 1. Make clang's -Wformat-nonliteral stop warning about non-literal format + strings in annotated functions' code. + 2. Make both clang and gcc's -Wformat-nonliteral warn about *callers* of + the annotated function that use a non-literal format string. +*/ +# if defined(__GNUC__) +# define PRINTF_LIKE(fmtpos, argspos) \ + __attribute__((format(printf, fmtpos, argspos))) +# else +# define PRINTF_LIKE(fmtpos, argspos) +# endif + # define START_TEST(testname) \ static void testname(void) { \ _check_set_test_info(__func__, __FILE__, __LINE__); \ @@ -67,6 +81,8 @@ extern "C" { } \ } +void PRINTF_LIKE(1, 2) set_subtest(char const *fmt, ...); + # define fail(msg) _fail_unless(0, __FILE__, __LINE__, msg) typedef void (*tcase_setup_function)(void); diff --git a/expat/tests/misc_tests.c b/expat/tests/misc_tests.c index d575b340..ae0c89cd 100644 --- a/expat/tests/misc_tests.c +++ b/expat/tests/misc_tests.c @@ -18,6 +18,7 @@ Copyright (c) 2019 David Loffredo Copyright (c) 2020 Tim Gates Copyright (c) 2021 Dong-hee Na + Copyright (c) 2023 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -328,6 +329,7 @@ START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) { size_t inputIndex = 0; for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) { + set_subtest("%s", inputs[inputIndex]); XML_Parser parser; enum XML_Status parseResult; int setParamEntityResult; diff --git a/expat/tests/ns_tests.c b/expat/tests/ns_tests.c index 5ecb09ac..bdf28979 100644 --- a/expat/tests/ns_tests.c +++ b/expat/tests/ns_tests.c @@ -694,6 +694,7 @@ START_TEST(test_ns_separator_in_uri) { size_t i = 0; size_t failCount = 0; for (; i < sizeof(cases) / sizeof(cases[0]); i++) { + set_subtest("%s", cases[i].doc); XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep); XML_SetElementHandler(parser, dummy_start_element, dummy_end_element); if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc), From b1e955449cea6bb5862cd249e659c2123bd95a9e Mon Sep 17 00:00:00 2001 From: Snild Dolkow Date: Thu, 31 Aug 2023 12:36:43 +0200 Subject: [PATCH 2/2] Always consume BOM bytes when found in prolog The byte order mark is not correctly consumed when followed by an incomplete token in a non-final parse. This results in the BOM staying in the buffer, causing an invalid token error later. This was not detected by existing tests because they either parse everything in one call, or add a single byte at a time. By moving `s` forward when we find a BOM, we make sure that the BOM bytes are properly consumed in all cases. --- expat/lib/xmlparse.c | 19 +++++----- expat/tests/basic_tests.c | 78 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 9 deletions(-) diff --git a/expat/lib/xmlparse.c b/expat/lib/xmlparse.c index a18aa2e3..4227bdfa 100644 --- a/expat/lib/xmlparse.c +++ b/expat/lib/xmlparse.c @@ -36,6 +36,7 @@ Copyright (c) 2022 Samanta Navarro Copyright (c) 2022 Jeffrey Walton Copyright (c) 2022 Jann Horn + Copyright (c) 2023 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -4483,15 +4484,15 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, parser->m_processor = entityValueProcessor; return entityValueProcessor(parser, next, end, nextPtr); } - /* If we are at the end of the buffer, this would cause XmlPrologTok to - return XML_TOK_NONE on the next call, which would then cause the - function to exit with *nextPtr set to s - that is what we want for other - tokens, but not for the BOM - we would rather like to skip it; - then, when this routine is entered the next time, XmlPrologTok will - return XML_TOK_INVALID, since the BOM is still in the buffer + /* XmlPrologTok has now set the encoding based on the BOM it found, and we + must move s and nextPtr forward to consume the BOM. + + If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we + would leave the BOM in the buffer and return. On the next call to this + function, our XmlPrologTok call would return XML_TOK_INVALID, since it + is not valid to have multiple BOMs. */ - else if (tok == XML_TOK_BOM && next == end - && ! parser->m_parsingStatus.finalBuffer) { + else if (tok == XML_TOK_BOM) { # ifdef XML_DTD if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, XML_ACCOUNT_DIRECT)) { @@ -4501,7 +4502,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, # endif *nextPtr = next; - return XML_ERROR_NONE; + s = next; } /* If we get this token, we have the start of what might be a normal tag, but not a declaration (i.e. it doesn't begin with diff --git a/expat/tests/basic_tests.c b/expat/tests/basic_tests.c index 86045c21..6f7e5a5e 100644 --- a/expat/tests/basic_tests.c +++ b/expat/tests/basic_tests.c @@ -2955,6 +2955,83 @@ START_TEST(test_bad_ignore_section) { } END_TEST +struct bom_testdata { + const char *external; + int split; + XML_Bool nested_callback_happened; +}; + +static int XMLCALL +external_bom_checker(XML_Parser parser, const XML_Char *context, + const XML_Char *base, const XML_Char *systemId, + const XML_Char *publicId) { + const char *text = ""; + UNUSED_P(base); + UNUSED_P(systemId); + UNUSED_P(publicId); + + XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); + if (ext_parser == NULL) + fail("Could not create external entity parser"); + + if (! xcstrcmp(systemId, XCS("004-2.ent"))) { + struct bom_testdata *const testdata + = (struct bom_testdata *)XML_GetUserData(parser); + const char *const external = testdata->external; + const int split = testdata->split; + testdata->nested_callback_happened = XML_TRUE; + + if (XML_Parse(ext_parser, external, split, XML_FALSE) != XML_STATUS_OK) { + xml_failure(ext_parser); + } + text = external + split; // the parse below will continue where we left off. + } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) { + text = "\n" + "\n" + "\n"; + } else { + fail("unknown systemId"); + } + + if (XML_Parse(ext_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) + xml_failure(ext_parser); + + XML_ParserFree(ext_parser); + return XML_STATUS_OK; +} + +/* regression test: BOM should be consumed when followed by a partial token. */ +START_TEST(test_external_bom_consumed) { + const char *const text = "\n" + "\n"; + const char *const external = "\xEF\xBB\xBF"; + const int len = (int)strlen(external); + for (int split = 0; split <= len; ++split) { + set_subtest("split at byte %d", split); + + struct bom_testdata testdata; + testdata.external = external; + testdata.split = split; + testdata.nested_callback_happened = XML_FALSE; + + XML_Parser parser = XML_ParserCreate(NULL); + if (parser == NULL) { + fail("Couldn't create parser"); + } + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_bom_checker); + XML_SetUserData(parser, &testdata); + if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(parser); + if (! testdata.nested_callback_happened) { + fail("ref handler not called"); + } + XML_ParserFree(parser); + } +} +END_TEST + /* Test recursive parsing */ START_TEST(test_external_entity_values) { const char *text = "\n" @@ -5047,6 +5124,7 @@ make_basic_test_case(Suite *s) { tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be); tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section); + tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed); tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone); tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);