mirror of
https://github.com/libexpat/libexpat.git
synced 2025-04-06 05:34:59 +00:00
Merge pull request #753 from SonyMobile/consume-bom
Fix parse-size-dependent "invalid token" error for external entities that start with a byte order mark
This commit is contained in:
commit
bcdc25b04d
7 changed files with 143 additions and 9 deletions
|
@ -36,6 +36,7 @@
|
|||
Copyright (c) 2022 Samanta Navarro <ferivoz@riseup.net>
|
||||
Copyright (c) 2022 Jeffrey Walton <noloader@gmail.com>
|
||||
Copyright (c) 2022 Jann Horn <jannh@google.com>
|
||||
Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
|
||||
Licensed under the MIT license:
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
|
@ -4483,15 +4484,15 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
|
|||
parser->m_processor = entityValueProcessor;
|
||||
return entityValueProcessor(parser, next, end, nextPtr);
|
||||
}
|
||||
/* If we are at the end of the buffer, this would cause XmlPrologTok to
|
||||
return XML_TOK_NONE on the next call, which would then cause the
|
||||
function to exit with *nextPtr set to s - that is what we want for other
|
||||
tokens, but not for the BOM - we would rather like to skip it;
|
||||
then, when this routine is entered the next time, XmlPrologTok will
|
||||
return XML_TOK_INVALID, since the BOM is still in the buffer
|
||||
/* XmlPrologTok has now set the encoding based on the BOM it found, and we
|
||||
must move s and nextPtr forward to consume the BOM.
|
||||
|
||||
If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
|
||||
would leave the BOM in the buffer and return. On the next call to this
|
||||
function, our XmlPrologTok call would return XML_TOK_INVALID, since it
|
||||
is not valid to have multiple BOMs.
|
||||
*/
|
||||
else if (tok == XML_TOK_BOM && next == end
|
||||
&& ! parser->m_parsingStatus.finalBuffer) {
|
||||
else if (tok == XML_TOK_BOM) {
|
||||
# ifdef XML_DTD
|
||||
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
|
||||
XML_ACCOUNT_DIRECT)) {
|
||||
|
@ -4501,7 +4502,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
|
|||
# endif
|
||||
|
||||
*nextPtr = next;
|
||||
return XML_ERROR_NONE;
|
||||
s = next;
|
||||
}
|
||||
/* If we get this token, we have the start of what might be a
|
||||
normal tag, but not a declaration (i.e. it doesn't begin with
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
|
||||
Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
|
||||
Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
|
||||
Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
|
||||
Licensed under the MIT license:
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
|
@ -384,6 +385,7 @@ START_TEST(test_helper_unsigned_char_to_printable) {
|
|||
// Smoke test
|
||||
unsigned char uc = 0;
|
||||
for (; uc < (unsigned char)-1; uc++) {
|
||||
set_subtest("char %u", (unsigned)uc);
|
||||
const char *const printable = unsignedCharToPrintable(uc);
|
||||
if (printable == NULL)
|
||||
fail("unsignedCharToPrintable returned NULL");
|
||||
|
@ -392,8 +394,10 @@ START_TEST(test_helper_unsigned_char_to_printable) {
|
|||
}
|
||||
|
||||
// Two concrete samples
|
||||
set_subtest("char 'A'");
|
||||
if (strcmp(unsignedCharToPrintable('A'), "A") != 0)
|
||||
fail("unsignedCharToPrintable result mistaken");
|
||||
set_subtest("char '\\'");
|
||||
if (strcmp(unsignedCharToPrintable('\\'), "\\\\") != 0)
|
||||
fail("unsignedCharToPrintable result mistaken");
|
||||
}
|
||||
|
|
|
@ -1212,6 +1212,7 @@ START_TEST(test_ext_entity_invalid_parse) {
|
|||
const ExtFaults *fault = faults;
|
||||
|
||||
for (; fault->parse_text != NULL; fault++) {
|
||||
set_subtest("\"%s\"", fault->parse_text);
|
||||
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
|
||||
XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
|
||||
XML_SetUserData(g_parser, (void *)fault);
|
||||
|
@ -1282,6 +1283,7 @@ START_TEST(test_dtd_attr_handling) {
|
|||
AttTest *test;
|
||||
|
||||
for (test = attr_data; test->definition != NULL; test++) {
|
||||
set_subtest("%s", test->definition);
|
||||
XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
|
||||
XML_SetUserData(g_parser, test);
|
||||
if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
|
||||
|
@ -1670,6 +1672,7 @@ START_TEST(test_bad_cdata) {
|
|||
|
||||
size_t i = 0;
|
||||
for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
|
||||
set_subtest("%s", cases[i].text);
|
||||
const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
|
||||
g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
|
||||
const enum XML_Error actualError = XML_GetErrorCode(g_parser);
|
||||
|
@ -1737,6 +1740,7 @@ START_TEST(test_bad_cdata_utf16) {
|
|||
size_t i;
|
||||
|
||||
for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
|
||||
set_subtest("case %lu", (long unsigned)(i + 1));
|
||||
enum XML_Status actual_status;
|
||||
enum XML_Error actual_error;
|
||||
|
||||
|
@ -2336,6 +2340,7 @@ START_TEST(test_ext_entity_invalid_suspended_parse) {
|
|||
ExtFaults *fault;
|
||||
|
||||
for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
|
||||
set_subtest("%s", fault->parse_text);
|
||||
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
|
||||
XML_SetExternalEntityRefHandler(g_parser,
|
||||
external_entity_suspending_faulter);
|
||||
|
@ -2939,6 +2944,7 @@ START_TEST(test_bad_ignore_section) {
|
|||
ExtFaults *fault;
|
||||
|
||||
for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
|
||||
set_subtest("%s", fault->parse_text);
|
||||
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
|
||||
XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
|
||||
XML_SetUserData(g_parser, fault);
|
||||
|
@ -2949,6 +2955,83 @@ START_TEST(test_bad_ignore_section) {
|
|||
}
|
||||
END_TEST
|
||||
|
||||
/* State shared between test_external_bom_consumed and external_bom_checker;
   the handler retrieves it from the parent parser with XML_GetUserData. */
struct bom_testdata {
|
||||
const char *external; /* complete text of the external entity '004-2.ent' */
|
||||
int split; /* byte count passed to the first, non-final XML_Parse call */
|
||||
XML_Bool nested_callback_happened; /* set to XML_TRUE by the '004-2.ent' branch */
|
||||
};
|
||||
|
||||
static int XMLCALL
|
||||
external_bom_checker(XML_Parser parser, const XML_Char *context,
|
||||
const XML_Char *base, const XML_Char *systemId,
|
||||
const XML_Char *publicId) {
|
||||
const char *text = "";
|
||||
UNUSED_P(base);
|
||||
UNUSED_P(systemId);
|
||||
UNUSED_P(publicId);
|
||||
|
||||
XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
|
||||
if (ext_parser == NULL)
|
||||
fail("Could not create external entity parser");
|
||||
|
||||
if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
|
||||
struct bom_testdata *const testdata
|
||||
= (struct bom_testdata *)XML_GetUserData(parser);
|
||||
const char *const external = testdata->external;
|
||||
const int split = testdata->split;
|
||||
testdata->nested_callback_happened = XML_TRUE;
|
||||
|
||||
if (XML_Parse(ext_parser, external, split, XML_FALSE) != XML_STATUS_OK) {
|
||||
xml_failure(ext_parser);
|
||||
}
|
||||
text = external + split; // the parse below will continue where we left off.
|
||||
} else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
|
||||
text = "<!ELEMENT doc EMPTY>\n"
|
||||
"<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
|
||||
"<!ENTITY % e2 '%e1;'>\n";
|
||||
} else {
|
||||
fail("unknown systemId");
|
||||
}
|
||||
|
||||
if (XML_Parse(ext_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
|
||||
xml_failure(ext_parser);
|
||||
|
||||
XML_ParserFree(ext_parser);
|
||||
return XML_STATUS_OK;
|
||||
}
|
||||
|
||||
/* regression test: BOM should be consumed when followed by a partial token. */
|
||||
START_TEST(test_external_bom_consumed) {
|
||||
const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
|
||||
"<doc></doc>\n";
|
||||
const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
|
||||
const int len = (int)strlen(external);
|
||||
for (int split = 0; split <= len; ++split) {
|
||||
set_subtest("split at byte %d", split);
|
||||
|
||||
struct bom_testdata testdata;
|
||||
testdata.external = external;
|
||||
testdata.split = split;
|
||||
testdata.nested_callback_happened = XML_FALSE;
|
||||
|
||||
XML_Parser parser = XML_ParserCreate(NULL);
|
||||
if (parser == NULL) {
|
||||
fail("Couldn't create parser");
|
||||
}
|
||||
XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
|
||||
XML_SetExternalEntityRefHandler(parser, external_bom_checker);
|
||||
XML_SetUserData(parser, &testdata);
|
||||
if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
|
||||
== XML_STATUS_ERROR)
|
||||
xml_failure(parser);
|
||||
if (! testdata.nested_callback_happened) {
|
||||
fail("ref handler not called");
|
||||
}
|
||||
XML_ParserFree(parser);
|
||||
}
|
||||
}
|
||||
END_TEST
|
||||
|
||||
/* Test recursive parsing */
|
||||
START_TEST(test_external_entity_values) {
|
||||
const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
|
||||
|
@ -2982,6 +3065,7 @@ START_TEST(test_external_entity_values) {
|
|||
int i;
|
||||
|
||||
for (i = 0; data_004_2[i].parse_text != NULL; i++) {
|
||||
set_subtest("%s", data_004_2[i].parse_text);
|
||||
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
|
||||
XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
|
||||
XML_SetUserData(g_parser, &data_004_2[i]);
|
||||
|
@ -5040,6 +5124,7 @@ make_basic_test_case(Suite *s) {
|
|||
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
|
||||
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
|
||||
tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
|
||||
tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
|
||||
tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
|
||||
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
|
||||
tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
|
||||
Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
|
||||
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
|
||||
Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
|
||||
Licensed under the MIT license:
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
|
@ -41,6 +42,7 @@
|
|||
# undef NDEBUG /* because test suite relies on assert(...) at the moment */
|
||||
#endif
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <setjmp.h>
|
||||
|
@ -137,17 +139,35 @@ srunner_create(Suite *suite) {
|
|||
|
||||
static jmp_buf env;
|
||||
|
||||
/* Size in bytes of the subtest label buffer below. */
#define SUBTEST_LEN (50) // informative, but not too long
|
||||
/* Name of the test function, as recorded by _check_set_test_info. */
static char const *_check_current_function = NULL;
|
||||
/* Label of the current subtest, written by set_subtest; an empty string
   means no subtest label is set. */
static char _check_current_subtest[SUBTEST_LEN];
|
||||
/* Source line recorded by _check_set_test_info; -1 until first set. */
static int _check_current_lineno = -1;
|
||||
/* Source file name recorded by _check_set_test_info. */
static char const *_check_current_filename = NULL;
|
||||
|
||||
void
|
||||
_check_set_test_info(char const *function, char const *filename, int lineno) {
|
||||
_check_current_function = function;
|
||||
set_subtest("%s", "");
|
||||
_check_current_lineno = lineno;
|
||||
_check_current_filename = filename;
|
||||
}
|
||||
|
||||
void
|
||||
set_subtest(char const *fmt, ...) {
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
vsnprintf(_check_current_subtest, SUBTEST_LEN, fmt, ap);
|
||||
va_end(ap);
|
||||
// replace line feeds with spaces, for nicer error logs
|
||||
for (size_t i = 0; i < SUBTEST_LEN; ++i) {
|
||||
if (_check_current_subtest[i] == '\n') {
|
||||
_check_current_subtest[i] = ' ';
|
||||
}
|
||||
}
|
||||
_check_current_subtest[SUBTEST_LEN - 1] = '\0'; // ensure termination
|
||||
}
|
||||
|
||||
static void
|
||||
handle_success(int verbosity) {
|
||||
if (verbosity >= CK_VERBOSE) {
|
||||
|
@ -159,6 +179,9 @@ static void
|
|||
handle_failure(SRunner *runner, int verbosity, const char *phase_info) {
|
||||
runner->nfailures++;
|
||||
if (verbosity != CK_SILENT) {
|
||||
if (strlen(_check_current_subtest) != 0) {
|
||||
phase_info = _check_current_subtest;
|
||||
}
|
||||
printf("FAIL: %s (%s at %s:%d)\n", _check_current_function, phase_info,
|
||||
_check_current_filename, _check_current_lineno);
|
||||
}
|
||||
|
@ -175,6 +198,7 @@ srunner_run_all(SRunner *runner, int verbosity) {
|
|||
volatile int i;
|
||||
for (i = 0; i < tc->ntests; ++i) {
|
||||
runner->nchecks++;
|
||||
set_subtest("%s", "");
|
||||
|
||||
if (tc->setup != NULL) {
|
||||
/* setup */
|
||||
|
@ -190,6 +214,7 @@ srunner_run_all(SRunner *runner, int verbosity) {
|
|||
continue;
|
||||
}
|
||||
(tc->tests[i])();
|
||||
set_subtest("%s", "");
|
||||
|
||||
/* teardown */
|
||||
if (tc->teardown != NULL) {
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
Copyright (c) 2004-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
|
||||
Copyright (c) 2006-2012 Karl Waclawek <karl@waclawek.net>
|
||||
Copyright (c) 2016-2017 Sebastian Pipping <sebastian@pipping.org>
|
||||
Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
|
||||
Licensed under the MIT license:
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
|
@ -59,6 +60,19 @@ extern "C" {
|
|||
# define __func__ __FUNCTION__
|
||||
# endif
|
||||
|
||||
/* PRINTF_LIKE(fmtpos, argspos) annotates a function declaration as taking a
   printf-style format string.  It has two effects:
   1. Make clang's -Wformat-nonliteral stop warning about non-literal format
      strings in annotated functions' code.
   2. Make both clang and gcc's -Wformat-nonliteral warn about *callers* of
      the annotated function that use a non-literal format string.

   fmtpos and argspos are passed through unchanged to the compiler's
   format(printf, ...) attribute.  When __GNUC__ is not defined the macro
   expands to nothing, so annotated declarations still compile.
*/
|
||||
# if defined(__GNUC__)
|
||||
# define PRINTF_LIKE(fmtpos, argspos) \
|
||||
__attribute__((format(printf, fmtpos, argspos)))
|
||||
# else
|
||||
# define PRINTF_LIKE(fmtpos, argspos)
|
||||
# endif
|
||||
|
||||
# define START_TEST(testname) \
|
||||
static void testname(void) { \
|
||||
_check_set_test_info(__func__, __FILE__, __LINE__); \
|
||||
|
@ -67,6 +81,8 @@ extern "C" {
|
|||
} \
|
||||
}
|
||||
|
||||
/* Set the label of the current subtest; fmt and the arguments after it are
   interpreted printf-style. */
void PRINTF_LIKE(1, 2) set_subtest(char const *fmt, ...);
|
||||
|
||||
# define fail(msg) _fail_unless(0, __FILE__, __LINE__, msg)
|
||||
|
||||
typedef void (*tcase_setup_function)(void);
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
|
||||
Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
|
||||
Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
|
||||
Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
|
||||
Licensed under the MIT license:
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
|
@ -328,6 +329,7 @@ START_TEST(test_misc_deny_internal_entity_closing_doctype_issue_317) {
|
|||
size_t inputIndex = 0;
|
||||
|
||||
for (; inputIndex < sizeof(inputs) / sizeof(inputs[0]); inputIndex++) {
|
||||
set_subtest("%s", inputs[inputIndex]);
|
||||
XML_Parser parser;
|
||||
enum XML_Status parseResult;
|
||||
int setParamEntityResult;
|
||||
|
|
|
@ -694,6 +694,7 @@ START_TEST(test_ns_separator_in_uri) {
|
|||
size_t i = 0;
|
||||
size_t failCount = 0;
|
||||
for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
|
||||
set_subtest("%s", cases[i].doc);
|
||||
XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);
|
||||
XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
|
||||
if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
|
||||
|
|
Loading…
Add table
Reference in a new issue