From 72cdab2aebfb321e8127aadef57421206af78389 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 21 Mar 2021 14:40:06 +0100 Subject: [PATCH 1/4] Revert "runtests.c: Workaround issue with combining XML_MIN_SIZE and ASan (#332)" This reverts commit 48d4929bffdd60c88b913b58365062c92b789363. --- expat/tests/runtests.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c index 90245607..ceb10545 100644 --- a/expat/tests/runtests.c +++ b/expat/tests/runtests.c @@ -11301,14 +11301,10 @@ make_suite(void) { tcase_add_test(tc_basic, test_good_cdata_utf16); tcase_add_test(tc_basic, test_good_cdata_utf16_le); tcase_add_test(tc_basic, test_long_cdata_utf16); -#ifndef XML_MIN_SIZE /* FIXME workaround -DXML_MIN_SIZE + ASan (issue #332) */ tcase_add_test(tc_basic, test_multichar_cdata_utf16); -#endif tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair); tcase_add_test(tc_basic, test_bad_cdata); -#ifndef XML_MIN_SIZE /* FIXME workaround -DXML_MIN_SIZE + ASan (issue #332) */ tcase_add_test(tc_basic, test_bad_cdata_utf16); -#endif tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls); tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls); tcase_add_test(tc_basic, test_memory_allocation); From 841338da7ff8168ce57355809c79a0dbc9cf9f3f Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sun, 21 Mar 2021 14:41:31 +0100 Subject: [PATCH 2/4] Revert "runtests.c: Fix compiler warning "unused function" with -DEXPAT_MIN_SIZE=ON" This reverts commit 76f2beee810aa0a6bee3fe63e33fa170961e565d. --- expat/tests/runtests.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/expat/tests/runtests.c b/expat/tests/runtests.c index ceb10545..51646152 100644 --- a/expat/tests/runtests.c +++ b/expat/tests/runtests.c @@ -2245,7 +2245,6 @@ START_TEST(test_long_cdata_utf16) { END_TEST /* Test handling of multiple unit UTF-16 characters */ -#ifndef XML_MIN_SIZE /* FIXME workaround -DXML_MIN_SIZE + ASan (issue #332) */ START_TEST(test_multichar_cdata_utf16) { /* Test data is: * @@ -2267,11 +2266,11 @@ START_TEST(test_multichar_cdata_utf16) { "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f" "\0]\0]\0>\0<\0/\0a\0>"; -# ifdef XML_UNICODE +#ifdef XML_UNICODE const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f"); -# else +#else const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f"); -# endif +#endif CharData storage; CharData_Init(&storage); @@ -2284,7 +2283,6 @@ START_TEST(test_multichar_cdata_utf16) { CharData_CheckXMLChars(&storage, expected); } END_TEST -#endif /* ifndef XML_MIN_SIZE */ /* Test that an element name with a UTF-16 surrogate pair is rejected */ START_TEST(test_utf16_bad_surrogate_pair) { @@ -2369,7 +2367,6 @@ START_TEST(test_bad_cdata) { END_TEST /* Test failures in UTF-16 CDATA */ -#ifndef XML_MIN_SIZE /* FIXME workaround -DXML_MIN_SIZE + ASan (issue #332) */ START_TEST(test_bad_cdata_utf16) { struct CaseData { size_t text_bytes; @@ -2442,7 +2439,6 @@ START_TEST(test_bad_cdata_utf16) { } } END_TEST -#endif /* ifndef XML_MIN_SIZE */ static const char *long_cdata_text = " Date: Mon, 26 Apr 2021 14:11:53 +0200 Subject: [PATCH 3/4] lib: Add comments about effect of XML_MIN_SIZE to xmltok_impl.c --- expat/lib/xmltok_impl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/expat/lib/xmltok_impl.c b/expat/lib/xmltok_impl.c index 06d5c908..cc0d319a 100644 --- a/expat/lib/xmltok_impl.c +++ b/expat/lib/xmltok_impl.c @@ -1,4 +1,4 @@ -/* This file is included! +/* This file is included (from xmltok.c, 1-3 times depending on XML_MIN_SIZE)! __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -32,7 +32,7 @@ #ifdef XML_TOK_IMPL_C -# ifndef IS_INVALID_CHAR +# ifndef IS_INVALID_CHAR // i.e. for UTF-16 and XML_MIN_SIZE not defined # define IS_INVALID_CHAR(enc, ptr, n) (0) # endif From ed36812db2017e8a68eb7825ecd8dd2bc89cd2e5 Mon Sep 17 00:00:00 2001 From: Sebastian Pipping Date: Sat, 24 Apr 2021 21:13:24 +0200 Subject: [PATCH 4/4] lib: Fix macro IS_INVALID_CHAR (for UTF-16 with macro XML_MIN_SIZE defined) What happens is that with macro XML_MIN_SIZE defined, for UTF-16 macro IS_INVALID_CHAR was being set to .. > #define IS_INVALID_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) .. which calls NULL pointers in .isInvalid{2,3,4} at runtime. For UTF-16 we actually need what xmltok_impl.c does for macro IS_INVALID_CHAR when it has not yet been defined: > # ifndef IS_INVALID_CHAR > # define IS_INVALID_CHAR(enc, ptr, n) (0) > # endif So the fix is a combination of these two: - Use .isInvalid{2,3,4} where needed and available and - return 0/false for UTF-16 where .isInvalid{2,3,4} are NULL. --- expat/Changes | 8 ++++++++ expat/lib/xmltok.c | 10 ++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/expat/Changes b/expat/Changes index 40b716d9..69c39f37 100644 --- a/expat/Changes +++ b/expat/Changes @@ -3,12 +3,20 @@ NOTE: We are looking for help with a few things: If you can help, please get in touch. Thanks! Release X.X.X XXX XXXXX XX XXXX + Bug fixes: + #332 #470 For (non-default) compilation with -DEXPAT_MIN_SIZE=ON (CMake) + or CPPFLAGS=-DXML_MIN_SIZE (GNU Autotools): Fix segfault + for UTF-16 payloads containing CDATA sections. + Other changes: #457 Unexpose symbol _INTERNAL_trim_to_complete_utf8_characters #458 #459 CMake: Support absolute paths for both CMAKE_INSTALL_LIBDIR and CMAKE_INSTALL_INCLUDEDIR #468 #469 xmlwf: Improve help output and the xmlwf man page + Special thanks to: + Dimitry Andric + Release 2.3.0 Thu March 25 2021 Bug fixes: #438 When calling XML_ParseBuffer without a prior successful call to diff --git a/expat/lib/xmltok.c b/expat/lib/xmltok.c index d9474240..7759ffb6 100644 --- a/expat/lib/xmltok.c +++ b/expat/lib/xmltok.c @@ -259,8 +259,14 @@ sb_byteToAscii(const ENCODING *enc, const char *p) { #define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p)) #define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p)) -#define IS_INVALID_CHAR(enc, p, n) \ - (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) +#ifdef XML_MIN_SIZE +# define IS_INVALID_CHAR(enc, p, n) \ + (AS_NORMAL_ENCODING(enc)->isInvalid##n \ + && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) +#else +# define IS_INVALID_CHAR(enc, p, n) \ + (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) +#endif #ifdef XML_MIN_SIZE # define IS_NAME_CHAR_MINBPC(enc, p) \