mirror of
https://github.com/libexpat/libexpat.git
synced 2025-04-05 05:05:00 +00:00
Add app setting for enabling/disabling reparse heuristic
Suggested-by: Sebastian Pipping <sebastian@pipping.org> CI-fighting-assistance-by: Sebastian Pipping <sebastian@pipping.org>
This commit is contained in:
parent
09957b8ced
commit
1d3162da8a
6 changed files with 196 additions and 7 deletions
1
.github/workflows/data/exported-symbols.txt
vendored
1
.github/workflows/data/exported-symbols.txt
vendored
|
@ -53,6 +53,7 @@ XML_SetNotationDeclHandler
|
|||
XML_SetNotStandaloneHandler
|
||||
XML_SetParamEntityParsing
|
||||
XML_SetProcessingInstructionHandler
|
||||
XML_SetReparseDeferralEnabled
|
||||
XML_SetReturnNSTriplet
|
||||
XML_SetSkippedEntityHandler
|
||||
XML_SetStartCdataSectionHandler
|
||||
|
|
|
@ -152,10 +152,11 @@ interface.</p>
|
|||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#billion-laughs">Billion Laughs Attack Protection</a>
|
||||
<a href="#attack-protection">Attack Protection</a>
|
||||
<ul>
|
||||
<li><a href="#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></li>
|
||||
<li><a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></li>
|
||||
<li><a href="#XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li><a href="#miscellaneous">Miscellaneous Functions</a>
|
||||
|
@ -2167,11 +2168,7 @@ parse position may be before the beginning of the buffer.</p>
|
|||
return <code>NULL</code>.</p>
|
||||
</div>
|
||||
|
||||
<h3><a name="billion-laughs">Billion Laughs Attack Protection</a></h3>
|
||||
|
||||
<p>The functions in this section configure the built-in
|
||||
protection against various forms of
|
||||
<a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a>.</p>
|
||||
<h3><a name="attack-protection">Attack Protection</a><a name="billion-laughs"></a></h3>
|
||||
|
||||
<h4 id="XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</h4>
|
||||
<pre class="fcndec">
|
||||
|
@ -2259,6 +2256,27 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p,
|
|||
</p>
|
||||
</div>
|
||||
|
||||
<h4 id="XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</h4>
|
||||
<pre class="fcndec">
|
||||
/* Added in Expat 2.6.0. */
|
||||
XML_Bool XMLCALL
|
||||
XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
|
||||
</pre>
|
||||
<div class="fcndef">
|
||||
<p>
|
||||
A large token may require many parse calls before enough data is available for Expat to parse it in full.
|
||||
If Expat retried parsing the token on every parse call, parsing could take quadratic time.
|
||||
To avoid this, Expat only retries once a significant amount of new data is available.
|
||||
This function allows enabling or disabling this behavior.
|
||||
</p>
|
||||
<p>
|
||||
The <code>enabled</code> argument should be <code>XML_TRUE</code> or <code>XML_FALSE</code>.
|
||||
</p>
|
||||
<p>
|
||||
Returns <code>XML_TRUE</code> on success, and <code>XML_FALSE</code> on error.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<h3><a name="miscellaneous">Miscellaneous functions</a></h3>
|
||||
|
||||
<p>The functions in this section either obtain state information from
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de>
|
||||
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
|
||||
Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl>
|
||||
Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com>
|
||||
Licensed under the MIT license:
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
|
@ -1054,6 +1055,10 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
|
|||
XML_Parser parser, unsigned long long activationThresholdBytes);
|
||||
#endif
|
||||
|
||||
/* Added in Expat 2.6.0. */
|
||||
XMLPARSEAPI(XML_Bool)
|
||||
XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
|
||||
|
||||
/* Expat follows the semantic versioning convention.
|
||||
See https://semver.org
|
||||
*/
|
||||
|
|
|
@ -77,3 +77,5 @@ EXPORTS
|
|||
; added with version 2.4.0
|
||||
@_EXPAT_COMMENT_DTD_OR_GE@ XML_SetBillionLaughsAttackProtectionActivationThreshold @69
|
||||
@_EXPAT_COMMENT_DTD_OR_GE@ XML_SetBillionLaughsAttackProtectionMaximumAmplification @70
|
||||
; added with version 2.6.0
|
||||
XML_SetReparseDeferralEnabled @71
|
||||
|
|
|
@ -651,6 +651,7 @@ struct XML_ParserStruct {
|
|||
XML_Index m_parseEndByteIndex;
|
||||
const char *m_parseEndPtr;
|
||||
size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
|
||||
XML_Bool m_reparseDeferralEnabled;
|
||||
XML_Char *m_dataBuf;
|
||||
XML_Char *m_dataBufEnd;
|
||||
XML_StartElementHandler m_startElementHandler;
|
||||
|
@ -987,7 +988,7 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
|
|||
const char **endPtr) {
|
||||
const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
|
||||
|
||||
if (g_reparseDeferralEnabledDefault
|
||||
if (parser->m_reparseDeferralEnabled
|
||||
&& ! parser->m_parsingStatus.finalBuffer) {
|
||||
// Heuristic: don't try to parse a partial token again until the amount of
|
||||
// available data has increased significantly.
|
||||
|
@ -1193,6 +1194,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
|
|||
parser->m_parseEndByteIndex = 0;
|
||||
parser->m_parseEndPtr = NULL;
|
||||
parser->m_partialTokenBytesBefore = 0;
|
||||
parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
|
||||
parser->m_declElementType = NULL;
|
||||
parser->m_declAttributeId = NULL;
|
||||
parser->m_declEntity = NULL;
|
||||
|
@ -2617,6 +2619,15 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
|
|||
}
|
||||
#endif /* XML_GE == 1 */
|
||||
|
||||
XML_Bool XMLCALL
|
||||
XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
|
||||
if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
|
||||
parser->m_reparseDeferralEnabled = enabled;
|
||||
return XML_TRUE;
|
||||
}
|
||||
return XML_FALSE;
|
||||
}
|
||||
|
||||
/* Initially tag->rawName always points into the parse buffer;
|
||||
for those TAG instances opened while the current parse buffer was
|
||||
processed, and not yet closed, we need to store tag->rawName in a more
|
||||
|
|
|
@ -5304,6 +5304,154 @@ START_TEST(test_big_tokens_take_linear_time) {
|
|||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_set_reparse_deferral) {
|
||||
const char *const pre = "<d>";
|
||||
const char *const start = "<x attr='";
|
||||
const char *const end = "'></x>";
|
||||
char eeeeee[100];
|
||||
const int fillsize = (int)sizeof(eeeeee);
|
||||
memset(eeeeee, 'e', fillsize);
|
||||
|
||||
for (int enabled = 0; enabled <= 1; enabled += 1) {
|
||||
set_subtest("deferral=%d", enabled);
|
||||
|
||||
XML_Parser parser = XML_ParserCreate(NULL);
|
||||
assert_true(parser != NULL);
|
||||
assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
|
||||
|
||||
CharData storage;
|
||||
CharData_Init(&storage);
|
||||
XML_SetUserData(parser, &storage);
|
||||
XML_SetStartElementHandler(parser, start_element_event_handler);
|
||||
|
||||
enum XML_Status status;
|
||||
// parse the start text
|
||||
status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
|
||||
if (status != XML_STATUS_OK) {
|
||||
xml_failure(parser);
|
||||
}
|
||||
CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
|
||||
|
||||
// ..and the start of the token
|
||||
status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
|
||||
if (status != XML_STATUS_OK) {
|
||||
xml_failure(parser);
|
||||
}
|
||||
CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
|
||||
|
||||
// try to parse lots of 'e', but the token isn't finished
|
||||
for (int c = 0; c < 100; ++c) {
|
||||
status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
|
||||
if (status != XML_STATUS_OK) {
|
||||
xml_failure(parser);
|
||||
}
|
||||
}
|
||||
CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
|
||||
|
||||
// end the <x> token.
|
||||
status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
|
||||
if (status != XML_STATUS_OK) {
|
||||
xml_failure(parser);
|
||||
}
|
||||
|
||||
if (enabled) {
|
||||
// In general, we may need to push more data to trigger a reparse attempt,
|
||||
// but in this test, the data is constructed to always require it.
|
||||
CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
|
||||
// 2x the token length should suffice; the +1 covers the start and end.
|
||||
for (int c = 0; c < 101; ++c) {
|
||||
status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
|
||||
if (status != XML_STATUS_OK) {
|
||||
xml_failure(parser);
|
||||
}
|
||||
}
|
||||
}
|
||||
CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
|
||||
|
||||
XML_ParserFree(parser);
|
||||
}
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_set_reparse_deferral_on_null_parser) {
|
||||
assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
|
||||
assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
|
||||
assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
|
||||
assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
|
||||
assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
|
||||
== XML_FALSE);
|
||||
assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
|
||||
== XML_FALSE);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_set_reparse_deferral_on_the_fly) {
|
||||
const char *const pre = "<d><x attr='";
|
||||
const char *const end = "'></x";
|
||||
const char *const post = ">";
|
||||
char iiiiii[100];
|
||||
const int fillsize = (int)sizeof(iiiiii);
|
||||
memset(iiiiii, 'i', fillsize);
|
||||
|
||||
XML_Parser parser = XML_ParserCreate(NULL);
|
||||
assert_true(parser != NULL);
|
||||
assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
|
||||
|
||||
CharData storage;
|
||||
CharData_Init(&storage);
|
||||
XML_SetUserData(parser, &storage);
|
||||
XML_SetStartElementHandler(parser, start_element_event_handler);
|
||||
|
||||
enum XML_Status status;
|
||||
// parse the start text
|
||||
status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
|
||||
if (status != XML_STATUS_OK) {
|
||||
xml_failure(parser);
|
||||
}
|
||||
CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
|
||||
|
||||
// try to parse some 'i', but the token isn't finished
|
||||
status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
|
||||
if (status != XML_STATUS_OK) {
|
||||
xml_failure(parser);
|
||||
}
|
||||
CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
|
||||
|
||||
// end the <x> token.
|
||||
status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
|
||||
if (status != XML_STATUS_OK) {
|
||||
xml_failure(parser);
|
||||
}
|
||||
CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
|
||||
|
||||
// now change the heuristic setting and add *no* data
|
||||
assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
|
||||
// we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
|
||||
status = XML_Parse(parser, post, (int)strlen(post), XML_FALSE);
|
||||
if (status != XML_STATUS_OK) {
|
||||
xml_failure(parser);
|
||||
}
|
||||
CharData_CheckXMLChars(&storage, XCS("dx"));
|
||||
|
||||
XML_ParserFree(parser);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(test_set_bad_reparse_option) {
|
||||
XML_Parser parser = XML_ParserCreate(NULL);
|
||||
assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
|
||||
assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
|
||||
assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
|
||||
assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
|
||||
assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
|
||||
assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
|
||||
assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
|
||||
assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
|
||||
assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
|
||||
XML_ParserFree(parser);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
void
|
||||
make_basic_test_case(Suite *s) {
|
||||
TCase *tc_basic = tcase_create("basic tests");
|
||||
|
@ -5545,4 +5693,8 @@ make_basic_test_case(Suite *s) {
|
|||
test_pool_integrity_with_unfinished_attr);
|
||||
tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
|
||||
tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
|
||||
tcase_add_test(tc_basic, test_set_reparse_deferral);
|
||||
tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
|
||||
tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
|
||||
tcase_add_test(tc_basic, test_set_bad_reparse_option);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue