Merge pull request #466 from libexpat/protect-against-billion-laughs-attacks

[CVE-2013-0340, CWE-776] Protect against billion laughs attacks (fixes #34)
This commit is contained in:
Sebastian Pipping 2021-05-11 14:53:22 +02:00 committed by GitHub
commit 309cd4aa4b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 1817 additions and 62 deletions

View file

@ -29,6 +29,8 @@ XML_ParserReset
XML_ResumeParser
XML_SetAttlistDeclHandler
XML_SetBase
XML_SetBillionLaughsAttackProtectionActivationThreshold
XML_SetBillionLaughsAttackProtectionMaximumAmplification
XML_SetCdataSectionHandler
XML_SetCharacterDataHandler
XML_SetCommentHandler

View file

@ -44,6 +44,7 @@ cppcheck_args=(
--error-exitcode=1
--force
--suppress=objectIndex
--suppress=unknownMacro
)
find_args=(

View file

@ -248,6 +248,10 @@ if(FLAG_VISIBILITY)
add_definitions(-DXML_ENABLE_VISIBILITY=1)
set(EXTRA_COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -fvisibility=hidden")
endif()
if(MINGW)
# MinGW only: silence the -pedantic diagnostic about Microsoft-style printf
# format specifiers by appending -Wno-pedantic-ms-format to the extra flags.
# Without __USE_MINGW_ANSI_STDIO the compiler produces a false positive
# NOTE(review): presumably triggered by the "I64" length modifiers used by
# the EXPAT_FMT_* helper macros in that configuration -- confirm.
set(EXTRA_COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -Wno-pedantic-ms-format")
endif()
if (EXPAT_WARNINGS_AS_ERRORS)
if(MSVC)
add_definitions(/WX)

View file

@ -3,11 +3,40 @@ NOTE: We are looking for help with a few things:
If you can help, please get in touch. Thanks!
Release X.X.X XXX XXXXX XX XXXX
Security fixes:
#34 #466 CVE-2013-0340/CWE-776 -- Protect against billion laughs attacks
(denial-of-service; flavors targeting CPU time or RAM or both,
leveraging general entities or parameter entities or both)
by tracking and limiting the input amplification factor
(<amplification> := (<direct> + <indirect>) / <direct>).
By conservative default, amplification up to a factor of 100.0
is tolerated and rejection only starts after 8 MiB of output bytes
(=<direct> + <indirect>) have been processed.
A new error code XML_ERROR_AMPLIFICATION_LIMIT_BREACH signals
this condition.
Bug fixes:
#332 #470 For (non-default) compilation with -DEXPAT_MIN_SIZE=ON (CMake)
or CPPFLAGS=-DXML_MIN_SIZE (GNU Autotools): Fix segfault
for UTF-16 payloads containing CDATA sections.
New features:
#34 #466 Add two new API functions to further tighten billion laughs
protection parameters when desired.
- XML_SetBillionLaughsAttackProtectionMaximumAmplification
- XML_SetBillionLaughsAttackProtectionActivationThreshold
Please see file "doc/reference.html" for more details.
If you ever need to increase the defaults for non-attack XML
payload, please file a bug report with libexpat.
#34 #466 Introduce environment switches EXPAT_ACCOUNTING_DEBUG=(0|1|2|3)
and EXPAT_ENTITY_DEBUG=(0|1) for runtime debugging of accounting
and entity processing; specific behavior of these values may
change in the future.
#34 #466 xmlwf: Add arguments "-a FACTOR" and "-b BYTES" to further tighten
billion laughs protection parameters when desired.
If you ever need to increase the defaults for non-attack XML
payload, please file a bug report with libexpat.
Other changes:
#457 Unexpose symbol _INTERNAL_trim_to_complete_utf8_characters
#458 #459 CMake: Support absolute paths for both CMAKE_INSTALL_LIBDIR
@ -16,6 +45,11 @@ Release X.X.X XXX XXXXX XX XXXX
Special thanks to:
Dimitry Andric
Nick Wellnhofer
Yury Gribov
and
Clang LeakSan
JetBrains
Release 2.3.0 Thu March 25 2021
Bug fixes:

View file

@ -111,7 +111,7 @@ AS_IF([test "$GCC" = yes],
AX_APPEND_COMPILE_FLAGS([-fno-strict-aliasing -Wmissing-prototypes -Wstrict-prototypes], [AM_CFLAGS])
AX_APPEND_COMPILE_FLAGS([-pedantic -Wduplicated-cond -Wduplicated-branches -Wlogical-op], [AM_CFLAGS])
AX_APPEND_COMPILE_FLAGS([-Wrestrict -Wnull-dereference -Wjump-misses-init -Wdouble-promotion], [AM_CFLAGS])
AX_APPEND_COMPILE_FLAGS([-Wshadow -Wformat=2 -Wmisleading-indentation], [AM_CFLAGS])])
AX_APPEND_COMPILE_FLAGS([-Wshadow -Wformat=2 -Wno-pedantic-ms-format -Wmisleading-indentation], [AM_CFLAGS])])
AC_LANG_PUSH([C++])
AC_PROG_CXX

View file

@ -148,6 +148,13 @@ interface.</p>
<li><a href="#XML_GetInputContext">XML_GetInputContext</a></li>
</ul>
</li>
<li>
<a href="#billion-laughs">Billion Laughs Attack Protection</a>
<ul>
<li><a href="#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></li>
<li><a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></li>
</ul>
</li>
<li><a href="#miscellaneous">Miscellaneous Functions</a>
<ul>
<li><a href="#XML_SetUserData">XML_SetUserData</a></li>
@ -2073,6 +2080,98 @@ parse position may be before the beginning of the buffer.</p>
return NULL.</p>
</div>
<h3><a name="billion-laughs">Billion Laughs Attack Protection</a></h3>
<p>The functions in this section configure the built-in
protection against various forms of
<a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a>.</p>
<h4 id="XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</h4>
<pre class="fcndec">
/* Added in Expat 2.4.0. */
XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser p,
float maximumAmplificationFactor);
</pre>
<div class="fcndef">
<p>
Sets the maximum tolerated amplification factor
for protection against
<a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a>
(default: <code>100.0</code>)
of parser <code>p</code> to <code>maximumAmplificationFactor</code>, and
returns <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error.
</p>
The amplification factor is calculated as ..
<pre>
amplification := (direct + indirect) / direct
</pre>
.. while parsing, where
<code>direct</code> is the number of bytes read from the primary document in parsing and
<code>indirect</code> is the number of bytes added by expanding entities and reading of external DTD files, combined.
<p>For a call to <code>XML_SetBillionLaughsAttackProtectionMaximumAmplification</code> to succeed:</p>
<ul>
<li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without any parent parsers) and</li>
<li><code>maximumAmplificationFactor</code> must be non-<code>NaN</code> and greater than or equal to <code>1.0</code>.</li>
</ul>
<p>
<strong>Note:</strong>
If you ever need to increase this value for non-attack payload,
please <a href="https://github.com/libexpat/libexpat/issues">file a bug report</a>.
</p>
<p>
<strong>Note:</strong>
Peak amplifications
of factor 15,000 for the entire payload and
of factor 30,000 in the middle of parsing
have been observed with small benign files in practice.
So if you do reduce the maximum allowed amplification,
please make sure that the activation threshold is still big enough
to not end up with undesired false positives (i.e. benign files being rejected).
</p>
</div>
<h4 id="XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</h4>
<pre class="fcndec">
/* Added in Expat 2.4.0. */
XML_Bool XMLCALL
XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p,
unsigned long long activationThresholdBytes);
</pre>
<div class="fcndef">
<p>
Sets the number of output bytes (including amplification from entity expansion and reading DTD files)
needed to activate protection against
<a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a>
(default: <code>8 MiB</code>)
of parser <code>p</code> to <code>activationThresholdBytes</code>, and
returns <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error.
</p>
<p>For a call to <code>XML_SetBillionLaughsAttackProtectionActivationThreshold</code> to succeed:</p>
<ul>
<li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without any parent parsers).</li>
</ul>
<p>
<strong>Note:</strong>
If you ever need to increase this value for non-attack payload,
please <a href="https://github.com/libexpat/libexpat/issues">file a bug report</a>.
</p>
<p>
<strong>Note:</strong>
Activation thresholds below 4 MiB are known to break support for
<a href="https://en.wikipedia.org/wiki/Darwin_Information_Typing_Architecture">DITA</a> 1.3 payload
and are hence not recommended.
</p>
</div>
<h3><a name="miscellaneous">Miscellaneous functions</a></h3>
<p>The functions in this section either obtain state information from

View file

@ -4,7 +4,7 @@
<!ENTITY dhfirstname "<firstname>Scott</firstname>">
<!ENTITY dhsurname "<surname>Bronson</surname>">
<!-- Please adjust the date whenever revising the manpage. -->
<!ENTITY dhdate "<date>April 25, 2021</date>">
<!ENTITY dhdate "<date>May 4, 2021</date>">
<!-- SECTION should be 1-8, maybe w/ subsection other parameters are
allowed: see man(7), man(1). -->
<!ENTITY dhsection "<manvolnum>1</manvolnum>">
@ -131,6 +131,50 @@ supports both.
<variablelist>
<varlistentry>
<term><option>-a</option> <replaceable>factor</replaceable></term>
<listitem>
<para>
Sets the maximum tolerated amplification factor
for protection against billion laughs attacks (default: 100.0).
The amplification factor is calculated as ..
</para>
<literallayout>
amplification := (direct + indirect) / direct
</literallayout>
<para>
.. while parsing, where
&lt;direct&gt; is the number of bytes read
from the primary document in parsing and
&lt;indirect&gt; is the number of bytes
added by expanding entities and reading of external DTD files,
combined.
</para>
<para>
<emphasis>NOTE</emphasis>:
If you ever need to increase this value for non-attack payload,
please file a bug report.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-b</option> <replaceable>bytes</replaceable></term>
<listitem>
<para>
Sets the number of output bytes (including amplification)
needed to activate protection against billion laughs attacks
(default: 8 MiB).
This can be thought of as an &quot;activation threshold&quot;.
</para>
<para>
<emphasis>NOTE</emphasis>:
If you ever need to increase this value for non-attack payload,
please file a bug report.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-c</option></term>
<listitem>
@ -458,6 +502,7 @@ supports both.
<literallayout>
The Expat home page: https://libexpat.github.io/
The W3 XML 1.0 specification (fourth edition): https://www.w3.org/TR/2006/REC-xml-20060816/
Billion laughs attack: https://en.wikipedia.org/wiki/Billion_laughs_attack
</literallayout>
</para>

View file

@ -124,7 +124,9 @@ enum XML_Error {
/* Added in 2.2.1. */
XML_ERROR_INVALID_ARGUMENT,
/* Added in 2.3.0. */
XML_ERROR_NO_BUFFER
XML_ERROR_NO_BUFFER,
/* Added in 2.4.0. */
XML_ERROR_AMPLIFICATION_LIMIT_BREACH
};
enum XML_Content_Type {
@ -1006,7 +1008,10 @@ enum XML_FeatureEnum {
XML_FEATURE_SIZEOF_XML_LCHAR,
XML_FEATURE_NS,
XML_FEATURE_LARGE_SIZE,
XML_FEATURE_ATTR_INFO
XML_FEATURE_ATTR_INFO,
/* Added in Expat 2.4.0. */
XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT,
XML_FEATURE_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT
/* Additional features must be added to the end of this enum. */
};
@ -1019,6 +1024,18 @@ typedef struct {
XMLPARSEAPI(const XML_Feature *)
XML_GetFeatureList(void);
#ifdef XML_DTD
/* Added in Expat 2.4.0. */
XMLPARSEAPI(XML_Bool)
XML_SetBillionLaughsAttackProtectionMaximumAmplification(
XML_Parser parser, float maximumAmplificationFactor);
/* Added in Expat 2.4.0. */
XMLPARSEAPI(XML_Bool)
XML_SetBillionLaughsAttackProtectionActivationThreshold(
XML_Parser parser, unsigned long long activationThresholdBytes);
#endif
/* Expat follows the semantic versioning convention.
See http://semver.org.
*/

View file

@ -105,10 +105,46 @@
# endif
#endif
#include <limits.h> // ULONG_MAX
// printf-style format helpers for unsigned long long, ptrdiff_t and size_t.
// Each macro expands to adjacent string literals: "%" <midpart> <suffix>,
// so <midpart> lands between the percent sign and the length modifier;
// pass "" when nothing extra is wanted.
#if defined(_WIN32) && ! defined(__USE_MINGW_ANSI_STDIO)
// Windows without MinGW's ANSI stdio: use the "I64" length modifier
// for 64-bit integers.
# define EXPAT_FMT_ULL(midpart) "%" midpart "I64u"
# if defined(_WIN64) // Note: modifiers "td" and "zu" do not work for MinGW
// 64-bit Windows: pointer-sized integers are printed as 64-bit values
# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "I64d"
# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "I64u"
# else
// Otherwise: pointer-sized integers are printed as plain int/unsigned
# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d"
# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u"
# endif
#else
// Everywhere else: standard "ll" modifier for unsigned long long
# define EXPAT_FMT_ULL(midpart) "%" midpart "llu"
# if ! defined(ULONG_MAX)
# error Compiler did not define ULONG_MAX for us
# elif ULONG_MAX == 18446744073709551615u // 2^64-1
// unsigned long is 64 bits wide: print pointer-sized integers as long
# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld"
# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu"
# else
// unsigned long is narrower: print pointer-sized integers as int/unsigned
# define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "d"
# define EXPAT_FMT_SIZE_T(midpart) "%" midpart "u"
# endif
#endif
#ifndef UNUSED_P
# define UNUSED_P(p) (void)p
#endif
/* NOTE BEGIN If you ever patch these defaults to greater values
for non-attack XML payload in your environment,
please file a bug report with libexpat. Thank you!
*/
#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_MAXIMUM_AMPLIFICATION_DEFAULT \
100.0f
#define EXPAT_BILLION_LAUGHS_ATTACK_PROTECTION_ACTIVATION_THRESHOLD_DEFAULT \
8388608 // 8 MiB, 2^23
/* NOTE END */
#include "expat.h" // so we can use type XML_Parser below
#ifdef __cplusplus
extern "C" {
#endif
@ -116,6 +152,12 @@ extern "C" {
void _INTERNAL_trim_to_complete_utf8_characters(const char *from,
const char **fromLimRef);
#if defined(XML_DTD)
unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser);
unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser);
const char *unsignedCharToPrintable(unsigned char c);
#endif
#ifdef __cplusplus
}
#endif

View file

@ -75,3 +75,6 @@ EXPORTS
; XML_GetAttributeInfo @66
XML_SetHashSalt @67
; internal @68 removed with version 2.3.1
; added with version 2.4.0
XML_SetBillionLaughsAttackProtectionActivationThreshold @69
XML_SetBillionLaughsAttackProtectionMaximumAmplification @70

View file

@ -75,3 +75,6 @@ EXPORTS
; XML_GetAttributeInfo @66
XML_SetHashSalt @67
; internal @68 removed with version 2.3.1
; added with version 2.4.0
XML_SetBillionLaughsAttackProtectionActivationThreshold @69
XML_SetBillionLaughsAttackProtectionMaximumAmplification @70

File diff suppressed because it is too large Load diff

View file

@ -53,6 +53,7 @@
#include <ctype.h>
#include <limits.h>
#include <stdint.h> /* intptr_t uint64_t */
#include <math.h> /* NAN, INFINITY, isnan */
#if ! defined(__cplusplus)
# include <stdbool.h>
@ -61,7 +62,7 @@
#include "expat.h"
#include "chardata.h"
#include "structdata.h"
#include "internal.h" /* for UNUSED_P only */
#include "internal.h"
#include "minicheck.h"
#include "memcheck.h"
#include "siphash.h"
@ -11225,6 +11226,379 @@ START_TEST(test_nsalloc_prefixed_element) {
}
END_TEST
#if defined(XML_DTD)
/* Signature shared by XML_Parse and _XML_Parse_SINGLE_BYTES so that the
   accounting tests can pick either one at runtime. */
typedef enum XML_Status (*XmlParseFunction)(XML_Parser, const char *, int, int);
/* One scenario for the accounting tests: a primary document, up to two
   external texts, and the expected surplus of indirect bytes. */
struct AccountingTestCase {
/* Document handed to the root parser; its length is the expected number
   of direct bytes. */
const char *primaryText;
/* Text served by the external entity handler for systemId "first.ent" */
const char *firstExternalText; /* often NULL */
/* Text served by the external entity handler for systemId "second.ent" */
const char *secondExternalText; /* often NULL */
/* Added on top of the lengths of both external texts to form the expected
   number of indirect bytes. */
const unsigned long long expectedCountBytesIndirectExtra;
/* Read by the external entity handler to choose between
   _XML_Parse_SINGLE_BYTES (true) and XML_Parse (false). */
XML_Bool singleBytesWanted;
};
static int
accounting_external_entity_ref_handler(XML_Parser parser,
const XML_Char *context,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId) {
UNUSED_P(context);
UNUSED_P(base);
UNUSED_P(publicId);
const struct AccountingTestCase *const testCase
= (const struct AccountingTestCase *)XML_GetUserData(parser);
const char *externalText = NULL;
if (xcstrcmp(systemId, XCS("first.ent")) == 0) {
externalText = testCase->firstExternalText;
} else if (xcstrcmp(systemId, XCS("second.ent")) == 0) {
externalText = testCase->secondExternalText;
} else {
assert(! "systemId is neither \"first.ent\" nor \"second.ent\"");
}
assert(externalText);
XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);
assert(entParser);
const XmlParseFunction xmlParseFunction
= testCase->singleBytesWanted ? _XML_Parse_SINGLE_BYTES : XML_Parse;
const enum XML_Status status = xmlParseFunction(
entParser, externalText, (int)strlen(externalText), XML_TRUE);
XML_ParserFree(entParser);
return status;
}
/* Checks that the parser's direct and indirect byte counters match exact
   expectations for a table of small documents.
   Every case is parsed twice: once with XML_Parse and once with
   _XML_Parse_SINGLE_BYTES. Expected direct bytes are the length of the
   primary text; expected indirect bytes are the lengths of both external
   texts (where present) plus the per-case "extra" value. */
START_TEST(test_accounting_precision) {
/* Placeholder for field singleBytesWanted; the loop below overwrites it
   for every case that has a first external text, before parsing. */
const XML_Bool filled_later = XML_TRUE; /* value is arbitrary */
/* Columns: primary text, first external text, second external text,
   expected extra indirect bytes, singleBytesWanted (placeholder) */
struct AccountingTestCase cases[] = {
{"<e/>", NULL, NULL, 0, 0},
{"<e></e>", NULL, NULL, 0, 0},
/* Attributes */
{"<e k1=\"v2\" k2=\"v2\"/>", NULL, NULL, 0, filled_later},
{"<e k1=\"v2\" k2=\"v2\"></e>", NULL, NULL, 0, 0},
{"<p:e xmlns:p=\"https://domain.invalid/\" />", NULL, NULL, 0,
filled_later},
{"<e k=\"&amp;&apos;&gt;&lt;&quot;\" />", NULL, NULL,
sizeof(XML_Char) * 5 /* number of predefined entities */, filled_later},
{"<e1 xmlns='https://example.org/'>\n"
" <e2 xmlns=''/>\n"
"</e1>",
NULL, NULL, 0, filled_later},
/* Text */
{"<e>text</e>", NULL, NULL, 0, filled_later},
{"<e1><e2>text1<e3/>text2</e2></e1>", NULL, NULL, 0, filled_later},
{"<e>&amp;&apos;&gt;&lt;&quot;</e>", NULL, NULL,
sizeof(XML_Char) * 5 /* number of predefined entities */, filled_later},
{"<e>&#65;&#41;</e>", NULL, NULL, 0, filled_later},
/* Prolog */
{"<?xml version=\"1.0\"?><root/>", NULL, NULL, 0, filled_later},
/* Whitespace */
{" <e1> <e2> </e2> </e1> ", NULL, NULL, 0, filled_later},
{"<e1 ><e2 /></e1 >", NULL, NULL, 0, filled_later},
{"<e1><e2 k = \"v\"/><e3 k = 'v'/></e1>", NULL, NULL, 0, filled_later},
/* Comments */
{"<!-- Comment --><e><!-- Comment --></e>", NULL, NULL, 0, filled_later},
/* Processing instructions */
{"<?xml-stylesheet type=\"text/xsl\" href=\"https://domain.invalid/\" media=\"all\"?><e/>",
NULL, NULL, 0, filled_later},
{"<?pi0?><?pi1 ?><?pi2 ?><!DOCTYPE r SYSTEM 'first.ent'><r/>",
"<?pi3?><!ENTITY % e1 SYSTEM 'second.ent'><?pi4?>%e1;<?pi5?>", "<?pi6?>",
0, filled_later},
/* CDATA */
{"<e><![CDATA[one two three]]></e>", NULL, NULL, 0, filled_later},
/* Conditional sections */
{"<!DOCTYPE r [\n"
"<!ENTITY % draft 'INCLUDE'>\n"
"<!ENTITY % final 'IGNORE'>\n"
"<!ENTITY % import SYSTEM \"first.ent\">\n"
"%import;\n"
"]>\n"
"<r/>\n",
"<![%draft;[<!--1-->]]>\n"
"<![%final;[<!--22-->]]>",
NULL, sizeof(XML_Char) * (strlen("INCLUDE") + strlen("IGNORE")),
filled_later},
/* General entities */
{"<!DOCTYPE root [\n"
"<!ENTITY nine \"123456789\">\n"
"]>\n"
"<root>&nine;</root>",
NULL, NULL, sizeof(XML_Char) * strlen("123456789"), filled_later},
{"<!DOCTYPE root [\n"
"<!ENTITY nine \"123456789\">\n"
"]>\n"
"<root k1=\"&nine;\"/>",
NULL, NULL, sizeof(XML_Char) * strlen("123456789"), filled_later},
{"<!DOCTYPE root [\n"
"<!ENTITY nine \"123456789\">\n"
"<!ENTITY nine2 \"&nine;&nine;\">\n"
"]>\n"
"<root>&nine2;&nine2;&nine2;</root>",
NULL, NULL,
sizeof(XML_Char) * 3 /* calls to &nine2; */ * 2 /* calls to &nine; */
* (strlen("&nine;") + strlen("123456789")),
filled_later},
{"<!DOCTYPE r [\n"
" <!ENTITY five SYSTEM 'first.ent'>\n"
"]>\n"
"<r>&five;</r>",
"12345", NULL, 0, filled_later},
/* Parameter entities */
{"<!DOCTYPE r [\n"
"<!ENTITY % comment \"<!---->\">\n"
"%comment;\n"
"]>\n"
"<r/>",
NULL, NULL, sizeof(XML_Char) * strlen("<!---->"), filled_later},
{"<!DOCTYPE r [\n"
"<!ENTITY % ninedef \"&#60;!ENTITY nine &#34;123456789&#34;&#62;\">\n"
"%ninedef;\n"
"]>\n"
"<r>&nine;</r>",
NULL, NULL,
sizeof(XML_Char)
* (strlen("<!ENTITY nine \"123456789\">") + strlen("123456789")),
filled_later},
{"<!DOCTYPE r [\n"
"<!ENTITY % comment \"<!--1-->\">\n"
"<!ENTITY % comment2 \"&#37;comment;<!--22-->&#37;comment;\">\n"
"%comment2;\n"
"]>\n"
"<r/>\n",
NULL, NULL,
sizeof(XML_Char)
* (strlen("%comment;<!--22-->%comment;") + 2 * strlen("<!--1-->")),
filled_later},
{"<!DOCTYPE r [\n"
" <!ENTITY % five \"12345\">\n"
" <!ENTITY % five2def \"&#60;!ENTITY five2 &#34;[&#37;five;][&#37;five;]]]]&#34;&#62;\">\n"
" %five2def;\n"
"]>\n"
"<r>&five2;</r>",
NULL, NULL, /* from "%five2def;": */
sizeof(XML_Char)
* (strlen("<!ENTITY five2 \"[%five;][%five;]]]]\">")
+ 2 /* calls to "%five;" */ * strlen("12345")
+ /* from "&five2;": */ strlen("[12345][12345]]]]")),
filled_later},
{"<!DOCTYPE r SYSTEM \"first.ent\">\n"
"<r/>",
"<!ENTITY % comment '<!--1-->'>\n"
"<!ENTITY % comment2 '<!--22-->%comment;<!--22-->%comment;<!--22-->'>\n"
"%comment2;",
NULL,
sizeof(XML_Char)
* (strlen("<!--22-->%comment;<!--22-->%comment;<!--22-->")
+ 2 /* calls to "%comment;" */ * strlen("<!---->")),
filled_later},
{"<!DOCTYPE r SYSTEM 'first.ent'>\n"
"<r/>",
"<!ENTITY % e1 PUBLIC 'foo' 'second.ent'>\n"
"<!ENTITY % e2 '<!--22-->%e1;<!--22-->'>\n"
"%e2;\n",
"<!--1-->", sizeof(XML_Char) * strlen("<!--22--><!--1--><!--22-->"),
filled_later},
{
"<!DOCTYPE r SYSTEM 'first.ent'>\n"
"<r/>",
"<!ENTITY % e1 SYSTEM 'second.ent'>\n"
"<!ENTITY % e2 '%e1;'>",
"<?xml version='1.0' encoding='utf-8'?>\n"
"hello\n"
"xml" /* without trailing newline! */,
0,
filled_later,
},
{
"<!DOCTYPE r SYSTEM 'first.ent'>\n"
"<r/>",
"<!ENTITY % e1 SYSTEM 'second.ent'>\n"
"<!ENTITY % e2 '%e1;'>",
"<?xml version='1.0' encoding='utf-8'?>\n"
"hello\n"
"xml\n" /* with trailing newline! */,
0,
filled_later,
},
{"<!DOCTYPE doc SYSTEM 'first.ent'>\n"
"<doc></doc>\n",
"<!ELEMENT doc EMPTY>\n"
"<!ENTITY % e1 SYSTEM 'second.ent'>\n"
"<!ENTITY % e2 '%e1;'>\n"
"%e1;\n",
"\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>" /* UTF-8 BOM */,
strlen("\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"), filled_later},
{"<!DOCTYPE r [\n"
" <!ENTITY five SYSTEM 'first.ent'>\n"
"]>\n"
"<r>&five;</r>",
"\xEF\xBB\xBF" /* UTF-8 BOM */, NULL, 0, filled_later},
};
const size_t countCases = sizeof(cases) / sizeof(cases[0]);
size_t u = 0;
for (; u < countCases; u++) {
/* Two passes per case: v == 0 uses XML_Parse,
   v == 1 uses _XML_Parse_SINGLE_BYTES */
size_t v = 0;
for (; v < 2; v++) {
const XML_Bool singleBytesWanted = (v == 0) ? XML_FALSE : XML_TRUE;
const unsigned long long expectedCountBytesDirect
= strlen(cases[u].primaryText);
const unsigned long long expectedCountBytesIndirect
= (cases[u].firstExternalText ? strlen(cases[u].firstExternalText)
: 0)
+ (cases[u].secondExternalText ? strlen(cases[u].secondExternalText)
: 0)
+ cases[u].expectedCountBytesIndirectExtra;
XML_Parser parser = XML_ParserCreate(NULL);
XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
/* The external entity handler is only installed for cases that come
   with external text; it reads the test case via the user data. */
if (cases[u].firstExternalText) {
XML_SetExternalEntityRefHandler(parser,
accounting_external_entity_ref_handler);
XML_SetUserData(parser, (void *)&cases[u]);
cases[u].singleBytesWanted = singleBytesWanted;
}
const XmlParseFunction xmlParseFunction
= singleBytesWanted ? _XML_Parse_SINGLE_BYTES : XML_Parse;
enum XML_Status status
= xmlParseFunction(parser, cases[u].primaryText,
(int)strlen(cases[u].primaryText), XML_TRUE);
if (status != XML_STATUS_OK) {
_xml_failure(parser, __FILE__, __LINE__);
}
/* Read both counters before the parser is freed */
const unsigned long long actualCountBytesDirect
= testingAccountingGetCountBytesDirect(parser);
const unsigned long long actualCountBytesIndirect
= testingAccountingGetCountBytesIndirect(parser);
XML_ParserFree(parser);
/* On mismatch, print the 1-based document number and parse mode to
   stderr before failing the test. */
if (actualCountBytesDirect != expectedCountBytesDirect) {
fprintf(
stderr,
"Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL(
"") " count direct bytes, got " EXPAT_FMT_ULL("") " instead.\n",
u + 1, countCases, singleBytesWanted ? "single bytes" : "chunks",
expectedCountBytesDirect, actualCountBytesDirect);
fail("Count of direct bytes is off");
}
if (actualCountBytesIndirect != expectedCountBytesIndirect) {
fprintf(
stderr,
"Document " EXPAT_FMT_SIZE_T("") " of " EXPAT_FMT_SIZE_T("") ", %s: Expected " EXPAT_FMT_ULL(
"") " count indirect bytes, got " EXPAT_FMT_ULL("") " instead.\n",
u + 1, countCases, singleBytesWanted ? "single bytes" : "chunks",
expectedCountBytesIndirect, actualCountBytesIndirect);
fail("Count of indirect bytes is off");
}
}
}
}
END_TEST
START_TEST(test_billion_laughs_attack_protection_api) {
XML_Parser parserWithoutParent = XML_ParserCreate(NULL);
XML_Parser parserWithParent
= XML_ExternalEntityParserCreate(parserWithoutParent, NULL, NULL);
if (parserWithoutParent == NULL)
fail("parserWithoutParent is NULL");
if (parserWithParent == NULL)
fail("parserWithParent is NULL");
// XML_SetBillionLaughsAttackProtectionMaximumAmplification, error cases
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(NULL, 123.0f)
== XML_TRUE)
fail("Call with NULL parser is NOT supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(parserWithParent,
123.0f)
== XML_TRUE)
fail("Call with non-root parser is NOT supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
parserWithoutParent, NAN)
== XML_TRUE)
fail("Call with NaN limit is NOT supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
parserWithoutParent, -1.0f)
== XML_TRUE)
fail("Call with negative limit is NOT supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
parserWithoutParent, 0.9f)
== XML_TRUE)
fail("Call with positive limit <1.0 is NOT supposed to succeed");
// XML_SetBillionLaughsAttackProtectionMaximumAmplification, success cases
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
parserWithoutParent, 1.0f)
== XML_FALSE)
fail("Call with positive limit >=1.0 is supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
parserWithoutParent, 123456.789f)
== XML_FALSE)
fail("Call with positive limit >=1.0 is supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
parserWithoutParent, INFINITY)
== XML_FALSE)
fail("Call with positive limit >=1.0 is supposed to succeed");
// XML_SetBillionLaughsAttackProtectionActivationThreshold, error cases
if (XML_SetBillionLaughsAttackProtectionActivationThreshold(NULL, 123)
== XML_TRUE)
fail("Call with NULL parser is NOT supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionActivationThreshold(parserWithParent,
123)
== XML_TRUE)
fail("Call with non-root parser is NOT supposed to succeed");
// XML_SetBillionLaughsAttackProtectionActivationThreshold, success cases
if (XML_SetBillionLaughsAttackProtectionActivationThreshold(
parserWithoutParent, 123)
== XML_FALSE)
fail("Call with non-NULL parentless parser is supposed to succeed");
XML_ParserFree(parserWithParent);
XML_ParserFree(parserWithoutParent);
}
END_TEST
/* Checks helper unsignedCharToPrintable: it must return a non-NULL,
   non-empty string for every unsigned char value, and the expected text
   for two concrete samples. */
START_TEST(test_helper_unsigned_char_to_printable) {
  // Smoke test over ALL unsigned char values, UCHAR_MAX included.
  // The counter is wider than unsigned char so that the inclusive upper
  // bound can be tested without the loop variable wrapping around.
  unsigned int value = 0;
  for (; value <= UCHAR_MAX; value++) {
    const char *const printable
        = unsignedCharToPrintable((unsigned char)value);
    if (printable == NULL)
      fail("unsignedCharToPrintable returned NULL");
    if (strlen(printable) < (size_t)1)
      fail("unsignedCharToPrintable returned empty string");
  }

  // Two concrete samples
  if (strcmp(unsignedCharToPrintable('A'), "A") != 0)
    fail("unsignedCharToPrintable result mistaken");
  if (strcmp(unsignedCharToPrintable('\\'), "\\\\") != 0)
    fail("unsignedCharToPrintable result mistaken");
}
END_TEST
#endif // defined(XML_DTD)
static Suite *
make_suite(void) {
Suite *s = suite_create("basic");
@ -11233,6 +11607,9 @@ make_suite(void) {
TCase *tc_misc = tcase_create("miscellaneous tests");
TCase *tc_alloc = tcase_create("allocation tests");
TCase *tc_nsalloc = tcase_create("namespace allocation tests");
#if defined(XML_DTD)
TCase *tc_accounting = tcase_create("accounting tests");
#endif
suite_add_tcase(s, tc_basic);
tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
@ -11593,6 +11970,13 @@ make_suite(void) {
tcase_add_test(tc_nsalloc, test_nsalloc_long_systemid_in_ext);
tcase_add_test(tc_nsalloc, test_nsalloc_prefixed_element);
#if defined(XML_DTD)
suite_add_tcase(s, tc_accounting);
tcase_add_test(tc_accounting, test_accounting_precision);
tcase_add_test(tc_accounting, test_billion_laughs_attack_protection_api);
tcase_add_test(tc_accounting, test_helper_unsigned_char_to_printable);
#endif
return s;
}

View file

@ -7,7 +7,7 @@
|_| XML parser
Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
Copyright (c) 2016-2017 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2016-2021 Sebastian Pipping <sebastian@pipping.org>
Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
Licensed under the MIT license:
@ -55,6 +55,8 @@
# define tmain wmain
# define tremove _wremove
# define tchar wchar_t
# define tcstof wcstof
# define tcstoull wcstoull
#else /* not XML_UNICODE */
# define T(x) x
# define ftprintf fprintf
@ -72,4 +74,6 @@
# define tmain main
# define tremove remove
# define tchar char
# define tcstof strtof
# define tcstoull strtoull
#endif /* not XML_UNICODE */

View file

@ -39,11 +39,15 @@
USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <expat_config.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <math.h> /* for isnan */
#include <errno.h>
#include "expat.h"
#include "codepage.h"
@ -903,6 +907,12 @@ usage(const XML_Char *prog, int rc) {
T(" -t write no XML output for [t]iming of plain parsing\n")
T(" -N enable adding doctype and [n]otation declarations\n")
T("\n")
T("billion laughs attack protection:\n")
T(" NOTE: If you ever need to increase these values for non-attack payload, please file a bug report.\n")
T("\n")
T(" -a FACTOR set maximum tolerated [a]mplification factor (default: 100.0)\n")
T(" -b BYTES set number of output [b]ytes needed to activate (default: 8 MiB)\n")
T("\n")
T("info arguments:\n")
T(" -h show this [h]elp message and exit\n")
T(" -v show program's [v]ersion number and exit\n")
@ -951,6 +961,11 @@ tmain(int argc, XML_Char **argv) {
int requireStandalone = 0;
int requiresNotations = 0;
int continueOnError = 0;
float attackMaximumAmplification = -1.0f; /* signaling "not set" */
unsigned long long attackThresholdBytes;
XML_Bool attackThresholdGiven = XML_FALSE;
int exitCode = XMLWF_EXIT_SUCCESS;
enum XML_ParamEntityParsing paramEntityParsing
= XML_PARAM_ENTITY_PARSING_NEVER;
@ -1030,6 +1045,49 @@ tmain(int argc, XML_Char **argv) {
continueOnError = 1;
j++;
break;
case T('a'): {
const XML_Char *valueText = NULL;
XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
errno = 0;
XML_Char *afterValueText = (XML_Char *)valueText;
attackMaximumAmplification = tcstof(valueText, &afterValueText);
if ((errno != 0) || (afterValueText[0] != T('\0'))
|| isnan(attackMaximumAmplification)
|| (attackMaximumAmplification < 1.0f)) {
// This prevents tperror(..) from reporting misleading "[..]: Success"
errno = ERANGE;
tperror(T("invalid amplification limit") T(
" (needs a floating point number greater or equal than 1.0)"));
exit(XMLWF_EXIT_USAGE_ERROR);
}
#ifndef XML_DTD
ftprintf(stderr, T("Warning: Given amplification limit ignored") T(
", xmlwf has been compiled without DTD support.\n"));
#endif
break;
}
case T('b'): {
const XML_Char *valueText = NULL;
XMLWF_SHIFT_ARG_INTO(valueText, argc, argv, i, j);
errno = 0;
XML_Char *afterValueText = (XML_Char *)valueText;
attackThresholdBytes = tcstoull(valueText, &afterValueText, 10);
if ((errno != 0) || (afterValueText[0] != T('\0'))) {
// This prevents tperror(..) from reporting misleading "[..]: Success"
errno = ERANGE;
tperror(T("invalid ignore threshold")
T(" (needs an integer from 0 to 2^64-1)"));
exit(XMLWF_EXIT_USAGE_ERROR);
}
attackThresholdGiven = XML_TRUE;
#ifndef XML_DTD
ftprintf(stderr, T("Warning: Given attack threshold ignored") T(
", xmlwf has been compiled without DTD support.\n"));
#endif
break;
}
case T('\0'):
if (j > 1) {
i++;
@ -1060,6 +1118,19 @@ tmain(int argc, XML_Char **argv) {
exit(XMLWF_EXIT_INTERNAL_ERROR);
}
if (attackMaximumAmplification != -1.0f) {
#ifdef XML_DTD
XML_SetBillionLaughsAttackProtectionMaximumAmplification(
parser, attackMaximumAmplification);
#endif
}
if (attackThresholdGiven) {
#ifdef XML_DTD
XML_SetBillionLaughsAttackProtectionActivationThreshold(
parser, attackThresholdBytes);
#endif
}
if (requireStandalone)
XML_SetNotStandaloneHandler(parser, notStandalone);
XML_SetParamEntityParsing(parser, paramEntityParsing);

View file

@ -73,6 +73,14 @@ output_mode.add_argument('-m', action='store_true', help='write [m]eta XML, not
output_mode.add_argument('-t', action='store_true', help='write no XML output for [t]iming of plain parsing')
output_related.add_argument('-N', action='store_true', help='enable adding doctype and [n]otation declarations')
billion_laughs = parser.add_argument_group('billion laughs attack protection',
description='NOTE: '
'If you ever need to increase these values '
'for non-attack payload, please file a bug report.')
billion_laughs.add_argument('-a', metavar='FACTOR',
help='set maximum tolerated [a]mplification factor (default: 100.0)')
billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB)')
parser.add_argument('files', metavar='FILE', nargs='*', help='file to process (default: STDIN)')
info = parser.add_argument_group('info arguments')