ICU-13547 limit nesting depth of UnicodeSet pattern

X-SVN-Rev: 40979
This commit is contained in:
Markus Scherer 2018-02-23 21:39:23 +00:00
parent 17e51eed71
commit fdbe2f371b
7 changed files with 71 additions and 15 deletions

View file

@ -1521,6 +1521,7 @@ private:
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
int32_t depth,
UErrorCode& ec);
//----------------------------------------------------------------

View file

@ -129,7 +129,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status);
applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status);
if (U_FAILURE(status)) return *this;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");

View file

@ -257,6 +257,7 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
return i.fSet;
}
namespace {
// Cache some sets for other services -------------------------------------- ***
void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
@ -315,6 +316,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
// memory leak checker tools
#define _dbgct(me)
} // namespace
//----------------------------------------------------------------
// Constructors &c
//----------------------------------------------------------------
@ -382,7 +385,7 @@ UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
// _applyPattern calls add() etc., which set pat to empty.
UnicodeString rebuiltPat;
RuleCharacterIterator chars(pattern, symbols, pos);
applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status);
applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status);
if (U_FAILURE(status)) return;
if (chars.inVariable()) {
// syntaxError(chars, "Extra chars in variable value");
@ -406,6 +409,8 @@ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
// Implementation: Pattern parsing
//----------------------------------------------------------------
namespace {
/**
* A small all-inline class to manage a UnicodeSet pointer. Add
* operator->() etc. as needed.
@ -424,6 +429,10 @@ public:
}
};
constexpr int32_t MAX_DEPTH = 100;
} // namespace
/**
* Parse the pattern from the given RuleCharacterIterator. The
* iterator is advanced over the parsed pattern.
@ -443,8 +452,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
int32_t depth,
UErrorCode& ec) {
if (U_FAILURE(ec)) return;
if (depth > MAX_DEPTH) {
ec = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// Syntax characters: [ ] ^ - & { }
@ -579,7 +593,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
}
switch (setMode) {
case 1:
nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec);
break;
case 2:
chars.skipIgnored(opts);
@ -837,6 +851,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// Property set implementation
//----------------------------------------------------------------
namespace {
static UBool numericValueFilter(UChar32 ch, void* context) {
return u_getNumericValue(ch) == *(double*)context;
}
@ -868,6 +884,8 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
return uscript_hasScript(ch, *(UScriptCode*)context);
}
} // namespace
/**
* Generic filter-based scanning code for UCD property UnicodeSets.
*/
@ -924,6 +942,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
}
}
namespace {
static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
/* Note: we use ' ' in compiler code page */
int32_t j = 0;
@ -941,6 +961,8 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
return TRUE;
}
} // namespace
//----------------------------------------------------------------
// Property set API
//----------------------------------------------------------------

View file

@ -42,15 +42,6 @@ UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) {
return left + UnicodeSetTest::escape(pat);
}
#define CASE(id,test) case id: \
name = #test; \
if (exec) { \
logln(#test "---"); \
logln(); \
test(); \
} \
break
UnicodeSetTest::UnicodeSetTest() : utf8Cnv(NULL) {
}
@ -100,6 +91,7 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
TESTCASE_AUTO(TestUCAUnsafeBackwards);
TESTCASE_AUTO(TestIntOverflow);
TESTCASE_AUTO(TestUnusedCcc);
TESTCASE_AUTO(TestDeepPattern);
TESTCASE_AUTO_END;
}
@ -3970,3 +3962,19 @@ void UnicodeSetTest::TestUnusedCcc() {
assertTrue("[:ccc=1.1:] -> empty set", ccc1_1.isEmpty());
#endif
}
void UnicodeSetTest::TestDeepPattern() {
IcuTestErrorCode errorCode(*this, "TestDeepPattern");
// Nested ranges are parsed via recursion which can use a lot of stack space.
// After a reasonable limit, we should get an error.
constexpr int32_t DEPTH = 20000;
UnicodeString pattern, suffix;
for (int32_t i = 0; i < DEPTH; ++i) {
pattern.append(u"[a", 2);
suffix.append(']');
}
pattern.append(suffix);
UnicodeSet set(pattern, errorCode);
assertTrue("[a[a[a...1000s...]]] -> error", errorCode.isFailure());
errorCode.reset();
}

View file

@ -93,6 +93,7 @@ private:
void TestUCAUnsafeBackwards();
void TestIntOverflow();
void TestUnusedCcc();
void TestDeepPattern();
private:

View file

@ -2422,7 +2422,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
StringBuilder rebuiltPat = new StringBuilder();
RuleCharacterIterator chars =
new RuleCharacterIterator(pattern, symbols, pos);
applyPattern(chars, symbols, rebuiltPat, options);
applyPattern(chars, symbols, rebuiltPat, options, 0);
if (chars.inVariable()) {
syntaxError(chars, "Extra chars in variable value");
}
@ -2458,6 +2458,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
SETMODE2_PROPERTYPAT = 2,
SETMODE3_PREPARSED = 3;
private static final int MAX_DEPTH = 100;
/**
* Parse the pattern from the given RuleCharacterIterator. The
* iterator is advanced over the parsed pattern.
@ -2473,7 +2475,10 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
* IGNORE_SPACE, CASE.
*/
private void applyPattern(RuleCharacterIterator chars, SymbolTable symbols,
Appendable rebuiltPat, int options) {
Appendable rebuiltPat, int options, int depth) {
if (depth > MAX_DEPTH) {
syntaxError(chars, "Pattern nested too deeply");
}
// Syntax characters: [ ] ^ - & { }
@ -2605,7 +2610,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
}
switch (setMode) {
case SETMODE1_UNICODESET:
nested.applyPattern(chars, symbols, patBuf, options);
nested.applyPattern(chars, symbols, patBuf, options, depth + 1);
break;
case SETMODE2_PROPERTYPAT:
chars.skipIgnored(opts);

View file

@ -2773,4 +2773,23 @@ public class UnicodeSetTest extends TestFmwk {
} catch (IllegalArgumentException expected) {
}
}
@Test
public void TestDeepPattern() {
// Nested ranges are parsed via recursion which can use a lot of stack space.
// After a reasonable limit, we should get an error.
final int DEPTH = 20000;
StringBuilder pattern = new StringBuilder();
StringBuilder suffix = new StringBuilder();
for (int i = 0; i < DEPTH; ++i) {
pattern.append("[a");
suffix.append(']');
}
pattern.append(suffix);
try {
new UnicodeSet(pattern.toString());
fail("[a[a[a...1000s...]]] did not throw an exception");
} catch(RuntimeException expected) {
}
}
}