mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-13547 limit nesting depth of UnicodeSet pattern
X-SVN-Rev: 40979
This commit is contained in:
parent
17e51eed71
commit
fdbe2f371b
7 changed files with 71 additions and 15 deletions
|
@ -1521,6 +1521,7 @@ private:
|
|||
UnicodeString& rebuiltPat,
|
||||
uint32_t options,
|
||||
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
|
||||
int32_t depth,
|
||||
UErrorCode& ec);
|
||||
|
||||
//----------------------------------------------------------------
|
||||
|
|
|
@ -129,7 +129,7 @@ UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
|
|||
// _applyPattern calls add() etc., which set pat to empty.
|
||||
UnicodeString rebuiltPat;
|
||||
RuleCharacterIterator chars(pattern, symbols, pos);
|
||||
applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status);
|
||||
applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status);
|
||||
if (U_FAILURE(status)) return *this;
|
||||
if (chars.inVariable()) {
|
||||
// syntaxError(chars, "Extra chars in variable value");
|
||||
|
|
|
@ -257,6 +257,7 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
|
|||
return i.fSet;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Cache some sets for other services -------------------------------------- ***
|
||||
void U_CALLCONV createUni32Set(UErrorCode &errorCode) {
|
||||
|
@ -315,6 +316,8 @@ isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
|
|||
// memory leak checker tools
|
||||
#define _dbgct(me)
|
||||
|
||||
} // namespace
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Constructors &c
|
||||
//----------------------------------------------------------------
|
||||
|
@ -382,7 +385,7 @@ UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern,
|
|||
// _applyPattern calls add() etc., which set pat to empty.
|
||||
UnicodeString rebuiltPat;
|
||||
RuleCharacterIterator chars(pattern, symbols, pos);
|
||||
applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, status);
|
||||
applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status);
|
||||
if (U_FAILURE(status)) return;
|
||||
if (chars.inVariable()) {
|
||||
// syntaxError(chars, "Extra chars in variable value");
|
||||
|
@ -406,6 +409,8 @@ UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
|
|||
// Implementation: Pattern parsing
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
/**
|
||||
* A small all-inline class to manage a UnicodeSet pointer. Add
|
||||
* operator->() etc. as needed.
|
||||
|
@ -424,6 +429,10 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
constexpr int32_t MAX_DEPTH = 100;
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Parse the pattern from the given RuleCharacterIterator. The
|
||||
* iterator is advanced over the parsed pattern.
|
||||
|
@ -443,8 +452,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
UnicodeString& rebuiltPat,
|
||||
uint32_t options,
|
||||
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
|
||||
int32_t depth,
|
||||
UErrorCode& ec) {
|
||||
if (U_FAILURE(ec)) return;
|
||||
if (depth > MAX_DEPTH) {
|
||||
ec = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
// Syntax characters: [ ] ^ - & { }
|
||||
|
||||
|
@ -579,7 +593,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
}
|
||||
switch (setMode) {
|
||||
case 1:
|
||||
nested->applyPattern(chars, symbols, patLocal, options, caseClosure, ec);
|
||||
nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec);
|
||||
break;
|
||||
case 2:
|
||||
chars.skipIgnored(opts);
|
||||
|
@ -837,6 +851,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
|||
// Property set implementation
|
||||
//----------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
static UBool numericValueFilter(UChar32 ch, void* context) {
|
||||
return u_getNumericValue(ch) == *(double*)context;
|
||||
}
|
||||
|
@ -868,6 +884,8 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) {
|
|||
return uscript_hasScript(ch, *(UScriptCode*)context);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/**
|
||||
* Generic filter-based scanning code for UCD property UnicodeSets.
|
||||
*/
|
||||
|
@ -924,6 +942,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
|
|||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
|
||||
/* Note: we use ' ' in compiler code page */
|
||||
int32_t j = 0;
|
||||
|
@ -941,6 +961,8 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
//----------------------------------------------------------------
|
||||
// Property set API
|
||||
//----------------------------------------------------------------
|
||||
|
|
|
@ -42,15 +42,6 @@ UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) {
|
|||
return left + UnicodeSetTest::escape(pat);
|
||||
}
|
||||
|
||||
#define CASE(id,test) case id: \
|
||||
name = #test; \
|
||||
if (exec) { \
|
||||
logln(#test "---"); \
|
||||
logln(); \
|
||||
test(); \
|
||||
} \
|
||||
break
|
||||
|
||||
UnicodeSetTest::UnicodeSetTest() : utf8Cnv(NULL) {
|
||||
}
|
||||
|
||||
|
@ -100,6 +91,7 @@ UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
|
|||
TESTCASE_AUTO(TestUCAUnsafeBackwards);
|
||||
TESTCASE_AUTO(TestIntOverflow);
|
||||
TESTCASE_AUTO(TestUnusedCcc);
|
||||
TESTCASE_AUTO(TestDeepPattern);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -3970,3 +3962,19 @@ void UnicodeSetTest::TestUnusedCcc() {
|
|||
assertTrue("[:ccc=1.1:] -> empty set", ccc1_1.isEmpty());
|
||||
#endif
|
||||
}
|
||||
|
||||
void UnicodeSetTest::TestDeepPattern() {
|
||||
IcuTestErrorCode errorCode(*this, "TestDeepPattern");
|
||||
// Nested ranges are parsed via recursion which can use a lot of stack space.
|
||||
// After a reasonable limit, we should get an error.
|
||||
constexpr int32_t DEPTH = 20000;
|
||||
UnicodeString pattern, suffix;
|
||||
for (int32_t i = 0; i < DEPTH; ++i) {
|
||||
pattern.append(u"[a", 2);
|
||||
suffix.append(']');
|
||||
}
|
||||
pattern.append(suffix);
|
||||
UnicodeSet set(pattern, errorCode);
|
||||
assertTrue("[a[a[a...1000s...]]] -> error", errorCode.isFailure());
|
||||
errorCode.reset();
|
||||
}
|
||||
|
|
|
@ -93,6 +93,7 @@ private:
|
|||
void TestUCAUnsafeBackwards();
|
||||
void TestIntOverflow();
|
||||
void TestUnusedCcc();
|
||||
void TestDeepPattern();
|
||||
|
||||
private:
|
||||
|
||||
|
|
|
@ -2422,7 +2422,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
StringBuilder rebuiltPat = new StringBuilder();
|
||||
RuleCharacterIterator chars =
|
||||
new RuleCharacterIterator(pattern, symbols, pos);
|
||||
applyPattern(chars, symbols, rebuiltPat, options);
|
||||
applyPattern(chars, symbols, rebuiltPat, options, 0);
|
||||
if (chars.inVariable()) {
|
||||
syntaxError(chars, "Extra chars in variable value");
|
||||
}
|
||||
|
@ -2458,6 +2458,8 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
SETMODE2_PROPERTYPAT = 2,
|
||||
SETMODE3_PREPARSED = 3;
|
||||
|
||||
private static final int MAX_DEPTH = 100;
|
||||
|
||||
/**
|
||||
* Parse the pattern from the given RuleCharacterIterator. The
|
||||
* iterator is advanced over the parsed pattern.
|
||||
|
@ -2473,7 +2475,10 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
* IGNORE_SPACE, CASE.
|
||||
*/
|
||||
private void applyPattern(RuleCharacterIterator chars, SymbolTable symbols,
|
||||
Appendable rebuiltPat, int options) {
|
||||
Appendable rebuiltPat, int options, int depth) {
|
||||
if (depth > MAX_DEPTH) {
|
||||
syntaxError(chars, "Pattern nested too deeply");
|
||||
}
|
||||
|
||||
// Syntax characters: [ ] ^ - & { }
|
||||
|
||||
|
@ -2605,7 +2610,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
|
|||
}
|
||||
switch (setMode) {
|
||||
case SETMODE1_UNICODESET:
|
||||
nested.applyPattern(chars, symbols, patBuf, options);
|
||||
nested.applyPattern(chars, symbols, patBuf, options, depth + 1);
|
||||
break;
|
||||
case SETMODE2_PROPERTYPAT:
|
||||
chars.skipIgnored(opts);
|
||||
|
|
|
@ -2773,4 +2773,23 @@ public class UnicodeSetTest extends TestFmwk {
|
|||
} catch (IllegalArgumentException expected) {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestDeepPattern() {
|
||||
// Nested ranges are parsed via recursion which can use a lot of stack space.
|
||||
// After a reasonable limit, we should get an error.
|
||||
final int DEPTH = 20000;
|
||||
StringBuilder pattern = new StringBuilder();
|
||||
StringBuilder suffix = new StringBuilder();
|
||||
for (int i = 0; i < DEPTH; ++i) {
|
||||
pattern.append("[a");
|
||||
suffix.append(']');
|
||||
}
|
||||
pattern.append(suffix);
|
||||
try {
|
||||
new UnicodeSet(pattern.toString());
|
||||
fail("[a[a[a...1000s...]]] did not throw an exception");
|
||||
} catch(RuntimeException expected) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue