mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-20360 Testing and fixing stack overflow in numparse.
This commit is contained in:
parent
78ac6ca3dc
commit
8a56b89b03
9 changed files with 188 additions and 52 deletions
|
@ -72,7 +72,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
|
|||
parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
|
||||
parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
|
||||
parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, parseFlags, status});
|
||||
// parser.addMatcher(new RequireNumberMatcher());
|
||||
parser->addMatcher(parser->fLocalValidators.number = {});
|
||||
|
||||
parser->freeze();
|
||||
return parser.orphan();
|
||||
|
@ -252,9 +252,13 @@ void NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool gre
|
|||
StringSegment segment(input, 0 != (fParseFlags & PARSE_FLAG_IGNORE_CASE));
|
||||
segment.adjustOffset(start);
|
||||
if (greedy) {
|
||||
parseGreedyRecursive(segment, result, status);
|
||||
parseGreedy(segment, result, status);
|
||||
} else if (0 != (fParseFlags & PARSE_FLAG_ALLOW_INFINITE_RECURSION)) {
|
||||
// Start at 1 so that recursionLevels never gets to 0
|
||||
parseLongestRecursive(segment, result, 1, status);
|
||||
} else {
|
||||
parseLongestRecursive(segment, result, status);
|
||||
// Arbitrary recursion safety limit: 100 levels.
|
||||
parseLongestRecursive(segment, result, -100, status);
|
||||
}
|
||||
for (int32_t i = 0; i < fNumMatchers; i++) {
|
||||
fMatchers[i]->postProcess(result);
|
||||
|
@ -262,44 +266,53 @@ void NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool gre
|
|||
result.postProcess();
|
||||
}
|
||||
|
||||
void NumberParserImpl::parseGreedyRecursive(StringSegment& segment, ParsedNumber& result,
|
||||
void NumberParserImpl::parseGreedy(StringSegment& segment, ParsedNumber& result,
|
||||
UErrorCode& status) const {
|
||||
// Base Case
|
||||
if (segment.length() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
int initialOffset = segment.getOffset();
|
||||
for (int32_t i = 0; i < fNumMatchers; i++) {
|
||||
// Note: this method is not recursive in order to avoid stack overflow.
|
||||
for (int i = 0; i <fNumMatchers;) {
|
||||
// Base Case
|
||||
if (segment.length() == 0) {
|
||||
return;
|
||||
}
|
||||
const NumberParseMatcher* matcher = fMatchers[i];
|
||||
if (!matcher->smokeTest(segment)) {
|
||||
// Matcher failed smoke test: try the next one
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
int32_t initialOffset = segment.getOffset();
|
||||
matcher->match(segment, result, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if (segment.getOffset() != initialOffset) {
|
||||
// In a greedy parse, recurse on only the first match.
|
||||
parseGreedyRecursive(segment, result, status);
|
||||
// The following line resets the offset so that the StringSegment says the same across
|
||||
// the function
|
||||
// call boundary. Since we recurse only once, this line is not strictly necessary.
|
||||
segment.setOffset(initialOffset);
|
||||
return;
|
||||
// Greedy heuristic: accept the match and loop back
|
||||
i = 0;
|
||||
continue;
|
||||
} else {
|
||||
// Matcher did not match: try the next one
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
UPRV_UNREACHABLE;
|
||||
}
|
||||
|
||||
// NOTE: If we get here, the greedy parse completed without consuming the entire string.
|
||||
}
|
||||
|
||||
void NumberParserImpl::parseLongestRecursive(StringSegment& segment, ParsedNumber& result,
|
||||
int32_t recursionLevels,
|
||||
UErrorCode& status) const {
|
||||
// Base Case
|
||||
if (segment.length() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Safety against stack overflow
|
||||
if (recursionLevels == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: Give a nice way for the matcher to reset the ParsedNumber?
|
||||
ParsedNumber initial(result);
|
||||
ParsedNumber candidate;
|
||||
|
@ -326,7 +339,7 @@ void NumberParserImpl::parseLongestRecursive(StringSegment& segment, ParsedNumbe
|
|||
|
||||
// If the entire segment was consumed, recurse.
|
||||
if (segment.getOffset() - initialOffset == charsToConsume) {
|
||||
parseLongestRecursive(segment, candidate, status);
|
||||
parseLongestRecursive(segment, candidate, recursionLevels + 1, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -95,9 +95,10 @@ class U_I18N_API NumberParserImpl : public MutableMatcherCollection, public UMem
|
|||
|
||||
explicit NumberParserImpl(parse_flags_t parseFlags);
|
||||
|
||||
void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
|
||||
void parseGreedy(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
|
||||
|
||||
void parseLongestRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
|
||||
void parseLongestRecursive(
|
||||
StringSegment& segment, ParsedNumber& result, int32_t recursionLevels, UErrorCode& status) const;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ bool ParsedNumber::seenNumber() const {
|
|||
return !quantity.bogus || 0 != (flags & FLAG_NAN) || 0 != (flags & FLAG_INFINITY);
|
||||
}
|
||||
|
||||
double ParsedNumber::getDouble() const {
|
||||
double ParsedNumber::getDouble(UErrorCode& status) const {
|
||||
bool sawNaN = 0 != (flags & FLAG_NAN);
|
||||
bool sawInfinity = 0 != (flags & FLAG_INFINITY);
|
||||
|
||||
|
@ -69,7 +69,10 @@ double ParsedNumber::getDouble() const {
|
|||
return INFINITY;
|
||||
}
|
||||
}
|
||||
U_ASSERT(!quantity.bogus);
|
||||
if (quantity.bogus) {
|
||||
status = U_INVALID_STATE_ERROR;
|
||||
return 0.0;
|
||||
}
|
||||
if (quantity.isZero() && quantity.isNegative()) {
|
||||
return -0.0;
|
||||
}
|
||||
|
|
|
@ -49,6 +49,7 @@ enum ParseFlags {
|
|||
// PARSE_FLAG_OPTIMIZE = 0x0800, // no longer used
|
||||
// PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000, // not used in ICU4C
|
||||
PARSE_FLAG_NO_FOREIGN_CURRENCY = 0x2000,
|
||||
PARSE_FLAG_ALLOW_INFINITE_RECURSION = 0x4000,
|
||||
};
|
||||
|
||||
|
||||
|
@ -160,7 +161,7 @@ class U_I18N_API ParsedNumber {
|
|||
|
||||
bool seenNumber() const;
|
||||
|
||||
double getDouble() const;
|
||||
double getDouble(UErrorCode& status) const;
|
||||
|
||||
void populateFormattable(Formattable& output, parse_flags_t parseFlags) const;
|
||||
|
||||
|
|
|
@ -237,6 +237,8 @@ class NumberParserTest : public IntlTest {
|
|||
void testAffixPatternMatcher();
|
||||
void testGroupingDisabled();
|
||||
void testCaseFolding();
|
||||
void test20360_BidiOverflow();
|
||||
void testInfiniteRecursion();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
|
||||
};
|
||||
|
|
|
@ -25,6 +25,8 @@ void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& na
|
|||
TESTCASE_AUTO(testSeriesMatcher);
|
||||
TESTCASE_AUTO(testCombinedCurrencyMatcher);
|
||||
TESTCASE_AUTO(testAffixPatternMatcher);
|
||||
TESTCASE_AUTO(test20360_BidiOverflow);
|
||||
TESTCASE_AUTO(testInfiniteRecursion);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -139,10 +141,10 @@ void NumberParserTest::testBasic() {
|
|||
ParsedNumber resultObject;
|
||||
parser->parse(inputString, true, resultObject, status);
|
||||
assertTrue("Greedy Parse failed: " + message, resultObject.success());
|
||||
assertEquals(
|
||||
"Greedy Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
|
||||
assertEquals(
|
||||
"Greedy Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble());
|
||||
assertEquals("Greedy Parse failed: " + message,
|
||||
cas.expectedCharsConsumed, resultObject.charEnd);
|
||||
assertEquals("Greedy Parse failed: " + message,
|
||||
cas.expectedResultDouble, resultObject.getDouble(status));
|
||||
}
|
||||
|
||||
if (0 != (cas.flags & 0x02)) {
|
||||
|
@ -157,7 +159,7 @@ void NumberParserTest::testBasic() {
|
|||
assertEquals(
|
||||
"Non-Greedy Parse failed: " + message,
|
||||
cas.expectedResultDouble,
|
||||
resultObject.getDouble());
|
||||
resultObject.getDouble(status));
|
||||
}
|
||||
|
||||
if (0 != (cas.flags & 0x04)) {
|
||||
|
@ -171,10 +173,10 @@ void NumberParserTest::testBasic() {
|
|||
ParsedNumber resultObject;
|
||||
parser->parse(inputString, true, resultObject, status);
|
||||
assertTrue("Strict Parse failed: " + message, resultObject.success());
|
||||
assertEquals(
|
||||
"Strict Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
|
||||
assertEquals(
|
||||
"Strict Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble());
|
||||
assertEquals("Strict Parse failed: " + message,
|
||||
cas.expectedCharsConsumed, resultObject.charEnd);
|
||||
assertEquals("Strict Parse failed: " + message,
|
||||
cas.expectedResultDouble, resultObject.getDouble(status));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -350,5 +352,59 @@ void NumberParserTest::testAffixPatternMatcher() {
|
|||
}
|
||||
}
|
||||
|
||||
void NumberParserTest::test20360_BidiOverflow() {
|
||||
IcuTestErrorCode status(*this, "test20360_BidiOverflow");
|
||||
UnicodeString inputString;
|
||||
inputString.append(u'-');
|
||||
for (int32_t i=0; i<100000; i++) {
|
||||
inputString.append(u'\u061C');
|
||||
}
|
||||
inputString.append(u'5');
|
||||
|
||||
LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status));
|
||||
if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
|
||||
return;
|
||||
}
|
||||
|
||||
ParsedNumber resultObject;
|
||||
parser->parse(inputString, true, resultObject, status);
|
||||
assertTrue("Greedy Parse, success", resultObject.success());
|
||||
assertEquals("Greedy Parse, chars consumed", 100002, resultObject.charEnd);
|
||||
assertEquals("Greedy Parse, expected double", -5.0, resultObject.getDouble(status));
|
||||
|
||||
resultObject.clear();
|
||||
parser->parse(inputString, false, resultObject, status);
|
||||
assertFalse("Non-Greedy Parse, success", resultObject.success());
|
||||
assertEquals("Non-Greedy Parse, chars consumed", 1, resultObject.charEnd);
|
||||
}
|
||||
|
||||
void NumberParserTest::testInfiniteRecursion() {
|
||||
IcuTestErrorCode status(*this, "testInfiniteRecursion");
|
||||
UnicodeString inputString;
|
||||
inputString.append(u'-');
|
||||
for (int32_t i=0; i<200; i++) {
|
||||
inputString.append(u'\u061C');
|
||||
}
|
||||
inputString.append(u'5');
|
||||
|
||||
LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status));
|
||||
if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
|
||||
return;
|
||||
}
|
||||
|
||||
ParsedNumber resultObject;
|
||||
parser->parse(inputString, false, resultObject, status);
|
||||
assertFalse("Default recursion limit, success", resultObject.success());
|
||||
assertEquals("Default recursion limit, chars consumed", 1, resultObject.charEnd);
|
||||
|
||||
parser.adoptInstead(NumberParserImpl::createSimpleParser(
|
||||
"en", u"0", PARSE_FLAG_ALLOW_INFINITE_RECURSION, status));
|
||||
resultObject.clear();
|
||||
parser->parse(inputString, false, resultObject, status);
|
||||
assertTrue("Unlimited recursion, success", resultObject.success());
|
||||
assertEquals("Unlimited recursion, chars consumed", 202, resultObject.charEnd);
|
||||
assertEquals("Unlimited recursion, expected double", -5.0, resultObject.getDouble(status));
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -321,9 +321,13 @@ public class NumberParserImpl {
|
|||
0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE));
|
||||
segment.adjustOffset(start);
|
||||
if (greedy) {
|
||||
parseGreedyRecursive(segment, result);
|
||||
parseGreedy(segment, result);
|
||||
} else if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_ALLOW_INFINITE_RECURSION)) {
|
||||
// Start at 1 so that recursionLevels never gets to 0
|
||||
parseLongestRecursive(segment, result, 1);
|
||||
} else {
|
||||
parseLongestRecursive(segment, result);
|
||||
// Arbitrary recursion safety limit: 100 levels.
|
||||
parseLongestRecursive(segment, result, -100);
|
||||
}
|
||||
for (NumberParseMatcher matcher : matchers) {
|
||||
matcher.postProcess(result);
|
||||
|
@ -331,39 +335,46 @@ public class NumberParserImpl {
|
|||
result.postProcess();
|
||||
}
|
||||
|
||||
private void parseGreedyRecursive(StringSegment segment, ParsedNumber result) {
|
||||
// Base Case
|
||||
if (segment.length() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
int initialOffset = segment.getOffset();
|
||||
for (int i = 0; i < matchers.size(); i++) {
|
||||
private void parseGreedy(StringSegment segment, ParsedNumber result) {
|
||||
// Note: this method is not recursive in order to avoid stack overflow.
|
||||
for (int i = 0; i < matchers.size();) {
|
||||
// Base Case
|
||||
if (segment.length() == 0) {
|
||||
return;
|
||||
}
|
||||
NumberParseMatcher matcher = matchers.get(i);
|
||||
if (!matcher.smokeTest(segment)) {
|
||||
// Matcher failed smoke test: try the next one
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
int initialOffset = segment.getOffset();
|
||||
matcher.match(segment, result);
|
||||
if (segment.getOffset() != initialOffset) {
|
||||
// In a greedy parse, recurse on only the first match.
|
||||
parseGreedyRecursive(segment, result);
|
||||
// The following line resets the offset so that the StringSegment says the same across
|
||||
// the function
|
||||
// call boundary. Since we recurse only once, this line is not strictly necessary.
|
||||
segment.setOffset(initialOffset);
|
||||
return;
|
||||
// Greedy heuristic: accept the match and loop back
|
||||
i = 0;
|
||||
continue;
|
||||
} else {
|
||||
// Matcher did not match: try the next one
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: If we get here, the greedy parse completed without consuming the entire string.
|
||||
}
|
||||
|
||||
private void parseLongestRecursive(StringSegment segment, ParsedNumber result) {
|
||||
private void parseLongestRecursive(StringSegment segment, ParsedNumber result, int recursionLevels) {
|
||||
// Base Case
|
||||
if (segment.length() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Safety against stack overflow
|
||||
if (recursionLevels == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: Give a nice way for the matcher to reset the ParsedNumber?
|
||||
ParsedNumber initial = new ParsedNumber();
|
||||
initial.copyFrom(result);
|
||||
|
@ -388,7 +399,7 @@ public class NumberParserImpl {
|
|||
|
||||
// If the entire segment was consumed, recurse.
|
||||
if (segment.getOffset() - initialOffset == charsToConsume) {
|
||||
parseLongestRecursive(segment, candidate);
|
||||
parseLongestRecursive(segment, candidate, recursionLevels + 1);
|
||||
if (candidate.isBetterThan(result)) {
|
||||
result.copyFrom(candidate);
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ public class ParsingUtils {
|
|||
// public static final int PARSE_FLAG_OPTIMIZE = 0x0800; // no longer used
|
||||
public static final int PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000;
|
||||
public static final int PARSE_FLAG_NO_FOREIGN_CURRENCIES = 0x2000;
|
||||
public static final int PARSE_FLAG_ALLOW_INFINITE_RECURSION = 0x4000;
|
||||
|
||||
public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
|
||||
for (EntryRange range : input.ranges()) {
|
||||
|
|
|
@ -389,4 +389,52 @@ public class NumberParserTest {
|
|||
result.charEnd);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test20360_BidiOverflow() {
|
||||
StringBuilder inputString = new StringBuilder();
|
||||
inputString.append('-');
|
||||
for (int i=0; i<100000; i++) {
|
||||
inputString.append('\u061C');
|
||||
}
|
||||
inputString.append('5');
|
||||
|
||||
NumberParserImpl parser = NumberParserImpl.createSimpleParser(ULocale.ENGLISH, "0", 0);
|
||||
|
||||
ParsedNumber resultObject = new ParsedNumber();
|
||||
parser.parse(inputString.toString(), true, resultObject);
|
||||
assertTrue("Greedy Parse, success", resultObject.success());
|
||||
assertEquals("Greedy Parse, chars consumed", 100002, resultObject.charEnd);
|
||||
assertEquals("Greedy Parse, expected double", -5, resultObject.getNumber().intValue());
|
||||
|
||||
resultObject.clear();
|
||||
parser.parse(inputString.toString(), false, resultObject);
|
||||
assertFalse("Non-Greedy Parse, success", resultObject.success());
|
||||
assertEquals("Non-Greedy Parse, chars consumed", 1, resultObject.charEnd);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInfiniteRecursion() {
|
||||
StringBuilder inputString = new StringBuilder();
|
||||
inputString.append('-');
|
||||
for (int i=0; i<200; i++) {
|
||||
inputString.append('\u061C');
|
||||
}
|
||||
inputString.append('5');
|
||||
|
||||
NumberParserImpl parser = NumberParserImpl.createSimpleParser(ULocale.ENGLISH, "0", 0);
|
||||
|
||||
ParsedNumber resultObject = new ParsedNumber();
|
||||
parser.parse(inputString.toString(), false, resultObject);
|
||||
assertFalse("Default recursion limit, success", resultObject.success());
|
||||
assertEquals("Default recursion limit, chars consumed", 1, resultObject.charEnd);
|
||||
|
||||
parser = NumberParserImpl.createSimpleParser(
|
||||
ULocale.ENGLISH, "0", ParsingUtils.PARSE_FLAG_ALLOW_INFINITE_RECURSION);
|
||||
resultObject.clear();
|
||||
parser.parse(inputString.toString(), false, resultObject);
|
||||
assertTrue("Unlimited recursion, success", resultObject.success());
|
||||
assertEquals("Unlimited recursion, chars consumed", 202, resultObject.charEnd);
|
||||
assertEquals("Unlimited recursion, expected double", -5, resultObject.getNumber().intValue());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue