ICU-20360 Testing and fixing stack overflow in numparse.

This commit is contained in:
Shane Carr 2019-02-06 14:38:36 -08:00 committed by Shane F. Carr
parent 78ac6ca3dc
commit 8a56b89b03
9 changed files with 188 additions and 52 deletions

View file

@ -72,7 +72,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, parseFlags, status});
// parser.addMatcher(new RequireNumberMatcher());
parser->addMatcher(parser->fLocalValidators.number = {});
parser->freeze();
return parser.orphan();
@ -252,9 +252,13 @@ void NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool gre
StringSegment segment(input, 0 != (fParseFlags & PARSE_FLAG_IGNORE_CASE));
segment.adjustOffset(start);
if (greedy) {
parseGreedyRecursive(segment, result, status);
parseGreedy(segment, result, status);
} else if (0 != (fParseFlags & PARSE_FLAG_ALLOW_INFINITE_RECURSION)) {
// Start at 1 so that recursionLevels never gets to 0
parseLongestRecursive(segment, result, 1, status);
} else {
parseLongestRecursive(segment, result, status);
// Arbitrary recursion safety limit: 100 levels.
parseLongestRecursive(segment, result, -100, status);
}
for (int32_t i = 0; i < fNumMatchers; i++) {
fMatchers[i]->postProcess(result);
@ -262,44 +266,53 @@ void NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool gre
result.postProcess();
}
void NumberParserImpl::parseGreedyRecursive(StringSegment& segment, ParsedNumber& result,
void NumberParserImpl::parseGreedy(StringSegment& segment, ParsedNumber& result,
UErrorCode& status) const {
// Base Case
if (segment.length() == 0) {
return;
}
int initialOffset = segment.getOffset();
for (int32_t i = 0; i < fNumMatchers; i++) {
// Note: this method is not recursive in order to avoid stack overflow.
for (int i = 0; i <fNumMatchers;) {
// Base Case
if (segment.length() == 0) {
return;
}
const NumberParseMatcher* matcher = fMatchers[i];
if (!matcher->smokeTest(segment)) {
// Matcher failed smoke test: try the next one
i++;
continue;
}
int32_t initialOffset = segment.getOffset();
matcher->match(segment, result, status);
if (U_FAILURE(status)) {
return;
}
if (segment.getOffset() != initialOffset) {
// In a greedy parse, recurse on only the first match.
parseGreedyRecursive(segment, result, status);
// The following line resets the offset so that the StringSegment says the same across
// the function
// call boundary. Since we recurse only once, this line is not strictly necessary.
segment.setOffset(initialOffset);
return;
// Greedy heuristic: accept the match and loop back
i = 0;
continue;
} else {
// Matcher did not match: try the next one
i++;
continue;
}
UPRV_UNREACHABLE;
}
// NOTE: If we get here, the greedy parse completed without consuming the entire string.
}
void NumberParserImpl::parseLongestRecursive(StringSegment& segment, ParsedNumber& result,
int32_t recursionLevels,
UErrorCode& status) const {
// Base Case
if (segment.length() == 0) {
return;
}
// Safety against stack overflow
if (recursionLevels == 0) {
return;
}
// TODO: Give a nice way for the matcher to reset the ParsedNumber?
ParsedNumber initial(result);
ParsedNumber candidate;
@ -326,7 +339,7 @@ void NumberParserImpl::parseLongestRecursive(StringSegment& segment, ParsedNumbe
// If the entire segment was consumed, recurse.
if (segment.getOffset() - initialOffset == charsToConsume) {
parseLongestRecursive(segment, candidate, status);
parseLongestRecursive(segment, candidate, recursionLevels + 1, status);
if (U_FAILURE(status)) {
return;
}

View file

@ -95,9 +95,10 @@ class U_I18N_API NumberParserImpl : public MutableMatcherCollection, public UMem
explicit NumberParserImpl(parse_flags_t parseFlags);
void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
void parseGreedy(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
void parseLongestRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const;
void parseLongestRecursive(
StringSegment& segment, ParsedNumber& result, int32_t recursionLevels, UErrorCode& status) const;
};

View file

@ -52,7 +52,7 @@ bool ParsedNumber::seenNumber() const {
return !quantity.bogus || 0 != (flags & FLAG_NAN) || 0 != (flags & FLAG_INFINITY);
}
double ParsedNumber::getDouble() const {
double ParsedNumber::getDouble(UErrorCode& status) const {
bool sawNaN = 0 != (flags & FLAG_NAN);
bool sawInfinity = 0 != (flags & FLAG_INFINITY);
@ -69,7 +69,10 @@ double ParsedNumber::getDouble() const {
return INFINITY;
}
}
U_ASSERT(!quantity.bogus);
if (quantity.bogus) {
status = U_INVALID_STATE_ERROR;
return 0.0;
}
if (quantity.isZero() && quantity.isNegative()) {
return -0.0;
}

View file

@ -49,6 +49,7 @@ enum ParseFlags {
// PARSE_FLAG_OPTIMIZE = 0x0800, // no longer used
// PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000, // not used in ICU4C
PARSE_FLAG_NO_FOREIGN_CURRENCY = 0x2000,
PARSE_FLAG_ALLOW_INFINITE_RECURSION = 0x4000,
};
@ -160,7 +161,7 @@ class U_I18N_API ParsedNumber {
bool seenNumber() const;
double getDouble() const;
double getDouble(UErrorCode& status) const;
void populateFormattable(Formattable& output, parse_flags_t parseFlags) const;

View file

@ -237,6 +237,8 @@ class NumberParserTest : public IntlTest {
void testAffixPatternMatcher();
void testGroupingDisabled();
void testCaseFolding();
void test20360_BidiOverflow();
void testInfiniteRecursion();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
};

View file

@ -25,6 +25,8 @@ void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& na
TESTCASE_AUTO(testSeriesMatcher);
TESTCASE_AUTO(testCombinedCurrencyMatcher);
TESTCASE_AUTO(testAffixPatternMatcher);
TESTCASE_AUTO(test20360_BidiOverflow);
TESTCASE_AUTO(testInfiniteRecursion);
TESTCASE_AUTO_END;
}
@ -139,10 +141,10 @@ void NumberParserTest::testBasic() {
ParsedNumber resultObject;
parser->parse(inputString, true, resultObject, status);
assertTrue("Greedy Parse failed: " + message, resultObject.success());
assertEquals(
"Greedy Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
assertEquals(
"Greedy Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble());
assertEquals("Greedy Parse failed: " + message,
cas.expectedCharsConsumed, resultObject.charEnd);
assertEquals("Greedy Parse failed: " + message,
cas.expectedResultDouble, resultObject.getDouble(status));
}
if (0 != (cas.flags & 0x02)) {
@ -157,7 +159,7 @@ void NumberParserTest::testBasic() {
assertEquals(
"Non-Greedy Parse failed: " + message,
cas.expectedResultDouble,
resultObject.getDouble());
resultObject.getDouble(status));
}
if (0 != (cas.flags & 0x04)) {
@ -171,10 +173,10 @@ void NumberParserTest::testBasic() {
ParsedNumber resultObject;
parser->parse(inputString, true, resultObject, status);
assertTrue("Strict Parse failed: " + message, resultObject.success());
assertEquals(
"Strict Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
assertEquals(
"Strict Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble());
assertEquals("Strict Parse failed: " + message,
cas.expectedCharsConsumed, resultObject.charEnd);
assertEquals("Strict Parse failed: " + message,
cas.expectedResultDouble, resultObject.getDouble(status));
}
}
}
@ -350,5 +352,59 @@ void NumberParserTest::testAffixPatternMatcher() {
}
}
void NumberParserTest::test20360_BidiOverflow() {
IcuTestErrorCode status(*this, "test20360_BidiOverflow");
UnicodeString inputString;
inputString.append(u'-');
for (int32_t i=0; i<100000; i++) {
inputString.append(u'\u061C');
}
inputString.append(u'5');
LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status));
if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
return;
}
ParsedNumber resultObject;
parser->parse(inputString, true, resultObject, status);
assertTrue("Greedy Parse, success", resultObject.success());
assertEquals("Greedy Parse, chars consumed", 100002, resultObject.charEnd);
assertEquals("Greedy Parse, expected double", -5.0, resultObject.getDouble(status));
resultObject.clear();
parser->parse(inputString, false, resultObject, status);
assertFalse("Non-Greedy Parse, success", resultObject.success());
assertEquals("Non-Greedy Parse, chars consumed", 1, resultObject.charEnd);
}
void NumberParserTest::testInfiniteRecursion() {
IcuTestErrorCode status(*this, "testInfiniteRecursion");
UnicodeString inputString;
inputString.append(u'-');
for (int32_t i=0; i<200; i++) {
inputString.append(u'\u061C');
}
inputString.append(u'5');
LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status));
if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
return;
}
ParsedNumber resultObject;
parser->parse(inputString, false, resultObject, status);
assertFalse("Default recursion limit, success", resultObject.success());
assertEquals("Default recursion limit, chars consumed", 1, resultObject.charEnd);
parser.adoptInstead(NumberParserImpl::createSimpleParser(
"en", u"0", PARSE_FLAG_ALLOW_INFINITE_RECURSION, status));
resultObject.clear();
parser->parse(inputString, false, resultObject, status);
assertTrue("Unlimited recursion, success", resultObject.success());
assertEquals("Unlimited recursion, chars consumed", 202, resultObject.charEnd);
assertEquals("Unlimited recursion, expected double", -5.0, resultObject.getDouble(status));
}
#endif

View file

@ -321,9 +321,13 @@ public class NumberParserImpl {
0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE));
segment.adjustOffset(start);
if (greedy) {
parseGreedyRecursive(segment, result);
parseGreedy(segment, result);
} else if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_ALLOW_INFINITE_RECURSION)) {
// Start at 1 so that recursionLevels never gets to 0
parseLongestRecursive(segment, result, 1);
} else {
parseLongestRecursive(segment, result);
// Arbitrary recursion safety limit: 100 levels.
parseLongestRecursive(segment, result, -100);
}
for (NumberParseMatcher matcher : matchers) {
matcher.postProcess(result);
@ -331,39 +335,46 @@ public class NumberParserImpl {
result.postProcess();
}
private void parseGreedyRecursive(StringSegment segment, ParsedNumber result) {
// Base Case
if (segment.length() == 0) {
return;
}
int initialOffset = segment.getOffset();
for (int i = 0; i < matchers.size(); i++) {
private void parseGreedy(StringSegment segment, ParsedNumber result) {
// Note: this method is not recursive in order to avoid stack overflow.
for (int i = 0; i < matchers.size();) {
// Base Case
if (segment.length() == 0) {
return;
}
NumberParseMatcher matcher = matchers.get(i);
if (!matcher.smokeTest(segment)) {
// Matcher failed smoke test: try the next one
i++;
continue;
}
int initialOffset = segment.getOffset();
matcher.match(segment, result);
if (segment.getOffset() != initialOffset) {
// In a greedy parse, recurse on only the first match.
parseGreedyRecursive(segment, result);
// The following line resets the offset so that the StringSegment says the same across
// the function
// call boundary. Since we recurse only once, this line is not strictly necessary.
segment.setOffset(initialOffset);
return;
// Greedy heuristic: accept the match and loop back
i = 0;
continue;
} else {
// Matcher did not match: try the next one
i++;
continue;
}
}
// NOTE: If we get here, the greedy parse completed without consuming the entire string.
}
private void parseLongestRecursive(StringSegment segment, ParsedNumber result) {
private void parseLongestRecursive(StringSegment segment, ParsedNumber result, int recursionLevels) {
// Base Case
if (segment.length() == 0) {
return;
}
// Safety against stack overflow
if (recursionLevels == 0) {
return;
}
// TODO: Give a nice way for the matcher to reset the ParsedNumber?
ParsedNumber initial = new ParsedNumber();
initial.copyFrom(result);
@ -388,7 +399,7 @@ public class NumberParserImpl {
// If the entire segment was consumed, recurse.
if (segment.getOffset() - initialOffset == charsToConsume) {
parseLongestRecursive(segment, candidate);
parseLongestRecursive(segment, candidate, recursionLevels + 1);
if (candidate.isBetterThan(result)) {
result.copyFrom(candidate);
}

View file

@ -24,6 +24,7 @@ public class ParsingUtils {
// public static final int PARSE_FLAG_OPTIMIZE = 0x0800; // no longer used
public static final int PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000;
public static final int PARSE_FLAG_NO_FOREIGN_CURRENCIES = 0x2000;
public static final int PARSE_FLAG_ALLOW_INFINITE_RECURSION = 0x4000;
public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
for (EntryRange range : input.ranges()) {

View file

@ -389,4 +389,52 @@ public class NumberParserTest {
result.charEnd);
}
}
@Test
public void test20360_BidiOverflow() {
StringBuilder inputString = new StringBuilder();
inputString.append('-');
for (int i=0; i<100000; i++) {
inputString.append('\u061C');
}
inputString.append('5');
NumberParserImpl parser = NumberParserImpl.createSimpleParser(ULocale.ENGLISH, "0", 0);
ParsedNumber resultObject = new ParsedNumber();
parser.parse(inputString.toString(), true, resultObject);
assertTrue("Greedy Parse, success", resultObject.success());
assertEquals("Greedy Parse, chars consumed", 100002, resultObject.charEnd);
assertEquals("Greedy Parse, expected double", -5, resultObject.getNumber().intValue());
resultObject.clear();
parser.parse(inputString.toString(), false, resultObject);
assertFalse("Non-Greedy Parse, success", resultObject.success());
assertEquals("Non-Greedy Parse, chars consumed", 1, resultObject.charEnd);
}
@Test
public void testInfiniteRecursion() {
StringBuilder inputString = new StringBuilder();
inputString.append('-');
for (int i=0; i<200; i++) {
inputString.append('\u061C');
}
inputString.append('5');
NumberParserImpl parser = NumberParserImpl.createSimpleParser(ULocale.ENGLISH, "0", 0);
ParsedNumber resultObject = new ParsedNumber();
parser.parse(inputString.toString(), false, resultObject);
assertFalse("Default recursion limit, success", resultObject.success());
assertEquals("Default recursion limit, chars consumed", 1, resultObject.charEnd);
parser = NumberParserImpl.createSimpleParser(
ULocale.ENGLISH, "0", ParsingUtils.PARSE_FLAG_ALLOW_INFINITE_RECURSION);
resultObject.clear();
parser.parse(inputString.toString(), false, resultObject);
assertTrue("Unlimited recursion, success", resultObject.success());
assertEquals("Unlimited recursion, chars consumed", 202, resultObject.charEnd);
assertEquals("Unlimited recursion, expected double", -5, resultObject.getNumber().intValue());
}
}