diff --git a/icu4c/source/i18n/numparse_impl.cpp b/icu4c/source/i18n/numparse_impl.cpp index 3192a395938..412ea89c86b 100644 --- a/icu4c/source/i18n/numparse_impl.cpp +++ b/icu4c/source/i18n/numparse_impl.cpp @@ -72,7 +72,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& parser->addMatcher(parser->fLocalMatchers.padding = {u"@"}); parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper}); parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, parseFlags, status}); -// parser.addMatcher(new RequireNumberMatcher()); + parser->addMatcher(parser->fLocalValidators.number = {}); parser->freeze(); return parser.orphan(); @@ -252,9 +252,13 @@ void NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool gre StringSegment segment(input, 0 != (fParseFlags & PARSE_FLAG_IGNORE_CASE)); segment.adjustOffset(start); if (greedy) { - parseGreedyRecursive(segment, result, status); + parseGreedy(segment, result, status); + } else if (0 != (fParseFlags & PARSE_FLAG_ALLOW_INFINITE_RECURSION)) { + // Start at 1 so that recursionLevels never gets to 0 + parseLongestRecursive(segment, result, 1, status); } else { - parseLongestRecursive(segment, result, status); + // Arbitrary recursion safety limit: 100 levels. + parseLongestRecursive(segment, result, -100, status); } for (int32_t i = 0; i < fNumMatchers; i++) { fMatchers[i]->postProcess(result); @@ -262,44 +266,53 @@ void NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool gre result.postProcess(); } -void NumberParserImpl::parseGreedyRecursive(StringSegment& segment, ParsedNumber& result, +void NumberParserImpl::parseGreedy(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { - // Base Case - if (segment.length() == 0) { - return; - } - - int initialOffset = segment.getOffset(); - for (int32_t i = 0; i < fNumMatchers; i++) { + // Note: this method is not recursive in order to avoid stack overflow. + for (int i = 0; i smokeTest(segment)) { + // Matcher failed smoke test: try the next one + i++; continue; } + int32_t initialOffset = segment.getOffset(); matcher->match(segment, result, status); if (U_FAILURE(status)) { return; } if (segment.getOffset() != initialOffset) { - // In a greedy parse, recurse on only the first match. - parseGreedyRecursive(segment, result, status); - // The following line resets the offset so that the StringSegment says the same across - // the function - // call boundary. Since we recurse only once, this line is not strictly necessary. - segment.setOffset(initialOffset); - return; + // Greedy heuristic: accept the match and loop back + i = 0; + continue; + } else { + // Matcher did not match: try the next one + i++; + continue; } + UPRV_UNREACHABLE; } // NOTE: If we get here, the greedy parse completed without consuming the entire string. } void NumberParserImpl::parseLongestRecursive(StringSegment& segment, ParsedNumber& result, + int32_t recursionLevels, UErrorCode& status) const { // Base Case if (segment.length() == 0) { return; } + // Safety against stack overflow + if (recursionLevels == 0) { + return; + } + // TODO: Give a nice way for the matcher to reset the ParsedNumber? ParsedNumber initial(result); ParsedNumber candidate; @@ -326,7 +339,7 @@ void NumberParserImpl::parseLongestRecursive(StringSegment& segment, ParsedNumbe // If the entire segment was consumed, recurse. if (segment.getOffset() - initialOffset == charsToConsume) { - parseLongestRecursive(segment, candidate, status); + parseLongestRecursive(segment, candidate, recursionLevels + 1, status); if (U_FAILURE(status)) { return; } diff --git a/icu4c/source/i18n/numparse_impl.h b/icu4c/source/i18n/numparse_impl.h index 992114c7ede..7d5f0b6f0bd 100644 --- a/icu4c/source/i18n/numparse_impl.h +++ b/icu4c/source/i18n/numparse_impl.h @@ -95,9 +95,10 @@ class U_I18N_API NumberParserImpl : public MutableMatcherCollection, public UMem explicit NumberParserImpl(parse_flags_t parseFlags); - void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const; + void parseGreedy(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const; - void parseLongestRecursive(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const; + void parseLongestRecursive( + StringSegment& segment, ParsedNumber& result, int32_t recursionLevels, UErrorCode& status) const; }; diff --git a/icu4c/source/i18n/numparse_parsednumber.cpp b/icu4c/source/i18n/numparse_parsednumber.cpp index 98da4e83192..3145f718dc2 100644 --- a/icu4c/source/i18n/numparse_parsednumber.cpp +++ b/icu4c/source/i18n/numparse_parsednumber.cpp @@ -52,7 +52,7 @@ bool ParsedNumber::seenNumber() const { return !quantity.bogus || 0 != (flags & FLAG_NAN) || 0 != (flags & FLAG_INFINITY); } -double ParsedNumber::getDouble() const { +double ParsedNumber::getDouble(UErrorCode& status) const { bool sawNaN = 0 != (flags & FLAG_NAN); bool sawInfinity = 0 != (flags & FLAG_INFINITY); @@ -69,7 +69,10 @@ double ParsedNumber::getDouble() const { return INFINITY; } } - U_ASSERT(!quantity.bogus); + if (quantity.bogus) { + status = U_INVALID_STATE_ERROR; + return 0.0; + } if (quantity.isZero() && quantity.isNegative()) { return -0.0; } diff --git a/icu4c/source/i18n/numparse_types.h b/icu4c/source/i18n/numparse_types.h index e5cf7be491e..0fd70faca99 100644 --- a/icu4c/source/i18n/numparse_types.h +++ b/icu4c/source/i18n/numparse_types.h @@ -49,6 +49,7 @@ enum ParseFlags { // PARSE_FLAG_OPTIMIZE = 0x0800, // no longer used // PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000, // not used in ICU4C PARSE_FLAG_NO_FOREIGN_CURRENCY = 0x2000, + PARSE_FLAG_ALLOW_INFINITE_RECURSION = 0x4000, }; @@ -160,7 +161,7 @@ class U_I18N_API ParsedNumber { bool seenNumber() const; - double getDouble() const; + double getDouble(UErrorCode& status) const; void populateFormattable(Formattable& output, parse_flags_t parseFlags) const; diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h index 9efc12da68e..35463612e0c 100644 --- a/icu4c/source/test/intltest/numbertest.h +++ b/icu4c/source/test/intltest/numbertest.h @@ -237,6 +237,8 @@ class NumberParserTest : public IntlTest { void testAffixPatternMatcher(); void testGroupingDisabled(); void testCaseFolding(); + void test20360_BidiOverflow(); + void testInfiniteRecursion(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); }; diff --git a/icu4c/source/test/intltest/numbertest_parse.cpp b/icu4c/source/test/intltest/numbertest_parse.cpp index d5276b4fac3..e391f5904e1 100644 --- a/icu4c/source/test/intltest/numbertest_parse.cpp +++ b/icu4c/source/test/intltest/numbertest_parse.cpp @@ -25,6 +25,8 @@ void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& na TESTCASE_AUTO(testSeriesMatcher); TESTCASE_AUTO(testCombinedCurrencyMatcher); TESTCASE_AUTO(testAffixPatternMatcher); + TESTCASE_AUTO(test20360_BidiOverflow); + TESTCASE_AUTO(testInfiniteRecursion); TESTCASE_AUTO_END; } @@ -139,10 +141,10 @@ void NumberParserTest::testBasic() { ParsedNumber resultObject; parser->parse(inputString, true, resultObject, status); assertTrue("Greedy Parse failed: " + message, resultObject.success()); - assertEquals( - "Greedy Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd); - assertEquals( - "Greedy Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble()); + assertEquals("Greedy Parse failed: " + message, + cas.expectedCharsConsumed, resultObject.charEnd); + assertEquals("Greedy Parse failed: " + message, + cas.expectedResultDouble, resultObject.getDouble(status)); } if (0 != (cas.flags & 0x02)) { @@ -157,7 +159,7 @@ void NumberParserTest::testBasic() { assertEquals( "Non-Greedy Parse failed: " + message, cas.expectedResultDouble, - resultObject.getDouble()); + resultObject.getDouble(status)); } if (0 != (cas.flags & 0x04)) { @@ -171,10 +173,10 @@ void NumberParserTest::testBasic() { ParsedNumber resultObject; parser->parse(inputString, true, resultObject, status); assertTrue("Strict Parse failed: " + message, resultObject.success()); - assertEquals( - "Strict Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd); - assertEquals( - "Strict Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble()); + assertEquals("Strict Parse failed: " + message, + cas.expectedCharsConsumed, resultObject.charEnd); + assertEquals("Strict Parse failed: " + message, + cas.expectedResultDouble, resultObject.getDouble(status)); } } } @@ -350,5 +352,59 @@ void NumberParserTest::testAffixPatternMatcher() { } } +void NumberParserTest::test20360_BidiOverflow() { + IcuTestErrorCode status(*this, "test20360_BidiOverflow"); + UnicodeString inputString; + inputString.append(u'-'); + for (int32_t i=0; i<100000; i++) { + inputString.append(u'\u061C'); + } + inputString.append(u'5'); + + LocalPointer parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status)); + if (status.errDataIfFailureAndReset("createSimpleParser() failed")) { + return; + } + + ParsedNumber resultObject; + parser->parse(inputString, true, resultObject, status); + assertTrue("Greedy Parse, success", resultObject.success()); + assertEquals("Greedy Parse, chars consumed", 100002, resultObject.charEnd); + assertEquals("Greedy Parse, expected double", -5.0, resultObject.getDouble(status)); + + resultObject.clear(); + parser->parse(inputString, false, resultObject, status); + assertFalse("Non-Greedy Parse, success", resultObject.success()); + assertEquals("Non-Greedy Parse, chars consumed", 1, resultObject.charEnd); +} + +void NumberParserTest::testInfiniteRecursion() { + IcuTestErrorCode status(*this, "testInfiniteRecursion"); + UnicodeString inputString; + inputString.append(u'-'); + for (int32_t i=0; i<200; i++) { + inputString.append(u'\u061C'); + } + inputString.append(u'5'); + + LocalPointer parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status)); + if (status.errDataIfFailureAndReset("createSimpleParser() failed")) { + return; + } + + ParsedNumber resultObject; + parser->parse(inputString, false, resultObject, status); + assertFalse("Default recursion limit, success", resultObject.success()); + assertEquals("Default recursion limit, chars consumed", 1, resultObject.charEnd); + + parser.adoptInstead(NumberParserImpl::createSimpleParser( + "en", u"0", PARSE_FLAG_ALLOW_INFINITE_RECURSION, status)); + resultObject.clear(); + parser->parse(inputString, false, resultObject, status); + assertTrue("Unlimited recursion, success", resultObject.success()); + assertEquals("Unlimited recursion, chars consumed", 202, resultObject.charEnd); + assertEquals("Unlimited recursion, expected double", -5.0, resultObject.getDouble(status)); +} + #endif diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java index f90775c7fcc..77698ebdb20 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java @@ -321,9 +321,13 @@ public class NumberParserImpl { 0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE)); segment.adjustOffset(start); if (greedy) { - parseGreedyRecursive(segment, result); + parseGreedy(segment, result); + } else if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_ALLOW_INFINITE_RECURSION)) { + // Start at 1 so that recursionLevels never gets to 0 + parseLongestRecursive(segment, result, 1); } else { - parseLongestRecursive(segment, result); + // Arbitrary recursion safety limit: 100 levels. + parseLongestRecursive(segment, result, -100); } for (NumberParseMatcher matcher : matchers) { matcher.postProcess(result); @@ -331,39 +335,46 @@ public class NumberParserImpl { result.postProcess(); } - private void parseGreedyRecursive(StringSegment segment, ParsedNumber result) { - // Base Case - if (segment.length() == 0) { - return; - } - - int initialOffset = segment.getOffset(); - for (int i = 0; i < matchers.size(); i++) { + private void parseGreedy(StringSegment segment, ParsedNumber result) { + // Note: this method is not recursive in order to avoid stack overflow. + for (int i = 0; i < matchers.size();) { + // Base Case + if (segment.length() == 0) { + return; + } NumberParseMatcher matcher = matchers.get(i); if (!matcher.smokeTest(segment)) { + // Matcher failed smoke test: try the next one + i++; continue; } + int initialOffset = segment.getOffset(); matcher.match(segment, result); if (segment.getOffset() != initialOffset) { - // In a greedy parse, recurse on only the first match. - parseGreedyRecursive(segment, result); - // The following line resets the offset so that the StringSegment says the same across - // the function - // call boundary. Since we recurse only once, this line is not strictly necessary. - segment.setOffset(initialOffset); - return; + // Greedy heuristic: accept the match and loop back + i = 0; + continue; + } else { + // Matcher did not match: try the next one + i++; + continue; } } // NOTE: If we get here, the greedy parse completed without consuming the entire string. } - private void parseLongestRecursive(StringSegment segment, ParsedNumber result) { + private void parseLongestRecursive(StringSegment segment, ParsedNumber result, int recursionLevels) { // Base Case if (segment.length() == 0) { return; } + // Safety against stack overflow + if (recursionLevels == 0) { + return; + } + // TODO: Give a nice way for the matcher to reset the ParsedNumber? ParsedNumber initial = new ParsedNumber(); initial.copyFrom(result); @@ -388,7 +399,7 @@ public class NumberParserImpl { // If the entire segment was consumed, recurse. if (segment.getOffset() - initialOffset == charsToConsume) { - parseLongestRecursive(segment, candidate); + parseLongestRecursive(segment, candidate, recursionLevels + 1); if (candidate.isBetterThan(result)) { result.copyFrom(candidate); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java index 1fbab42b900..8b325bd37b0 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java @@ -24,6 +24,7 @@ public class ParsingUtils { // public static final int PARSE_FLAG_OPTIMIZE = 0x0800; // no longer used public static final int PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000; public static final int PARSE_FLAG_NO_FOREIGN_CURRENCIES = 0x2000; + public static final int PARSE_FLAG_ALLOW_INFINITE_RECURSION = 0x4000; public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) { for (EntryRange range : input.ranges()) { diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java index f2f9c6a1c64..35f0a551c27 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java @@ -389,4 +389,52 @@ public class NumberParserTest { result.charEnd); } } + + @Test + public void test20360_BidiOverflow() { + StringBuilder inputString = new StringBuilder(); + inputString.append('-'); + for (int i=0; i<100000; i++) { + inputString.append('\u061C'); + } + inputString.append('5'); + + NumberParserImpl parser = NumberParserImpl.createSimpleParser(ULocale.ENGLISH, "0", 0); + + ParsedNumber resultObject = new ParsedNumber(); + parser.parse(inputString.toString(), true, resultObject); + assertTrue("Greedy Parse, success", resultObject.success()); + assertEquals("Greedy Parse, chars consumed", 100002, resultObject.charEnd); + assertEquals("Greedy Parse, expected double", -5, resultObject.getNumber().intValue()); + + resultObject.clear(); + parser.parse(inputString.toString(), false, resultObject); + assertFalse("Non-Greedy Parse, success", resultObject.success()); + assertEquals("Non-Greedy Parse, chars consumed", 1, resultObject.charEnd); + } + + @Test + public void testInfiniteRecursion() { + StringBuilder inputString = new StringBuilder(); + inputString.append('-'); + for (int i=0; i<200; i++) { + inputString.append('\u061C'); + } + inputString.append('5'); + + NumberParserImpl parser = NumberParserImpl.createSimpleParser(ULocale.ENGLISH, "0", 0); + + ParsedNumber resultObject = new ParsedNumber(); + parser.parse(inputString.toString(), false, resultObject); + assertFalse("Default recursion limit, success", resultObject.success()); + assertEquals("Default recursion limit, chars consumed", 1, resultObject.charEnd); + + parser = NumberParserImpl.createSimpleParser( + ULocale.ENGLISH, "0", ParsingUtils.PARSE_FLAG_ALLOW_INFINITE_RECURSION); + resultObject.clear(); + parser.parse(inputString.toString(), false, resultObject); + assertTrue("Unlimited recursion, success", resultObject.success()); + assertEquals("Unlimited recursion, chars consumed", 202, resultObject.charEnd); + assertEquals("Unlimited recursion, expected double", -5, resultObject.getNumber().intValue()); + } }