mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-13529 Parsing of redundant rule matches is slow when parsing with RuleBasedNumberFormat
X-SVN-Rev: 40913
This commit is contained in:
parent
a64895b4f8
commit
4bac703558
15 changed files with 102 additions and 36 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -635,6 +635,8 @@ icu4c/source/tools/ctestfw/libsicutest*
|
|||
icu4c/source/tools/ctestfw/release
|
||||
icu4c/source/tools/ctestfw/x64
|
||||
icu4c/source/tools/ctestfw/x86
|
||||
icu4c/source/tools/escapesrc/*.d
|
||||
icu4c/source/tools/escapesrc/Makefile
|
||||
icu4c/source/tools/genbrk/*.d
|
||||
icu4c/source/tools/genbrk/*.o
|
||||
icu4c/source/tools/genbrk/*.pdb
|
||||
|
|
|
@ -681,7 +681,7 @@ static void dumpUS(FILE* f, const UnicodeString& us) {
|
|||
#endif
|
||||
|
||||
UBool
|
||||
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
|
||||
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, int32_t nonNumericalExecutedRuleMask, Formattable& result) const
|
||||
{
|
||||
// try matching each rule in the rule set against the text being
|
||||
// parsed. Whichever one matches the most characters is the one
|
||||
|
@ -707,9 +707,12 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
|
|||
#endif
|
||||
// Try each of the negative rules, fraction rules, infinity rules and NaN rules
|
||||
for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
|
||||
if (nonNumericalRules[i]) {
|
||||
if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
|
||||
// Mark this rule as being executed so that we don't try to execute it again.
|
||||
nonNumericalExecutedRuleMask |= 1 << i;
|
||||
|
||||
Formattable tempResult;
|
||||
UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
|
||||
UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult);
|
||||
if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
|
||||
result = tempResult;
|
||||
highWaterMark = workingPos;
|
||||
|
@ -748,7 +751,7 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
|
|||
continue;
|
||||
}
|
||||
Formattable tempResult;
|
||||
UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
|
||||
UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
|
||||
if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
|
||||
result = tempResult;
|
||||
highWaterMark = workingPos;
|
||||
|
|
|
@ -55,7 +55,7 @@ public:
|
|||
void format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
|
||||
void format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
|
||||
|
||||
UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const;
|
||||
UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, int32_t nonNumericalExecutedRuleMask, Formattable& result) const;
|
||||
|
||||
void appendRules(UnicodeString& result) const; // toString
|
||||
|
||||
|
|
|
@ -900,6 +900,7 @@ NFRule::doParse(const UnicodeString& text,
|
|||
ParsePosition& parsePosition,
|
||||
UBool isFractionRule,
|
||||
double upperBound,
|
||||
int32_t nonNumericalExecutedRuleMask,
|
||||
Formattable& resVal) const
|
||||
{
|
||||
// internally we operate on a copy of the string being parsed
|
||||
|
@ -1002,6 +1003,7 @@ NFRule::doParse(const UnicodeString& text,
|
|||
temp.setTo(ruleText, sub1Pos, sub2Pos - sub1Pos);
|
||||
double partialResult = matchToDelimiter(workText, start, tempBaseValue,
|
||||
temp, pp, sub1,
|
||||
nonNumericalExecutedRuleMask,
|
||||
upperBound);
|
||||
|
||||
// if we got a successful match (or were trying to match a
|
||||
|
@ -1022,6 +1024,7 @@ NFRule::doParse(const UnicodeString& text,
|
|||
temp.setTo(ruleText, sub2Pos, ruleText.length() - sub2Pos);
|
||||
partialResult = matchToDelimiter(workText2, 0, partialResult,
|
||||
temp, pp2, sub2,
|
||||
nonNumericalExecutedRuleMask,
|
||||
upperBound);
|
||||
|
||||
// if we got a successful match on this second
|
||||
|
@ -1158,6 +1161,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
|
|||
const UnicodeString& delimiter,
|
||||
ParsePosition& pp,
|
||||
const NFSubstitution* sub,
|
||||
int32_t nonNumericalExecutedRuleMask,
|
||||
double upperBound) const
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
@ -1191,6 +1195,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
|
|||
#else
|
||||
formatter->isLenient(),
|
||||
#endif
|
||||
nonNumericalExecutedRuleMask,
|
||||
result);
|
||||
|
||||
// if the substitution could match all the text up to
|
||||
|
@ -1244,6 +1249,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
|
|||
#else
|
||||
formatter->isLenient(),
|
||||
#endif
|
||||
nonNumericalExecutedRuleMask,
|
||||
result);
|
||||
if (success && (tempPP.getIndex() != 0)) {
|
||||
// if there's a successful match (or it's a null
|
||||
|
|
|
@ -74,6 +74,7 @@ public:
|
|||
ParsePosition& pos,
|
||||
UBool isFractional,
|
||||
double upperBound,
|
||||
int32_t nonNumericalExecutedRuleMask,
|
||||
Formattable& result) const;
|
||||
|
||||
UBool shouldRollBack(int64_t number) const;
|
||||
|
@ -94,6 +95,7 @@ private:
|
|||
int32_t indexOfAnyRulePrefix() const;
|
||||
double matchToDelimiter(const UnicodeString& text, int32_t startPos, double baseValue,
|
||||
const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub,
|
||||
int32_t nonNumericalExecutedRuleMask,
|
||||
double upperBound) const;
|
||||
void stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const;
|
||||
|
||||
|
|
|
@ -155,6 +155,7 @@ public:
|
|||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
int32_t nonNumericalExecutedRuleMask,
|
||||
Formattable& result) const;
|
||||
|
||||
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const {
|
||||
|
@ -221,6 +222,7 @@ public:
|
|||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
int32_t nonNumericalExecutedRuleMask,
|
||||
Formattable& result) const;
|
||||
|
||||
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue + oldRuleValue; }
|
||||
|
@ -292,6 +294,7 @@ public:
|
|||
double baseValue,
|
||||
double upperBound,
|
||||
UBool /*lenientParse*/,
|
||||
int32_t nonNumericalExecutedRuleMask,
|
||||
Formattable& result) const;
|
||||
|
||||
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue / oldRuleValue; }
|
||||
|
@ -689,6 +692,7 @@ NFSubstitution::doParse(const UnicodeString& text,
|
|||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
int32_t nonNumericalExecutedRuleMask,
|
||||
Formattable& result) const
|
||||
{
|
||||
#ifdef RBNF_DEBUG
|
||||
|
@ -709,7 +713,7 @@ NFSubstitution::doParse(const UnicodeString& text,
|
|||
// on), then also try parsing the text using a default-
|
||||
// constructed NumberFormat
|
||||
if (ruleSet != NULL) {
|
||||
ruleSet->parse(text, parsePosition, upperBound, result);
|
||||
ruleSet->parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask, result);
|
||||
if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
NumberFormat* fmt = NumberFormat::createInstance(status);
|
||||
|
@ -931,18 +935,19 @@ ModulusSubstitution::doParse(const UnicodeString& text,
|
|||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
int32_t nonNumericalExecutedRuleMask,
|
||||
Formattable& result) const
|
||||
{
|
||||
// if this isn't a >>> substitution, we can just use the
|
||||
// inherited parse() routine to do the parsing
|
||||
if (ruleToUse == NULL) {
|
||||
return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, result);
|
||||
return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask, result);
|
||||
|
||||
// but if it IS a >>> substitution, we have to do it here: we
|
||||
// use the specific rule's doParse() method, and then we have to
|
||||
// do some of the other work of NFRuleSet.parse()
|
||||
} else {
|
||||
ruleToUse->doParse(text, parsePosition, FALSE, upperBound, result);
|
||||
ruleToUse->doParse(text, parsePosition, FALSE, upperBound, nonNumericalExecutedRuleMask, result);
|
||||
|
||||
if (parsePosition.getIndex() != 0) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
@ -1118,12 +1123,13 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
|
|||
double baseValue,
|
||||
double /*upperBound*/,
|
||||
UBool lenientParse,
|
||||
int32_t nonNumericalExecutedRuleMask,
|
||||
Formattable& resVal) const
|
||||
{
|
||||
// if we're not in byDigits mode, we can just use the inherited
|
||||
// doParse()
|
||||
if (!byDigits) {
|
||||
return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, resVal);
|
||||
return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask, resVal);
|
||||
|
||||
// if we ARE in byDigits mode, parse the text one digit at a time
|
||||
// using this substitution's owning rule set (we do this by setting
|
||||
|
@ -1141,7 +1147,7 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
|
|||
while (workText.length() > 0 && workPos.getIndex() != 0) {
|
||||
workPos.setIndex(0);
|
||||
Formattable temp;
|
||||
getRuleSet()->parse(workText, workPos, 10, temp);
|
||||
getRuleSet()->parse(workText, workPos, 10, nonNumericalExecutedRuleMask, temp);
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
digit = temp.getLong(status);
|
||||
// digit = temp.getType() == Formattable::kLong ?
|
||||
|
@ -1249,6 +1255,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
|
|||
double baseValue,
|
||||
double upperBound,
|
||||
UBool /*lenientParse*/,
|
||||
int32_t nonNumericalExecutedRuleMask,
|
||||
Formattable& result) const
|
||||
{
|
||||
// we don't have to do anything special to do the parsing here,
|
||||
|
@ -1267,7 +1274,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
|
|||
|
||||
while (workText.length() > 0 && workPos.getIndex() != 0) {
|
||||
workPos.setIndex(0);
|
||||
getRuleSet()->parse(workText, workPos, 1, temp); // parse zero or nothing at all
|
||||
getRuleSet()->parse(workText, workPos, 1, nonNumericalExecutedRuleMask, temp); // parse zero or nothing at all
|
||||
if (workPos.getIndex() == 0) {
|
||||
// we failed, either there were no more zeros, or the number was formatted with digits
|
||||
// either way, we're done
|
||||
|
@ -1289,7 +1296,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
|
|||
}
|
||||
|
||||
// we've parsed off the zeros, now let's parse the rest from our current position
|
||||
NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, result);
|
||||
NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, nonNumericalExecutedRuleMask, result);
|
||||
|
||||
if (withZeros) {
|
||||
// any base value will do in this case. is there a way to
|
||||
|
|
|
@ -191,6 +191,7 @@ public:
|
|||
double baseValue,
|
||||
double upperBound,
|
||||
UBool lenientParse,
|
||||
int32_t nonNumericalExecutedRuleMask,
|
||||
Formattable& result) const;
|
||||
|
||||
/**
|
||||
|
|
|
@ -1371,7 +1371,7 @@ RuleBasedNumberFormat::parse(const UnicodeString& text,
|
|||
ParsePosition working_pp(0);
|
||||
Formattable working_result;
|
||||
|
||||
rp->parse(workingText, working_pp, kMaxDouble, working_result);
|
||||
rp->parse(workingText, working_pp, kMaxDouble, 0, working_result);
|
||||
if (working_pp.getIndex() > high_pp.getIndex()) {
|
||||
high_pp = working_pp;
|
||||
high_result = working_result;
|
||||
|
|
|
@ -75,6 +75,7 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name,
|
|||
TESTCASE(23, TestVariableDecimalPoint);
|
||||
TESTCASE(24, TestLargeNumbers);
|
||||
TESTCASE(25, TestCompactDecimalFormatStyle);
|
||||
TESTCASE(26, TestParseFailure);
|
||||
#else
|
||||
TESTCASE(0, TestRBNFDisabled);
|
||||
#endif
|
||||
|
@ -2283,6 +2284,25 @@ void IntlTestRBNF::TestCompactDecimalFormatStyle() {
|
|||
doTest(&rbnf, enTestFullData, false);
|
||||
}
|
||||
|
||||
void IntlTestRBNF::TestParseFailure() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
RuleBasedNumberFormat rbnf(URBNF_SPELLOUT, Locale::getJapanese(), status);
|
||||
static const char* testData[][1] = {
|
||||
{ "\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB" },
|
||||
{ NULL }
|
||||
};
|
||||
for (int i = 0; testData[i][0]; ++i) {
|
||||
const char* spelledNumber = testData[i][0]; // spelled-out number
|
||||
|
||||
UnicodeString spelledNumberString = UnicodeString(spelledNumber).unescape();
|
||||
Formattable actualNumber;
|
||||
rbnf.parse(spelledNumberString, actualNumber, status);
|
||||
if (status != U_INVALID_FORMAT_ERROR) { // I would have expected U_PARSE_ERROR, but NumberFormat::parse gives U_INVALID_FORMAT_ERROR
|
||||
errln("FAIL: string should be unparseable %s %s", spelledNumber, u_errorName(status));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing)
|
||||
{
|
||||
|
|
|
@ -147,6 +147,7 @@ class IntlTestRBNF : public IntlTest {
|
|||
void TestRounding();
|
||||
void TestLargeNumbers();
|
||||
void TestCompactDecimalFormatStyle();
|
||||
void TestParseFailure();
|
||||
|
||||
protected:
|
||||
virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing);
|
||||
|
|
|
@ -903,7 +903,7 @@ final class NFRule {
|
|||
* result is an integer and Double otherwise. The result is never null.
|
||||
*/
|
||||
public Number doParse(String text, ParsePosition parsePosition, boolean isFractionRule,
|
||||
double upperBound) {
|
||||
double upperBound, int nonNumericalExecutedRuleMask) {
|
||||
|
||||
// internally we operate on a copy of the string being parsed
|
||||
// (because we're going to change it) and use our own ParsePosition
|
||||
|
@ -976,7 +976,7 @@ final class NFRule {
|
|||
pp.setIndex(0);
|
||||
double partialResult = matchToDelimiter(workText, start, tempBaseValue,
|
||||
ruleText.substring(sub1Pos, sub2Pos), rulePatternFormat,
|
||||
pp, sub1, upperBound).doubleValue();
|
||||
pp, sub1, upperBound, nonNumericalExecutedRuleMask).doubleValue();
|
||||
|
||||
// if we got a successful match (or were trying to match a
|
||||
// null substitution), pp is now pointing at the first unmatched
|
||||
|
@ -994,7 +994,7 @@ final class NFRule {
|
|||
// a real result
|
||||
partialResult = matchToDelimiter(workText2, 0, partialResult,
|
||||
ruleText.substring(sub2Pos), rulePatternFormat, pp2, sub2,
|
||||
upperBound).doubleValue();
|
||||
upperBound, nonNumericalExecutedRuleMask).doubleValue();
|
||||
|
||||
// if we got a successful match on this second
|
||||
// matchToDelimiter() call, update the high-water mark
|
||||
|
@ -1121,7 +1121,8 @@ final class NFRule {
|
|||
* Double.
|
||||
*/
|
||||
private Number matchToDelimiter(String text, int startPos, double baseVal,
|
||||
String delimiter, PluralFormat pluralFormatDelimiter, ParsePosition pp, NFSubstitution sub, double upperBound) {
|
||||
String delimiter, PluralFormat pluralFormatDelimiter, ParsePosition pp, NFSubstitution sub,
|
||||
double upperBound, int nonNumericalExecutedRuleMask) {
|
||||
// if "delimiter" contains real (i.e., non-ignorable) text, search
|
||||
// it for "delimiter" beginning at "start". If that succeeds, then
|
||||
// use "sub"'s doParse() method to match the text before the
|
||||
|
@ -1144,7 +1145,7 @@ final class NFRule {
|
|||
String subText = text.substring(0, dPos);
|
||||
if (subText.length() > 0) {
|
||||
tempResult = sub.doParse(subText, tempPP, baseVal, upperBound,
|
||||
formatter.lenientParseEnabled());
|
||||
formatter.lenientParseEnabled(), nonNumericalExecutedRuleMask);
|
||||
|
||||
// if the substitution could match all the text up to
|
||||
// where we found "delimiter", then this function has
|
||||
|
@ -1192,7 +1193,7 @@ final class NFRule {
|
|||
Number result = ZERO;
|
||||
// try to match the whole string against the substitution
|
||||
Number tempResult = sub.doParse(text, tempPP, baseVal, upperBound,
|
||||
formatter.lenientParseEnabled());
|
||||
formatter.lenientParseEnabled(), nonNumericalExecutedRuleMask);
|
||||
if (tempPP.getIndex() != 0) {
|
||||
// if there's a successful match (or it's a null
|
||||
// substitution), update pp to point to the first
|
||||
|
|
|
@ -748,7 +748,7 @@ final class NFRuleSet {
|
|||
* this function returns new Long(0), and the parse position is
|
||||
* left unchanged.
|
||||
*/
|
||||
public Number parse(String text, ParsePosition parsePosition, double upperBound) {
|
||||
public Number parse(String text, ParsePosition parsePosition, double upperBound, int nonNumericalExecutedRuleMask) {
|
||||
// try matching each rule in the rule set against the text being
|
||||
// parsed. Whichever one matches the most characters is the one
|
||||
// that determines the value we return.
|
||||
|
@ -763,9 +763,13 @@ final class NFRuleSet {
|
|||
}
|
||||
|
||||
// Try each of the negative rules, fraction rules, infinity rules and NaN rules
|
||||
for (NFRule fractionRule : nonNumericalRules) {
|
||||
if (fractionRule != null) {
|
||||
tempResult = fractionRule.doParse(text, parsePosition, false, upperBound);
|
||||
for (int nonNumericalRuleIdx = 0; nonNumericalRuleIdx < nonNumericalRules.length; nonNumericalRuleIdx++) {
|
||||
NFRule nonNumericalRule = nonNumericalRules[nonNumericalRuleIdx];
|
||||
if (nonNumericalRule != null && ((nonNumericalExecutedRuleMask >> nonNumericalRuleIdx) & 1) == 0) {
|
||||
// Mark this rule as being executed so that we don't try to execute it again.
|
||||
nonNumericalExecutedRuleMask |= 1 << nonNumericalRuleIdx;
|
||||
|
||||
tempResult = nonNumericalRule.doParse(text, parsePosition, false, upperBound, nonNumericalExecutedRuleMask);
|
||||
if (parsePosition.getIndex() > highWaterMark.getIndex()) {
|
||||
result = tempResult;
|
||||
highWaterMark.setIndex(parsePosition.getIndex());
|
||||
|
@ -792,7 +796,7 @@ final class NFRuleSet {
|
|||
continue;
|
||||
}
|
||||
|
||||
tempResult = rules[i].doParse(text, parsePosition, isFractionRuleSet, upperBound);
|
||||
tempResult = rules[i].doParse(text, parsePosition, isFractionRuleSet, upperBound, nonNumericalExecutedRuleMask);
|
||||
if (parsePosition.getIndex() > highWaterMark.getIndex()) {
|
||||
result = tempResult;
|
||||
highWaterMark.setIndex(parsePosition.getIndex());
|
||||
|
|
|
@ -425,7 +425,7 @@ abstract class NFSubstitution {
|
|||
* is left unchanged.
|
||||
*/
|
||||
public Number doParse(String text, ParsePosition parsePosition, double baseValue,
|
||||
double upperBound, boolean lenientParse) {
|
||||
double upperBound, boolean lenientParse, int nonNumericalExecutedRuleMask) {
|
||||
Number tempResult;
|
||||
|
||||
// figure out the highest base value a rule can have and match
|
||||
|
@ -443,7 +443,7 @@ abstract class NFSubstitution {
|
|||
// on), then also try parsing the text using a default-
|
||||
// constructed NumberFormat
|
||||
if (ruleSet != null) {
|
||||
tempResult = ruleSet.parse(text, parsePosition, upperBound);
|
||||
tempResult = ruleSet.parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask);
|
||||
if (lenientParse && !ruleSet.isFractionSet() && parsePosition.getIndex() == 0) {
|
||||
tempResult = ruleSet.owner.getDecimalFormat().parse(text, parsePosition);
|
||||
}
|
||||
|
@ -995,17 +995,17 @@ class ModulusSubstitution extends NFSubstitution {
|
|||
*/
|
||||
@Override
|
||||
public Number doParse(String text, ParsePosition parsePosition, double baseValue,
|
||||
double upperBound, boolean lenientParse) {
|
||||
double upperBound, boolean lenientParse, int nonNumericalExecutedRuleMask) {
|
||||
// if this isn't a >>> substitution, we can just use the
|
||||
// inherited parse() routine to do the parsing
|
||||
if (ruleToUse == null) {
|
||||
return super.doParse(text, parsePosition, baseValue, upperBound, lenientParse);
|
||||
return super.doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask);
|
||||
|
||||
} else {
|
||||
// but if it IS a >>> substitution, we have to do it here: we
|
||||
// use the specific rule's doParse() method, and then we have to
|
||||
// do some of the other work of NFRuleSet.parse()
|
||||
Number tempResult = ruleToUse.doParse(text, parsePosition, false, upperBound);
|
||||
Number tempResult = ruleToUse.doParse(text, parsePosition, false, upperBound, nonNumericalExecutedRuleMask);
|
||||
|
||||
if (parsePosition.getIndex() != 0) {
|
||||
double result = tempResult.doubleValue();
|
||||
|
@ -1300,11 +1300,11 @@ class FractionalPartSubstitution extends NFSubstitution {
|
|||
*/
|
||||
@Override
|
||||
public Number doParse(String text, ParsePosition parsePosition, double baseValue,
|
||||
double upperBound, boolean lenientParse) {
|
||||
double upperBound, boolean lenientParse, int nonNumericalExecutedRuleMask) {
|
||||
// if we're not in byDigits mode, we can just use the inherited
|
||||
// doParse()
|
||||
if (!byDigits) {
|
||||
return super.doParse(text, parsePosition, baseValue, 0, lenientParse);
|
||||
return super.doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask);
|
||||
}
|
||||
else {
|
||||
// if we ARE in byDigits mode, parse the text one digit at a time
|
||||
|
@ -1320,7 +1320,7 @@ class FractionalPartSubstitution extends NFSubstitution {
|
|||
int leadingZeros = 0;
|
||||
while (workText.length() > 0 && workPos.getIndex() != 0) {
|
||||
workPos.setIndex(0);
|
||||
digit = ruleSet.parse(workText, workPos, 10).intValue();
|
||||
digit = ruleSet.parse(workText, workPos, 10, nonNumericalExecutedRuleMask).intValue();
|
||||
if (lenientParse && workPos.getIndex() == 0) {
|
||||
Number n = ruleSet.owner.getDecimalFormat().parse(workText, workPos);
|
||||
if (n != null) {
|
||||
|
@ -1626,7 +1626,7 @@ class NumeratorSubstitution extends NFSubstitution {
|
|||
*/
|
||||
@Override
|
||||
public Number doParse(String text, ParsePosition parsePosition, double baseValue,
|
||||
double upperBound, boolean lenientParse) {
|
||||
double upperBound, boolean lenientParse, int nonNumericalExecutedRuleMask) {
|
||||
// we don't have to do anything special to do the parsing here,
|
||||
// but we have to turn lenient parsing off-- if we leave it on,
|
||||
// it SERIOUSLY messes up the algorithm
|
||||
|
@ -1641,7 +1641,7 @@ class NumeratorSubstitution extends NFSubstitution {
|
|||
|
||||
while (workText.length() > 0 && workPos.getIndex() != 0) {
|
||||
workPos.setIndex(0);
|
||||
/*digit = */ruleSet.parse(workText, workPos, 1).intValue(); // parse zero or nothing at all
|
||||
/*digit = */ruleSet.parse(workText, workPos, 1, nonNumericalExecutedRuleMask).intValue(); // parse zero or nothing at all
|
||||
if (workPos.getIndex() == 0) {
|
||||
// we failed, either there were no more zeros, or the number was formatted with digits
|
||||
// either way, we're done
|
||||
|
@ -1662,7 +1662,7 @@ class NumeratorSubstitution extends NFSubstitution {
|
|||
}
|
||||
|
||||
// we've parsed off the zeros, now let's parse the rest from our current position
|
||||
Number result = super.doParse(text, parsePosition, withZeros ? 1 : baseValue, upperBound, false);
|
||||
Number result = super.doParse(text, parsePosition, withZeros ? 1 : baseValue, upperBound, false, nonNumericalExecutedRuleMask);
|
||||
|
||||
if (withZeros) {
|
||||
// any base value will do in this case. is there a way to
|
||||
|
|
|
@ -1322,7 +1322,7 @@ public class RuleBasedNumberFormat extends NumberFormat {
|
|||
|
||||
// try parsing the string with the rule set. If it gets past the
|
||||
// high-water mark, update the high-water mark and the result
|
||||
tempResult = ruleSets[i].parse(workingText, workingPos, Double.MAX_VALUE);
|
||||
tempResult = ruleSets[i].parse(workingText, workingPos, Double.MAX_VALUE, 0);
|
||||
if (workingPos.getIndex() > highWaterMark.getIndex()) {
|
||||
result = tempResult;
|
||||
highWaterMark.setIndex(workingPos.getIndex());
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
*/
|
||||
package com.ibm.icu.dev.test.format;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.junit.Test;
|
||||
|
@ -161,4 +162,22 @@ public class RBNFParseTest extends TestFmwk {
|
|||
logln("rbnf_fr:" + rbnf_en.getDefaultRuleSetName());
|
||||
parseList(rbnf_en, rbnf_fr, lists);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestBadParse() {
|
||||
RuleBasedNumberFormat rbnf = new RuleBasedNumberFormat(Locale.JAPAN, RuleBasedNumberFormat.SPELLOUT);
|
||||
String[] testData = {
|
||||
"\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB",
|
||||
};
|
||||
|
||||
for (String testString : testData) {
|
||||
try {
|
||||
rbnf.parse(testString);
|
||||
errln("Unexpected success: " + testString);
|
||||
}
|
||||
catch (ParseException e) {
|
||||
// success!
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue