From b0a0f67e21335bb4e9f91e54ccec529c2573b58c Mon Sep 17 00:00:00 2001 From: George Rhoten Date: Thu, 14 Aug 2014 17:20:42 +0000 Subject: [PATCH] ICU-10880 Add support for plural format syntax to RBNF. Perform a little cleanup to the RBNF code. X-SVN-Rev: 36160 --- .../text/RbnfScannerProviderImpl.java | 3 +- .../core/src/com/ibm/icu/text/NFRule.java | 405 ++++---------- .../src/com/ibm/icu/text/NFSubstitution.java | 176 ++---- .../src/com/ibm/icu/text/PluralFormat.java | 124 ++++- .../ibm/icu/text/RuleBasedNumberFormat.java | 10 +- .../test/format/RbnfLenientScannerTest.java | 4 +- .../com/ibm/icu/dev/test/format/RbnfTest.java | 508 ++++++++++-------- 7 files changed, 550 insertions(+), 680 deletions(-) rename icu4j/main/classes/collate/src/com/ibm/icu/{ => impl}/text/RbnfScannerProviderImpl.java (99%) diff --git a/icu4j/main/classes/collate/src/com/ibm/icu/text/RbnfScannerProviderImpl.java b/icu4j/main/classes/collate/src/com/ibm/icu/impl/text/RbnfScannerProviderImpl.java similarity index 99% rename from icu4j/main/classes/collate/src/com/ibm/icu/text/RbnfScannerProviderImpl.java rename to icu4j/main/classes/collate/src/com/ibm/icu/impl/text/RbnfScannerProviderImpl.java index 43750edf57b..c82b2327b92 100644 --- a/icu4j/main/classes/collate/src/com/ibm/icu/text/RbnfScannerProviderImpl.java +++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/text/RbnfScannerProviderImpl.java @@ -5,12 +5,13 @@ ******************************************************************************* */ -package com.ibm.icu.text; +package com.ibm.icu.impl.text; import java.util.HashMap; import java.util.Map; import com.ibm.icu.impl.ICUDebug; +import com.ibm.icu.text.*; import com.ibm.icu.util.ULocale; /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java b/icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java index 777fd33d93e..623844eb842 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/NFRule.java @@ -6,7 +6,7 @@ */ package com.ibm.icu.text; -import java.lang.String; +import java.text.FieldPosition; import java.text.ParsePosition; import com.ibm.icu.impl.PatternProps; @@ -69,6 +69,13 @@ final class NFRule { */ private String ruleText = null; + /** + * The rule's plural format when defined. This is not a substitution + * because it only works on the current baseValue. It's normally not used + * due to the overhead. + */ + private PluralFormat rulePatternFormat = null; + /** * The rule's first substitution (the one with the lower offset * into the rule text) @@ -235,7 +242,8 @@ final class NFRule { int p = description.indexOf(":"); if (p == -1) { setBaseValue(0); - } else { + } + else { // copy the descriptor out into its own string and strip it, // along with any trailing whitespace, out of the original // description @@ -249,22 +257,14 @@ final class NFRule { // check first to see if the rule descriptor matches the token // for one of the special rules. If it does, set the base // value to the correct identifier value - if (descriptor.equals("-x")) { - setBaseValue(NEGATIVE_NUMBER_RULE); - } - else if (descriptor.equals("x.x")) { - setBaseValue(IMPROPER_FRACTION_RULE); - } - else if (descriptor.equals("0.x")) { + if (descriptor.equals("0.x")) { setBaseValue(PROPER_FRACTION_RULE); } - else if (descriptor.equals("x.0")) { - setBaseValue(MASTER_RULE); - } else if (descriptor.charAt(0) >= '0' && descriptor.charAt(0) <= '9') { // if the rule descriptor begins with a digit, it's a descriptor // for a normal rule long tempValue = 0; + int descriptorLength = descriptor.length(); char c = 0; p = 0; @@ -272,7 +272,7 @@ final class NFRule { // into "tempValue", skip periods, commas, and spaces, // stop on a slash or > sign (or at the end of the string), // and throw an exception on any other character - while (p < descriptor.length()) { + while (p < descriptorLength) { c = descriptor.charAt(p); if (c >= '0' && c <= '9') { tempValue = tempValue * 10 + (c - '0'); @@ -296,7 +296,7 @@ final class NFRule { if (c == '/') { tempValue = 0; ++p; - while (p < descriptor.length()) { + while (p < descriptorLength) { c = descriptor.charAt(p); if (c >= '0' && c <= '9') { tempValue = tempValue * 10 + (c - '0'); @@ -325,7 +325,7 @@ final class NFRule { // If we see another character before reaching the end of // the descriptor, that's also a syntax error. if (c == '>') { - while (p < descriptor.length()) { + while (p < descriptorLength) { c = descriptor.charAt(p); if (c == '>' && exponent > 0) { --exponent; @@ -336,6 +336,15 @@ final class NFRule { } } } + else if (descriptor.equals("-x")) { + setBaseValue(NEGATIVE_NUMBER_RULE); + } + else if (descriptor.equals("x.x")) { + setBaseValue(IMPROPER_FRACTION_RULE); + } + else if (descriptor.equals("x.0")) { + setBaseValue(MASTER_RULE); + } } // finally, if the rule body begins with an apostrophe, strip it off @@ -355,15 +364,42 @@ final class NFRule { * creates the substitutions, and removes the substitution tokens * from the rule's rule text. * @param owner The rule set containing this rule - * @param predecessor The rule preseding this one in "owners" rule list + * @param predecessor The rule preceding this one in "owners" rule list * @param ruleText The rule text */ private void extractSubstitutions(NFRuleSet owner, String ruleText, NFRule predecessor) { this.ruleText = ruleText; + this.rulePatternFormat = null; sub1 = extractSubstitution(owner, predecessor); - sub2 = extractSubstitution(owner, predecessor); + if (sub1.isNullSubstitution()) { + // Small optimization. There is no need to create a redundant NullSubstitution. + sub2 = sub1; + } + else { + sub2 = extractSubstitution(owner, predecessor); + } + ruleText = this.ruleText; + if (ruleText.startsWith("$(") && ruleText.endsWith(")")) { + int endType = ruleText.indexOf(','); + if (endType < 0) { + throw new IllegalArgumentException("Rule \"" + ruleText + "\" does not have a defined type"); + } + String type = this.ruleText.substring(2, endType); + PluralRules.PluralType pluralType; + if ("cardinal".equals(type)) { + pluralType = PluralRules.PluralType.CARDINAL; + } + else if ("ordinal".equals(type)) { + pluralType = PluralRules.PluralType.ORDINAL; + } + else { + throw new IllegalArgumentException(type + " is an unknown type"); + } + rulePatternFormat = formatter.createPluralFormat(pluralType, + ruleText.substring(endType + 1, ruleText.length() - 1)); + } } private static final String[] RULE_PREFIXES = new String[] { @@ -575,14 +611,13 @@ final class NFRule { else if (baseValue == MASTER_RULE) { result.append("x.0: "); } - - // for a normal rule, write out its base value, and if the radix is - // something other than 10, write out the radix (with the preceding - // slash, of course). Then calculate the expected exponent and if - // if isn't the same as the actual exponent, write an appropriate - // number of > signs. Finally, terminate the whole thing with - // a colon. else { + // for a normal rule, write out its base value, and if the radix is + // something other than 10, write out the radix (with the preceding + // slash, of course). Then calculate the expected exponent and if + // if isn't the same as the actual exponent, write an appropriate + // number of > signs. Finally, terminate the whole thing with + // a colon. result.append(String.valueOf(baseValue)); if (radix != 10) { result.append('/').append(radix); @@ -653,9 +688,18 @@ final class NFRule { // into the right places in toInsertInto (notice we do the // substitutions in reverse order so that the offsets don't get // messed up) - toInsertInto.insert(pos, ruleText); - sub2.doSubstitution(number, toInsertInto, pos); - sub1.doSubstitution(number, toInsertInto, pos); + if (rulePatternFormat == null) { + toInsertInto.insert(pos, ruleText); + } + else { + toInsertInto.insert(pos, rulePatternFormat.format(baseValue == 0 ? number : number/baseValue)); + } + if (!sub2.isNullSubstitution()) { + sub2.doSubstitution(number, toInsertInto, pos); + } + if (!sub1.isNullSubstitution()) { + sub1.doSubstitution(number, toInsertInto, pos); + } } /** @@ -674,9 +718,18 @@ final class NFRule { // [again, we have two copies of this routine that do the same thing // so that we don't sacrifice precision in a long by casting it // to a double] - toInsertInto.insert(pos, ruleText); - sub2.doSubstitution(number, toInsertInto, pos); - sub1.doSubstitution(number, toInsertInto, pos); + if (rulePatternFormat == null) { + toInsertInto.insert(pos, ruleText); + } + else { + toInsertInto.insert(pos, rulePatternFormat.format(number)); + } + if (!sub2.isNullSubstitution()) { + sub2.doSubstitution(number, toInsertInto, pos); + } + if (!sub1.isNullSubstitution()) { + sub1.doSubstitution(number, toInsertInto, pos); + } } /** @@ -791,8 +844,8 @@ final class NFRule { // the substitution, giving us a new partial parse result pp.setIndex(0); double partialResult = matchToDelimiter(workText, start, tempBaseValue, - ruleText.substring(sub1.getPos(), sub2.getPos()), pp, sub1, - upperBound).doubleValue(); + ruleText.substring(sub1.getPos(), sub2.getPos()), rulePatternFormat, + pp, sub1, upperBound).doubleValue(); // if we got a successful match (or were trying to match a // null substitution), pp is now pointing at the first unmatched @@ -809,7 +862,7 @@ final class NFRule { // substitution if there's a successful match, giving us // a real result partialResult = matchToDelimiter(workText2, 0, partialResult, - ruleText.substring(sub2.getPos()), pp2, sub2, + ruleText.substring(sub2.getPos()), rulePatternFormat, pp2, sub2, upperBound).doubleValue(); // if we got a successful match on this second @@ -936,7 +989,7 @@ final class NFRule { * Double. */ private Number matchToDelimiter(String text, int startPos, double baseVal, - String delimiter, ParsePosition pp, NFSubstitution sub, double upperBound) { + String delimiter, PluralFormat pluralFormatDelimiter, ParsePosition pp, NFSubstitution sub, double upperBound) { // if "delimiter" contains real (i.e., non-ignorable) text, search // it for "delimiter" beginning at "start". If that succeeds, then // use "sub"'s doParse() method to match the text before the @@ -949,7 +1002,7 @@ final class NFRule { // element array: element 0 is the position of the match, and // element 1 is the number of characters that matched // "delimiter". - int[] temp = findText(text, delimiter, startPos); + int[] temp = findText(text, delimiter, pluralFormatDelimiter, startPos); int dPos = temp[0]; int dLen = temp[1]; @@ -985,7 +1038,7 @@ final class NFRule { // copy of "delimiter" in "text" and repeat the loop if // we find it tempPP.setIndex(0); - temp = findText(text, delimiter, dPos + dLen); + temp = findText(text, delimiter, pluralFormatDelimiter, dPos + dLen); dPos = temp[0]; dLen = temp[1]; } @@ -1053,144 +1106,14 @@ final class NFRule { return scanner.prefixLength(str, prefix); } - // go through all this grief if we're in lenient-parse mode - // if (formatter.lenientParseEnabled()) { - // // get the formatter's collator and use it to create two - // // collation element iterators, one over the target string - // // and another over the prefix (right now, we'll throw an - // // exception if the collator we get back from the formatter - // // isn't a RuleBasedCollator, because RuleBasedCollator defines - // // the CollationElementIteratoer protocol. Hopefully, this - // // will change someday.) - // // - // // Previous code was matching "fifty-" against " fifty" and leaving - // // the number " fifty-7" to parse as 43 (50 - 7). - // // Also it seems that if we consume the entire prefix, that's ok even - // // if we've consumed the entire string, so I switched the logic to - // // reflect this. - // RuleBasedCollator collator = (RuleBasedCollator)formatter.getCollator(); - // CollationElementIterator strIter = collator.getCollationElementIterator(str); - // CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix); - - // // match collation elements between the strings - // int oStr = strIter.next(); - // int oPrefix = prefixIter.next(); - - // while (oPrefix != CollationElementIterator.NULLORDER) { - // // skip over ignorable characters in the target string - // while (CollationElementIterator.primaryOrder(oStr) == 0 && oStr != - // CollationElementIterator.NULLORDER) { - // oStr = strIter.next(); - // } - - // // skip over ignorable characters in the prefix - // while (CollationElementIterator.primaryOrder(oPrefix) == 0 && oPrefix != - // CollationElementIterator.NULLORDER) { - // oPrefix = prefixIter.next(); - // } - - // // if skipping over ignorables brought to the end of - // // the prefix, we DID match: drop out of the loop - // if (oPrefix == CollationElementIterator.NULLORDER) { - // break; - // } - - // // if skipping over ignorables brought us to the end - // // of the target string, we didn't match and return 0 - // if (oStr == CollationElementIterator.NULLORDER) { - // return 0; - // } - - // // match collation elements from the two strings - // // (considering only primary differences). If we - // // get a mismatch, dump out and return 0 - // if (CollationElementIterator.primaryOrder(oStr) != CollationElementIterator. - // primaryOrder(oPrefix)) { - // return 0; - // } - // // otherwise, advance to the next character in each string - // // and loop (we drop out of the loop when we exhaust - // // collation elements in the prefix) - - // oStr = strIter.next(); - // oPrefix = prefixIter.next(); - // } - - // // we are not compatible with jdk 1.1 any longer - // int result = strIter.getOffset(); - // if (oStr != CollationElementIterator.NULLORDER) { - // --result; - // } - // return result; - - /* - //---------------------------------------------------------------- - // JDK 1.2-specific API call - // return strIter.getOffset(); - //---------------------------------------------------------------- - // JDK 1.1 HACK (take out for 1.2-specific code) - - // if we make it to here, we have a successful match. Now we - // have to find out HOW MANY characters from the target string - // matched the prefix (there isn't necessarily a one-to-one - // mapping between collation elements and characters). - // In JDK 1.2, there's a simple getOffset() call we can use. - // In JDK 1.1, on the other hand, we have to go through some - // ugly contortions. First, use the collator to compare the - // same number of characters from the prefix and target string. - // If they're equal, we're done. - collator.setStrength(Collator.PRIMARY); - if (str.length() >= prefix.length() - && collator.equals(str.substring(0, prefix.length()), prefix)) { - return prefix.length(); - } - - // if they're not equal, then we have to compare successively - // larger and larger substrings of the target string until we - // get to one that matches the prefix. At that point, we know - // how many characters matched the prefix, and we can return. - int p = 1; - while (p <= str.length()) { - if (collator.equals(str.substring(0, p), prefix)) { - return p; - } else { - ++p; - } - } - - // SHOULKD NEVER GET HERE!!! - return 0; - //---------------------------------------------------------------- - */ - - // If lenient parsing is turned off, forget all that crap above. - // Just use String.startsWith() and be done with it. - // } else { - if (str.startsWith(prefix)) { - return prefix.length(); - } else { - return 0; - } - // } + // If lenient parsing is turned off, forget all that crap above. + // Just use String.startsWith() and be done with it. + if (str.startsWith(prefix)) { + return prefix.length(); + } + return 0; } - /* - * Searches a string for another string. If lenient parsing is off, - * this just calls indexOf(). If lenient parsing is on, this function - * uses CollationElementIterator to match characters, and only - * primary-order differences are significant in determining whether - * there's a match. - * @param str The string to search - * @param key The string to search "str" for - * @return A two-element array of ints. Element 0 is the position - * of the match, or -1 if there was no match. Element 1 is the - * number of characters in "str" that matched (which isn't necessarily - * the same as the length of "key") - */ -/* private int[] findText(String str, String key) { - return findText(str, key, 0); - }*/ - /** * Searches a string for another string. If lenient parsing is off, * this just calls indexOf(). If lenient parsing is on, this function @@ -1206,100 +1129,27 @@ final class NFRule { * number of characters in "str" that matched (which isn't necessarily * the same as the length of "key") */ - private int[] findText(String str, String key, int startingAt) { - // if lenient parsing is turned off, this is easy: just call - // String.indexOf() and we're done + private int[] findText(String str, String key, PluralFormat pluralFormatKey, int startingAt) { RbnfLenientScanner scanner = formatter.getLenientScanner(); -// if (!formatter.lenientParseEnabled()) { - if (scanner == null) { - return new int[] { str.indexOf(key, startingAt), key.length() }; - - // but if lenient parsing is turned ON, we've got some work - // ahead of us - } else { - return scanner.findText(str, key, startingAt); - - // //---------------------------------------------------------------- - // // JDK 1.1 HACK (take out of 1.2-specific code) - - // // in JDK 1.2, CollationElementIterator provides us with an - // // API to map between character offsets and collation elements - // // and we can do this by marching through the string comparing - // // collation elements. We can't do that in JDK 1.1. Insted, - // // we have to go through this horrible slow mess: - // int p = startingAt; - // int keyLen = 0; - - // // basically just isolate smaller and smaller substrings of - // // the target string (each running to the end of the string, - // // and with the first one running from startingAt to the end) - // // and then use prefixLength() to see if the search key is at - // // the beginning of each substring. This is excruciatingly - // // slow, but it will locate the key and tell use how long the - // // matching text was. - // while (p < str.length() && keyLen == 0) { - // keyLen = prefixLength(str.substring(p), key); - // if (keyLen != 0) { - // return new int[] { p, keyLen }; - // } - // ++p; - // } - // // if we make it to here, we didn't find it. Return -1 for the - // // location. The length should be ignored, but set it to 0, - // // which should be "safe" - // return new int[] { -1, 0 }; - - //---------------------------------------------------------------- - // JDK 1.2 version of this routine - //RuleBasedCollator collator = (RuleBasedCollator)formatter.getCollator(); - // - //CollationElementIterator strIter = collator.getCollationElementIterator(str); - //CollationElementIterator keyIter = collator.getCollationElementIterator(key); - // - //int keyStart = -1; - // - //str.setOffset(startingAt); - // - //int oStr = strIter.next(); - //int oKey = keyIter.next(); - //while (oKey != CollationElementIterator.NULLORDER) { - // while (oStr != CollationElementIterator.NULLORDER && - // CollationElementIterator.primaryOrder(oStr) == 0) - // oStr = strIter.next(); - // - // while (oKey != CollationElementIterator.NULLORDER && - // CollationElementIterator.primaryOrder(oKey) == 0) - // oKey = keyIter.next(); - // - // if (oStr == CollationElementIterator.NULLORDER) { - // return new int[] { -1, 0 }; - // } - // - // if (oKey == CollationElementIterator.NULLORDER) { - // break; - // } - // - // if (CollationElementIterator.primaryOrder(oStr) == - // CollationElementIterator.primaryOrder(oKey)) { - // keyStart = strIter.getOffset(); - // oStr = strIter.next(); - // oKey = keyIter.next(); - // } else { - // if (keyStart != -1) { - // keyStart = -1; - // keyIter.reset(); - // } else { - // oStr = strIter.next(); - // } - // } - //} - // - //if (oKey == CollationElementIterator.NULLORDER) { - // return new int[] { keyStart, strIter.getOffset() - keyStart }; - //} else { - // return new int[] { -1, 0 }; - //} + if (pluralFormatKey != null) { + FieldPosition position = new FieldPosition(NumberFormat.INTEGER_FIELD); + position.setBeginIndex(startingAt); + pluralFormatKey.parseType(str, scanner, position); + int start = position.getBeginIndex(); + if (start >= 0) { + return new int[]{start, position.getEndIndex() - start}; + } + return new int[]{-1, 0}; } + + if (scanner != null) { + // if lenient parsing is turned ON, we've got some work + // ahead of us + return scanner.findText(str, key, startingAt); + } + // if lenient parsing is turned off, this is easy. Just call + // String.indexOf() and we're done + return new int[]{str.indexOf(key, startingAt), key.length()}; } /** @@ -1316,31 +1166,6 @@ final class NFRule { return true; } RbnfLenientScanner scanner = formatter.getLenientScanner(); - if (scanner != null) { - return scanner.allIgnorable(str); - } - return false; - - // if lenient parsing is turned on, walk through the string with - // a collation element iterator and make sure each collation - // element is 0 (ignorable) at the primary level - // if (formatter.lenientParseEnabled()) { - // {dlf} - //return false; - // RuleBasedCollator collator = (RuleBasedCollator)(formatter.getCollator()); - // CollationElementIterator iter = collator.getCollationElementIterator(str); - - // int o = iter.next(); - // while (o != CollationElementIterator.NULLORDER - // && CollationElementIterator.primaryOrder(o) == 0) { - // o = iter.next(); - // } - // return o == CollationElementIterator.NULLORDER; - - // if lenient parsing is turned off, there is no such thing as - // an ignorable character: return true only if the string is empty - // } else { - // return false; - // } + return scanner != null && scanner.allIgnorable(str); } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java b/icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java index 8af12ff23a2..49a0d2b60b0 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/NFSubstitution.java @@ -79,84 +79,72 @@ abstract class NFSubstitution { } switch (description.charAt(0)) { - // if the description begins with '<'... case '<': - // throw an exception if the rule is a negative number - // rule - ///CLOVER:OFF - // If you look at the call hierarchy of this method, the rule would - // never be directly modified by the user and therefore makes the - // following pointless unless the user changes the ruleset. if (rule.getBaseValue() == NFRule.NEGATIVE_NUMBER_RULE) { + // throw an exception if the rule is a negative number rule + ///CLOVER:OFF + // If you look at the call hierarchy of this method, the rule would + // never be directly modified by the user and therefore makes the + // following pointless unless the user changes the ruleset. throw new IllegalArgumentException("<< not allowed in negative-number rule"); + ///CLOVER:ON } - ///CLOVER:ON - - // if the rule is a fraction rule, return an - // IntegralPartSubstitution else if (rule.getBaseValue() == NFRule.IMPROPER_FRACTION_RULE || rule.getBaseValue() == NFRule.PROPER_FRACTION_RULE - || rule.getBaseValue() == NFRule.MASTER_RULE) { + || rule.getBaseValue() == NFRule.MASTER_RULE) + { + // if the rule is a fraction rule, return an IntegralPartSubstitution return new IntegralPartSubstitution(pos, ruleSet, formatter, description); } - - // if the rule set containing the rule is a fraction - // rule set, return a NumeratorSubstitution else if (ruleSet.isFractionSet()) { + // if the rule set containing the rule is a fraction + // rule set, return a NumeratorSubstitution return new NumeratorSubstitution(pos, rule.getBaseValue(), formatter.getDefaultRuleSet(), formatter, description); } - - // otherwise, return a MultiplierSubstitution else { + // otherwise, return a MultiplierSubstitution return new MultiplierSubstitution(pos, rule.getDivisor(), ruleSet, formatter, description); } - // if the description begins with '>'... case '>': - // if the rule is a negative-number rule, return - // an AbsoluteValueSubstitution if (rule.getBaseValue() == NFRule.NEGATIVE_NUMBER_RULE) { + // if the rule is a negative-number rule, return + // an AbsoluteValueSubstitution return new AbsoluteValueSubstitution(pos, ruleSet, formatter, description); } - - // if the rule is a fraction rule, return a - // FractionalPartSubstitution else if (rule.getBaseValue() == NFRule.IMPROPER_FRACTION_RULE || rule.getBaseValue() == NFRule.PROPER_FRACTION_RULE - || rule.getBaseValue() == NFRule.MASTER_RULE) { + || rule.getBaseValue() == NFRule.MASTER_RULE) + { + // if the rule is a fraction rule, return a + // FractionalPartSubstitution return new FractionalPartSubstitution(pos, ruleSet, formatter, description); } - - // if the rule set owning the rule is a fraction rule set, - // throw an exception - ///CLOVER:OFF - // If you look at the call hierarchy of this method, the rule would - // never be directly modified by the user and therefore makes the - // following pointless unless the user changes the ruleset. else if (ruleSet.isFractionSet()) { + // if the rule set owning the rule is a fraction rule set, + // throw an exception + ///CLOVER:OFF + // If you look at the call hierarchy of this method, the rule would + // never be directly modified by the user and therefore makes the + // following pointless unless the user changes the ruleset. throw new IllegalArgumentException(">> not allowed in fraction rule set"); + ///CLOVER:ON } - ///CLOVER:ON - - // otherwise, return a ModulusSubstitution else { + // otherwise, return a ModulusSubstitution return new ModulusSubstitution(pos, rule.getDivisor(), rulePredecessor, ruleSet, formatter, description); } - - // if the description begins with '=', always return a - // SameValueSubstitution case '=': return new SameValueSubstitution(pos, ruleSet, formatter, description); - + default: // and if it's anything else, throw an exception ///CLOVER:OFF // If you look at the call hierarchy of this method, the rule would // never be directly modified by the user and therefore makes the // following pointless unless the user changes the ruleset. - default: throw new IllegalArgumentException("Illegal substitution character"); ///CLOVER:ON } @@ -184,8 +172,7 @@ abstract class NFSubstitution { // If it doesn't that's a syntax error. Otherwise, // makeSubstitution() was the only thing that needed to know // about these characters, so strip them off - if (description.length() >= 2 && description.charAt(0) == description.charAt( - description.length() - 1)) { + if (description.length() >= 2 && description.charAt(0) == description.charAt(description.length() - 1)) { description = description.substring(1, description.length() - 1); } else if (description.length() != 0) { @@ -681,10 +668,10 @@ class MultiplierSubstitution extends NFSubstitution { // rule, we keep a copy of the divisor this.divisor = divisor; - if (divisor == 0) { // this will cause recursion - throw new IllegalStateException("Substitution with bad divisor (" + divisor + ") " + description.substring(0, pos) + - " | " + description.substring(pos)); - } + if (divisor == 0) { // this will cause recursion + throw new IllegalStateException("Substitution with divisor 0 " + description.substring(0, pos) + + " | " + description.substring(pos)); + } } /** @@ -695,9 +682,9 @@ class MultiplierSubstitution extends NFSubstitution { public void setDivisor(int radix, int exponent) { divisor = Math.pow(radix, exponent); - if (divisor == 0) { - throw new IllegalStateException("Substitution with divisor 0"); - } + if (divisor == 0) { + throw new IllegalStateException("Substitution with divisor 0"); + } } //----------------------------------------------------------------------- @@ -719,11 +706,6 @@ class MultiplierSubstitution extends NFSubstitution { } } - public int hashCode() { - assert false : "hashCode not designed"; - return 42; - } - //----------------------------------------------------------------------- // formatting //----------------------------------------------------------------------- @@ -899,11 +881,6 @@ class ModulusSubstitution extends NFSubstitution { } } - public int hashCode() { - assert false : "hashCode not designed"; - return 42; - } - //----------------------------------------------------------------------- // formatting //----------------------------------------------------------------------- @@ -1176,12 +1153,6 @@ class FractionalPartSubstitution extends NFSubstitution { */ private boolean useSpaces = true; - /* - * The largest number of digits after the decimal point that this - * object will show in "by digits" mode - */ - //private static final int MAXDECIMALDIGITS = 18; // 8 - //----------------------------------------------------------------------- // construction //----------------------------------------------------------------------- @@ -1196,9 +1167,6 @@ class FractionalPartSubstitution extends NFSubstitution { RuleBasedNumberFormat formatter, String description) { super(pos, ruleSet, formatter, description); -// boolean chevron = description.startsWith(">>") || ruleSet == this.ruleSet; -// if (chevron || ruleSet == this.ruleSet) { - if (description.equals(">>") || description.equals(">>>") || ruleSet == this.ruleSet) { byDigits = true; if (description.equals(">>>")) { @@ -1224,46 +1192,23 @@ class FractionalPartSubstitution extends NFSubstitution { * toInsertInto */ public void doSubstitution(double number, StringBuffer toInsertInto, int position) { - // if we're not in "byDigits" mode, just use the inherited - // doSubstitution() routine if (!byDigits) { + // if we're not in "byDigits" mode, just use the inherited + // doSubstitution() routine super.doSubstitution(number, toInsertInto, position); - - // if we're in "byDigits" mode, transform the value into an integer - // by moving the decimal point eight places to the right and - // pulling digits off the right one at a time, formatting each digit - // as an integer using this substitution's owning rule set - // (this is slower, but more accurate, than doing it from the - // other end) - } else { -// int numberToFormat = (int)Math.round(transformNumber(number) * Math.pow( -// 10, MAXDECIMALDIGITS)); -// long numberToFormat = (long)Math.round(transformNumber(number) * Math.pow(10, MAXDECIMALDIGITS)); + } + else { + // if we're in "byDigits" mode, transform the value into an integer + // by moving the decimal point eight places to the right and + // pulling digits off the right one at a time, formatting each digit + // as an integer using this substitution's owning rule set + // (this is slower, but more accurate, than doing it from the + // other end) // just print to string and then use that DigitList dl = new DigitList(); dl.set(number, 20, true); - // this flag keeps us from formatting trailing zeros. It starts - // out false because we're pulling from the right, and switches - // to true the first time we encounter a non-zero digit -// boolean doZeros = false; -// System.out.println("class: " + getClass().getName()); -// System.out.println("number: " + number + " transformed: " + transformNumber(number)); -// System.out.println("formatting " + numberToFormat); -// for (int i = 0; i < MAXDECIMALDIGITS; i++) { -// int digit = (int)(numberToFormat % 10); -// System.out.println(" #: '" + numberToFormat + "'" + " digit '" + digit + "'"); -// if (digit != 0 || doZeros) { -// if (doZeros && useSpaces) { -// toInsertInto.insert(pos + this.pos, ' '); -// } -// doZeros = true; -// ruleSet.format(digit, toInsertInto, pos + this.pos); -// } -// numberToFormat /= 10; -// } - boolean pad = false; while (dl.count > Math.max(0, dl.decimalAt)) { if (pad && useSpaces) { @@ -1340,27 +1285,6 @@ class FractionalPartSubstitution extends NFSubstitution { ParsePosition workPos = new ParsePosition(1); double result = 0; int digit; -// double p10 = 0.1; - -// while (workText.length() > 0 && workPos.getIndex() != 0) { -// workPos.setIndex(0); -// digit = ruleSet.parse(workText, workPos, 10).intValue(); -// if (lenientParse && workPos.getIndex() == 0) { -// digit = NumberFormat.getInstance().parse(workText, workPos).intValue(); -// } - -// if (workPos.getIndex() != 0) { -// result += digit * p10; -// p10 /= 10; -// parsePosition.setIndex(parsePosition.getIndex() + workPos.getIndex()); -// workText = workText.substring(workPos.getIndex()); -// while (workText.length() > 0 && workText.charAt(0) == ' ') { -// workText = workText.substring(1); -// parsePosition.setIndex(parsePosition.getIndex() + 1); -// } -// } -// } - DigitList dl = new DigitList(); while (workText.length() > 0 && workPos.getIndex() != 0) { @@ -1580,11 +1504,6 @@ class NumeratorSubstitution extends NFSubstitution { } } - public int hashCode() { - assert false : "hashCode not designed"; - return 42; - } - //----------------------------------------------------------------------- // formatting //----------------------------------------------------------------------- @@ -1789,11 +1708,6 @@ class NullSubstitution extends NFSubstitution { return super.equals(that); } - public int hashCode() { - assert false : "hashCode not designed"; - return 42; - } - /** * NullSubstitutions don't show up in the textual representation * of a RuleBasedNumberFormat diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/PluralFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/PluralFormat.java index 8d80a4d4c1e..3e34ed8f7c9 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/PluralFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/PluralFormat.java @@ -186,7 +186,7 @@ public class PluralFormat extends UFormat { * @stable ICU 3.8 */ public PluralFormat() { - init(null, PluralType.CARDINAL, ULocale.getDefault(Category.FORMAT)); + init(null, PluralType.CARDINAL, ULocale.getDefault(Category.FORMAT), null); } /** @@ -197,7 +197,7 @@ public class PluralFormat extends UFormat { * @stable ICU 3.8 */ public PluralFormat(ULocale ulocale) { - init(null, PluralType.CARDINAL, ulocale); + init(null, PluralType.CARDINAL, ulocale, null); } /** @@ -221,7 +221,7 @@ public class PluralFormat extends UFormat { * @stable ICU 3.8 */ public PluralFormat(PluralRules rules) { - init(rules, PluralType.CARDINAL, ULocale.getDefault(Category.FORMAT)); + init(rules, PluralType.CARDINAL, ULocale.getDefault(Category.FORMAT), null); } /** @@ -234,7 +234,7 @@ public class PluralFormat extends UFormat { * @stable ICU 3.8 */ public PluralFormat(ULocale ulocale, PluralRules rules) { - init(rules, PluralType.CARDINAL, ulocale); + init(rules, PluralType.CARDINAL, ulocale, null); } /** @@ -260,7 +260,7 @@ public class PluralFormat extends UFormat { * @stable ICU 50 */ public PluralFormat(ULocale ulocale, PluralType type) { - init(null, type, ulocale); + init(null, type, ulocale, null); } /** @@ -286,7 +286,7 @@ public class PluralFormat extends UFormat { * @stable ICU 3.8 */ public PluralFormat(String pattern) { - init(null, PluralType.CARDINAL, ULocale.getDefault(Category.FORMAT)); + init(null, PluralType.CARDINAL, ULocale.getDefault(Category.FORMAT), null); applyPattern(pattern); } @@ -304,7 +304,7 @@ public class PluralFormat extends UFormat { * @stable ICU 3.8 */ public PluralFormat(ULocale ulocale, String pattern) { - init(null, PluralType.CARDINAL, ulocale); + init(null, PluralType.CARDINAL, ulocale, null); applyPattern(pattern); } @@ -320,7 +320,7 @@ public class PluralFormat extends UFormat { * @stable ICU 3.8 */ public PluralFormat(PluralRules rules, String pattern) { - init(rules, PluralType.CARDINAL, ULocale.getDefault(Category.FORMAT)); + init(rules, PluralType.CARDINAL, ULocale.getDefault(Category.FORMAT), null); applyPattern(pattern); } @@ -337,7 +337,7 @@ public class PluralFormat extends UFormat { * @stable ICU 3.8 */ public PluralFormat(ULocale ulocale, PluralRules rules, String pattern) { - init(rules, PluralType.CARDINAL, ulocale); + init(rules, PluralType.CARDINAL, ulocale, null); applyPattern(pattern); } @@ -353,7 +353,24 @@ public class PluralFormat extends UFormat { * @stable ICU 50 */ public PluralFormat(ULocale ulocale, PluralType type, String pattern) { - init(null, type, ulocale); + init(null, type, ulocale, null); + applyPattern(pattern); + } + + /** + * Creates a new PluralFormat for a plural type, a + * pattern and a locale. + * @param ulocale the PluralFormat will be configured with + * rules for this locale. This locale will also be used for standard + * number formatting. + * @param type The plural type (e.g., cardinal or ordinal). + * @param pattern the pattern for this PluralFormat. + * @param numberFormat The number formatter to use. + * @throws IllegalArgumentException if the pattern is invalid. + * @stable ICU 50 + */ + /*package*/ PluralFormat(ULocale ulocale, PluralType type, String pattern, NumberFormat numberFormat) { + init(null, type, ulocale, numberFormat); applyPattern(pattern); } @@ -370,12 +387,12 @@ public class PluralFormat extends UFormat { * numberFormat: a NumberFormat for the locale * ulocale. */ - private void init(PluralRules rules, PluralType type, ULocale locale) { + private void init(PluralRules rules, PluralType type, ULocale locale, NumberFormat numberFormat) { ulocale = locale; pluralRules = (rules == null) ? PluralRules.forLocale(ulocale, type) : rules; resetPattern(); - numberFormat = NumberFormat.getInstance(ulocale); + this.numberFormat = (numberFormat == null) ? NumberFormat.getInstance(ulocale) : numberFormat; } private void resetPattern() { @@ -588,7 +605,7 @@ public class PluralFormat extends UFormat { return toAppendTo; } - private final String format(Number numberObject, double number) { + private String format(Number numberObject, double number) { // If no pattern was applied, return the formatted number. if (msgPattern == null || msgPattern.countParts() == 0) { return numberFormat.format(numberObject); @@ -660,6 +677,7 @@ public class PluralFormat extends UFormat { * @stable ICU 3.8 */ public Number parse(String text, ParsePosition parsePosition) { + // You get number ranges from this. You can't get an exact number. throw new UnsupportedOperationException(); } @@ -677,6 +695,84 @@ public class PluralFormat extends UFormat { throw new UnsupportedOperationException(); } + /** + * This method returns the PluralRules type found from parsing. + * @param source the string to be parsed. + * @param pos defines the position where parsing is to begin, + * and upon return, the position where parsing left off. If the position + * is a negative index, then parsing failed. + * @return Returns the PluralRules type. For example, it could be "zero", "one", "two", "few", "many" or "other") + */ + /*package*/ String parseType(String source, RbnfLenientScanner scanner, FieldPosition pos) { + // If no pattern was applied, return null. + if (msgPattern == null || msgPattern.countParts() == 0) { + pos.setBeginIndex(-1); + pos.setEndIndex(-1); + return null; + } + int partIndex = 0; + int currMatchIndex; + int count=msgPattern.countParts(); + int startingAt = pos.getBeginIndex(); + if (startingAt < 0) { + startingAt = 0; + } + + // The keyword is null until we need to match against a non-explicit, not-"other" value. + // Then we get the keyword from the selector. + // (In other words, we never call the selector if we match against an explicit value, + // or if the only non-explicit keyword is "other".) + String keyword = null; + String matchedWord = null; + int matchedIndex = -1; + // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples + // until the end of the plural-only pattern. + do { + MessagePattern.Part partSelector=msgPattern.getPart(partIndex++); + if (partSelector.getType() != MessagePattern.Part.Type.ARG_SELECTOR) { + // Bad format + continue; + } + + MessagePattern.Part partStart=msgPattern.getPart(partIndex++); + if (partStart.getType() != MessagePattern.Part.Type.MSG_START) { + // Bad format + continue; + } + + MessagePattern.Part partLimit=msgPattern.getPart(partIndex++); + if (partLimit.getType() != MessagePattern.Part.Type.MSG_LIMIT) { + // Bad format + continue; + } + + String currArg = pattern.substring(partStart.getLimit(), partLimit.getIndex()); + if (scanner != null) { + // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us. + int[] scannerMatchResult = scanner.findText(source, currArg, startingAt); + currMatchIndex = scannerMatchResult[0]; + } + else { + currMatchIndex = source.indexOf(currArg); + } + if (currMatchIndex > matchedIndex && (matchedWord == null || currArg.length() > matchedWord.length())) { + matchedIndex = currMatchIndex; + matchedWord = currArg; + keyword = pattern.substring(partStart.getLimit(), partLimit.getIndex()); + } + } while(partIndexPluraFormat object. * Note: Calling this method resets this PluraFormat object, @@ -698,7 +794,7 @@ public class PluralFormat extends UFormat { if (ulocale == null) { ulocale = ULocale.getDefault(Category.FORMAT); } - init(null, PluralType.CARDINAL, ulocale); + init(null, PluralType.CARDINAL, ulocale, null); } /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java index 6ad0477ef66..426099f1f68 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedNumberFormat.java @@ -1309,17 +1309,15 @@ public class RuleBasedNumberFormat extends NumberFormat { // the same time, but you get what you get, and you shouldn't be using this from // multiple threads anyway. if (scannerProvider == null && lenientParse && !lookedForScanner) { - ///CLOVER:OFF try { lookedForScanner = true; - Class cls = Class.forName("com.ibm.icu.text.RbnfScannerProviderImpl"); + Class cls = Class.forName("com.ibm.icu.impl.text.RbnfScannerProviderImpl"); RbnfLenientScannerProvider provider = (RbnfLenientScannerProvider)cls.newInstance(); setLenientScannerProvider(provider); } catch (Exception e) { // any failure, we just ignore and return null } - ///CLOVER:ON } return scannerProvider; @@ -1474,7 +1472,7 @@ public class RuleBasedNumberFormat extends NumberFormat { DecimalFormat getDecimalFormat() { if (decimalFormat == null) { decimalFormat = (DecimalFormat)NumberFormat.getInstance(locale); - + if (decimalFormatSymbols != null) { decimalFormat.setDecimalFormatSymbols(decimalFormatSymbols); } @@ -1482,6 +1480,10 @@ public class RuleBasedNumberFormat extends NumberFormat { return decimalFormat; } + PluralFormat createPluralFormat(PluralRules.PluralType pluralType, String pattern) { + return new PluralFormat(locale, pluralType, pattern, getDecimalFormat()); + } + //----------------------------------------------------------------------- // construction implementation //----------------------------------------------------------------------- diff --git a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/format/RbnfLenientScannerTest.java b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/format/RbnfLenientScannerTest.java index 089ac70f4a1..187493fd285 100644 --- a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/format/RbnfLenientScannerTest.java +++ b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/format/RbnfLenientScannerTest.java @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 2009-2013, International Business Machines Corporation and * + * Copyright (C) 2009-2014, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ @@ -13,7 +13,7 @@ import java.util.Random; import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.text.RbnfLenientScannerProvider; -import com.ibm.icu.text.RbnfScannerProviderImpl; +import com.ibm.icu.impl.text.RbnfScannerProviderImpl; import com.ibm.icu.text.RuleBasedNumberFormat; import com.ibm.icu.util.ULocale; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RbnfTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RbnfTest.java index 22ca2e8ae3e..89366c9b2f0 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RbnfTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/RbnfTest.java @@ -68,77 +68,53 @@ public class RbnfTest extends TestFmwk { " 9: <0>];\n" - // use %%hr to format values greater than 3,600 seconds - // (the ">>>" below causes us to see the number of minutes - // when when there are zero minutes) - + " 3600/60: <%%hr<[, >>>];\n" - // this rule set takes care of the singular and plural forms - // of "minute" - + "%%min:\n" - + " 0 minutes; 1 minute; =0= minutes;\n" - // this rule set takes care of the singular and plural forms - // of "hour" - + "%%hr:\n" - + " 0 hours; 1 hour; =0= hours;\n" - - // main rule set for formatting in numerals - + "%in-numerals:\n" - // values below 60 seconds are shown with "sec." - + " =0= sec.;\n" - // higher values are shown with colons: %%min-sec is used for - // values below 3,600 seconds... - + " 60: =%%min-sec=;\n" - // ...and %%hr-min-sec is used for values of 3,600 seconds - // and above - + " 3600: =%%hr-min-sec=;\n" - // this rule causes values of less than 10 minutes to show without - // a leading zero - + "%%min-sec:\n" - + " 0: :=00=;\n" - + " 60/60: <0<>>;\n" - // this rule set is used for values of 3,600 or more. Minutes are always - // shown, and always shown with two digits - + "%%hr-min-sec:\n" - + " 0: :=00=;\n" - + " 60/60: <00<>>;\n" - + " 3600/60: <#,##0<:>>>;\n" - // the lenient-parse rules allow several different characters to be used - // as delimiters between hours, minutes, and seconds - + "%%lenient-parse:\n" - + " & : = . = ' ' = -;\n"; - public void TestCoverage() { + String durationInSecondsRules = + // main rule set for formatting with words + "%with-words:\n" + // take care of singular and plural forms of "second" + + " 0 seconds; 1 second; =0= seconds;\n" + // use %%min to format values greater than 60 seconds + + " 60/60: <%%min<[, >>];\n" + // use %%hr to format values greater than 3,600 seconds + // (the ">>>" below causes us to see the number of minutes + // when when there are zero minutes) + + " 3600/60: <%%hr<[, >>>];\n" + // this rule set takes care of the singular and plural forms + // of "minute" + + "%%min:\n" + + " 0 minutes; 1 minute; =0= minutes;\n" + // this rule set takes care of the singular and plural forms + // of "hour" + + "%%hr:\n" + + " 0 hours; 1 hour; =0= hours;\n" + + // main rule set for formatting in numerals + + "%in-numerals:\n" + // values below 60 seconds are shown with "sec." + + " =0= sec.;\n" + // higher values are shown with colons: %%min-sec is used for + // values below 3,600 seconds... + + " 60: =%%min-sec=;\n" + // ...and %%hr-min-sec is used for values of 3,600 seconds + // and above + + " 3600: =%%hr-min-sec=;\n" + // this rule causes values of less than 10 minutes to show without + // a leading zero + + "%%min-sec:\n" + + " 0: :=00=;\n" + + " 60/60: <0<>>;\n" + // this rule set is used for values of 3,600 or more. Minutes are always + // shown, and always shown with two digits + + "%%hr-min-sec:\n" + + " 0: :=00=;\n" + + " 60/60: <00<>>;\n" + + " 3600/60: <#,##0<:>>>;\n" + // the lenient-parse rules allow several different characters to be used + // as delimiters between hours, minutes, and seconds + + "%%lenient-parse:\n" + + " & : = . = ' ' = -;\n"; + // extra calls to boost coverage numbers RuleBasedNumberFormat fmt0 = new RuleBasedNumberFormat(RuleBasedNumberFormat.SPELLOUT); RuleBasedNumberFormat fmt1 = (RuleBasedNumberFormat)fmt0.clone(); @@ -574,39 +550,90 @@ public class RbnfTest extends TestFmwk { doTest(formatter, testData, true); } -// /** -// * Perform a simple spot check on the ordinal spellout rules -// */ -// public void TestOrdinalSpellout() { -// String rules = "%%digits-ordinal-indicator:" -// + "0=1: th;" -// + "1=1: st;" -// + "2=1: nd;" -// + "3=1: rd;" -// + "4=1: th;" -// + "20=1: >>;" -// + "100=1: >>;" -// + "%digits-ordinal:" -// + "-x: −>>;" -// + "0: =#,##0==%%digits-ordinal-indicator=;"; -// RuleBasedNumberFormat formatter = new RuleBasedNumberFormat(rules); -// String[][] testData = { -// { "1", "1st" }, -// { "2", "2nd" }, -// { "3", "3rd" }, -// { "4", "4th" }, -// { "11", "11th" }, -// { "12", "12th" }, -// { "13", "13th" }, -// { "14", "14th" }, -// { "21", "21st" }, -// { "22", "22nd" }, -// { "23", "23rd" }, -// { "24", "24th" }, -// }; -// -// doTest(formatter, testData, true); -// } + /** + * Perform a simple spot check on the ordinal spellout rules + */ + public void TestPluralRules() { + String enRules = "%digits-ordinal:" + + "-x: −>>;" + + "0: =#,##0=$(ordinal,one{st}two{nd}few{rd}other{th});"; + RuleBasedNumberFormat enFormatter = new RuleBasedNumberFormat(enRules, ULocale.ENGLISH); + String[][] enTestData = { + { "1", "1st" }, + { "2", "2nd" }, + { "3", "3rd" }, + { "4", "4th" }, + { "11", "11th" }, + { "12", "12th" }, + { "13", "13th" }, + { "14", "14th" }, + { "21", "21st" }, + { "22", "22nd" }, + { "23", "23rd" }, + { "24", "24th" }, + }; + + doTest(enFormatter, enTestData, true); + + // This is trying to model the feminine form, but don't worry about the details too much. + // We're trying to test the plural rules. + String ruRules = "%spellout-numbering:" + + "-x: минус >>;" + + "x.x: << запятая >>;" + + "0: ноль;" + + "1: один;" + + "2: два;" + + "3: три;" + + "4: четыре;" + + "5: пять;" + + "6: шесть;" + + "7: семь;" + + "8: восемь;" + + "9: девять;" + + "10: десять;" + + "11: одиннадцать;" + + "12: двенадцать;" + + "13: тринадцать;" + + "14: четырнадцать;" + + "15: пятнадцать;" + + "16: шестнадцать;" + + "17: семнадцать;" + + "18: восемнадцать;" + + "19: девятнадцать;" + + "20: двадцать[ >>];" + + "30: тридцать[ >>];" + + "40: сорок[ >>];" + + "50: пятьдесят[ >>];" + + "60: шестьдесят[ >>];" + + "70: семьдесят[ >>];" + + "80: восемьдесят[ >>];" + + "90: девяносто[ >>];" + + "100: сто[ >>];" + + "200: <<сти[ >>];" + + "300: <<ста[ >>];" + + "500: <<сот[ >>];" + + "1000: <<$(cardinal,one{ тысяча}few{ тысячи}other{ тысяч})[ >>];"; + RuleBasedNumberFormat ruFormatter = new RuleBasedNumberFormat(ruRules, new ULocale("ru")); + String[][] ruTestData = { + { "1", "один" }, + { "100", "сто" }, + { "125", "сто двадцать пять" }, + { "399", "триста девяносто девять" }, + { "1,000", "один тысяча" }, + { "2,000", "два тысячи" }, + { "5,000", "пять тысяч" }, + { "21,000", "двадцать один тысяча" }, + { "22,000", "двадцать два тысячи" }, + }; + + doTest(ruFormatter, ruTestData, true); + + // Make sure there are no divide by 0 errors. + String result = new RuleBasedNumberFormat(ruRules, new ULocale("ru")).format(21000); + if (!"двадцать один тысяча".equals(result)) { + errln("Got " + result + " for 21000"); + } + } public void TestFractionalRuleSet() { RuleBasedNumberFormat formatter = new RuleBasedNumberFormat(fracRules, @@ -714,38 +741,38 @@ public class RbnfTest extends TestFmwk { logln("big dec: " + buf.toString()); } - public void TestTrailingSemicolon() { - String thaiRules = - "%default:\n" + - " -x: \u0e25\u0e1a>>;\n" + - " x.x: <<\u0e08\u0e38\u0e14>>>;\n" + - " \u0e28\u0e39\u0e19\u0e22\u0e4c; \u0e2b\u0e19\u0e36\u0e48\u0e07; \u0e2a\u0e2d\u0e07; \u0e2a\u0e32\u0e21;\n" + - " \u0e2a\u0e35\u0e48; \u0e2b\u0e49\u0e32; \u0e2b\u0e01; \u0e40\u0e08\u0e47\u0e14; \u0e41\u0e1b\u0e14;\n" + - " \u0e40\u0e01\u0e49\u0e32; \u0e2a\u0e34\u0e1a; \u0e2a\u0e34\u0e1a\u0e40\u0e2d\u0e47\u0e14;\n" + - " \u0e2a\u0e34\u0e1a\u0e2a\u0e2d\u0e07; \u0e2a\u0e34\u0e1a\u0e2a\u0e32\u0e21;\n" + - " \u0e2a\u0e34\u0e1a\u0e2a\u0e35\u0e48; \u0e2a\u0e34\u0e1a\u0e2b\u0e49\u0e32;\n" + - " \u0e2a\u0e34\u0e1a\u0e2b\u0e01; \u0e2a\u0e34\u0e1a\u0e40\u0e08\u0e47\u0e14;\n" + - " \u0e2a\u0e34\u0e1a\u0e41\u0e1b\u0e14; \u0e2a\u0e34\u0e1a\u0e40\u0e01\u0e49\u0e32;\n" + - " 20: \u0e22\u0e35\u0e48\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + - " 30: \u0e2a\u0e32\u0e21\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + - " 40: \u0e2a\u0e35\u0e48\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + - " 50: \u0e2b\u0e49\u0e32\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + - " 60: \u0e2b\u0e01\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + - " 70: \u0e40\u0e08\u0e47\u0e14\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + - " 80: \u0e41\u0e1b\u0e14\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + - " 90: \u0e40\u0e01\u0e49\u0e32\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + - " 100: <<\u0e23\u0e49\u0e2d\u0e22[>>];\n" + - " 1000: <<\u0e1e\u0e31\u0e19[>>];\n" + - " 10000: <<\u0e2b\u0e21\u0e37\u0e48\u0e19[>>];\n" + - " 100000: <<\u0e41\u0e2a\u0e19[>>];\n" + - " 1,000,000: <<\u0e25\u0e49\u0e32\u0e19[>>];\n" + - " 1,000,000,000: <<\u0e1e\u0e31\u0e19\u0e25\u0e49\u0e32\u0e19[>>];\n" + - " 1,000,000,000,000: <<\u0e25\u0e49\u0e32\u0e19\u0e25\u0e49\u0e32\u0e19[>>];\n" + - " 1,000,000,000,000,000: =#,##0=;\n" + - "%%alt-ones:\n" + - " \u0e28\u0e39\u0e19\u0e22\u0e4c;\n" + - " \u0e40\u0e2d\u0e47\u0e14;\n" + - " =%default=;\n ; ;; "; + public void TestTrailingSemicolon() { + String thaiRules = + "%default:\n" + + " -x: \u0e25\u0e1a>>;\n" + + " x.x: <<\u0e08\u0e38\u0e14>>>;\n" + + " \u0e28\u0e39\u0e19\u0e22\u0e4c; \u0e2b\u0e19\u0e36\u0e48\u0e07; \u0e2a\u0e2d\u0e07; \u0e2a\u0e32\u0e21;\n" + + " \u0e2a\u0e35\u0e48; \u0e2b\u0e49\u0e32; \u0e2b\u0e01; \u0e40\u0e08\u0e47\u0e14; \u0e41\u0e1b\u0e14;\n" + + " \u0e40\u0e01\u0e49\u0e32; \u0e2a\u0e34\u0e1a; \u0e2a\u0e34\u0e1a\u0e40\u0e2d\u0e47\u0e14;\n" + + " \u0e2a\u0e34\u0e1a\u0e2a\u0e2d\u0e07; \u0e2a\u0e34\u0e1a\u0e2a\u0e32\u0e21;\n" + + " \u0e2a\u0e34\u0e1a\u0e2a\u0e35\u0e48; \u0e2a\u0e34\u0e1a\u0e2b\u0e49\u0e32;\n" + + " \u0e2a\u0e34\u0e1a\u0e2b\u0e01; \u0e2a\u0e34\u0e1a\u0e40\u0e08\u0e47\u0e14;\n" + + " \u0e2a\u0e34\u0e1a\u0e41\u0e1b\u0e14; \u0e2a\u0e34\u0e1a\u0e40\u0e01\u0e49\u0e32;\n" + + " 20: \u0e22\u0e35\u0e48\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + + " 30: \u0e2a\u0e32\u0e21\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + + " 40: \u0e2a\u0e35\u0e48\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + + " 50: \u0e2b\u0e49\u0e32\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + + " 60: \u0e2b\u0e01\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + + " 70: \u0e40\u0e08\u0e47\u0e14\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + + " 80: \u0e41\u0e1b\u0e14\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + + " 90: \u0e40\u0e01\u0e49\u0e32\u0e2a\u0e34\u0e1a[>%%alt-ones>];\n" + + " 100: <<\u0e23\u0e49\u0e2d\u0e22[>>];\n" + + " 1000: <<\u0e1e\u0e31\u0e19[>>];\n" + + " 10000: <<\u0e2b\u0e21\u0e37\u0e48\u0e19[>>];\n" + + " 100000: <<\u0e41\u0e2a\u0e19[>>];\n" + + " 1,000,000: <<\u0e25\u0e49\u0e32\u0e19[>>];\n" + + " 1,000,000,000: <<\u0e1e\u0e31\u0e19\u0e25\u0e49\u0e32\u0e19[>>];\n" + + " 1,000,000,000,000: <<\u0e25\u0e49\u0e32\u0e19\u0e25\u0e49\u0e32\u0e19[>>];\n" + + " 1,000,000,000,000,000: =#,##0=;\n" + + "%%alt-ones:\n" + + " \u0e28\u0e39\u0e19\u0e22\u0e4c;\n" + + " \u0e40\u0e2d\u0e47\u0e14;\n" + + " =%default=;\n ; ;; "; RuleBasedNumberFormat formatter = new RuleBasedNumberFormat(thaiRules, new Locale("th", "TH", "")); @@ -798,6 +825,83 @@ public class RbnfTest extends TestFmwk { } public void TestRuleSetDisplayName() { + /** + * Spellout rules for U.K. English. + * I borrow the rule sets for TestRuleSetDisplayName() + */ + final String ukEnglish = + "%simplified:\n" + + " -x: minus >>;\n" + + " x.x: << point >>;\n" + + " zero; one; two; three; four; five; six; seven; eight; nine;\n" + + " ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n" + + " seventeen; eighteen; nineteen;\n" + + " 20: twenty[->>];\n" + + " 30: thirty[->>];\n" + + " 40: forty[->>];\n" + + " 50: fifty[->>];\n" + + " 60: sixty[->>];\n" + + " 70: seventy[->>];\n" + + " 80: eighty[->>];\n" + + " 90: ninety[->>];\n" + + " 100: << hundred[ >>];\n" + + " 1000: << thousand[ >>];\n" + + " 1,000,000: << million[ >>];\n" + + " 1,000,000,000,000: << billion[ >>];\n" + + " 1,000,000,000,000,000: =#,##0=;\n" + + "%alt-teens:\n" + + " =%simplified=;\n" + + " 1000>: <%%alt-hundreds<[ >>];\n" + + " 10,000: =%simplified=;\n" + + " 1,000,000: << million[ >%simplified>];\n" + + " 1,000,000,000,000: << billion[ >%simplified>];\n" + + " 1,000,000,000,000,000: =#,##0=;\n" + + "%%alt-hundreds:\n" + + " 0: SHOULD NEVER GET HERE!;\n" + + " 10: <%simplified< thousand;\n" + + " 11: =%simplified= hundred>%%empty>;\n" + + "%%empty:\n" + + " 0:;" + + "%ordinal:\n" + + " zeroth; first; second; third; fourth; fifth; sixth; seventh;\n" + + " eighth; ninth;\n" + + " tenth; eleventh; twelfth; thirteenth; fourteenth;\n" + + " fifteenth; sixteenth; seventeenth; eighteenth;\n" + + " nineteenth;\n" + + " twentieth; twenty->>;\n" + + " 30: thirtieth; thirty->>;\n" + + " 40: fortieth; forty->>;\n" + + " 50: fiftieth; fifty->>;\n" + + " 60: sixtieth; sixty->>;\n" + + " 70: seventieth; seventy->>;\n" + + " 80: eightieth; eighty->>;\n" + + " 90: ninetieth; ninety->>;\n" + + " 100: <%simplified< hundredth; <%simplified< hundred >>;\n" + + " 1000: <%simplified< thousandth; <%simplified< thousand >>;\n" + + " 1,000,000: <%simplified< millionth; <%simplified< million >>;\n" + + " 1,000,000,000,000: <%simplified< billionth;\n" + + " <%simplified< billion >>;\n" + + " 1,000,000,000,000,000: =#,##0=;" + + "%default:\n" + + " -x: minus >>;\n" + + " x.x: << point >>;\n" + + " =%simplified=;\n" + + " 100: << hundred[ >%%and>];\n" + + " 1000: << thousand[ >%%and>];\n" + + " 100,000>>: << thousand[>%%commas>];\n" + + " 1,000,000: << million[>%%commas>];\n" + + " 1,000,000,000,000: << billion[>%%commas>];\n" + + " 1,000,000,000,000,000: =#,##0=;\n" + + "%%and:\n" + + " and =%default=;\n" + + " 100: =%default=;\n" + + "%%commas:\n" + + " ' and =%default=;\n" + + " 100: , =%default=;\n" + + " 1000: , <%default< thousand, >%default>;\n" + + " 1,000,000: , =%default=;" + + "%%lenient-parse:\n" + + " & ' ' , ',' ;\n"; ULocale.setDefault(ULocale.US); String[][] localizations = new String[][] { /* public rule sets*/ @@ -860,7 +964,7 @@ public class RbnfTest extends TestFmwk { } public void TestAllLocales() { - StringBuffer errors = new StringBuffer(); + StringBuilder errors = new StringBuilder(); String[] names = { " (spellout) ", " (ordinal) " @@ -883,18 +987,22 @@ public class RbnfTest extends TestFmwk { if (c < numbers.length) { n = numbers[c]; } else { - n = ((int)(r.nextInt(10000) - 3000)) / 16d; + n = (r.nextInt(10000) - 3000) / 16d; } String s = fmt.format(n); - logln(loc.getName() + names[j] + "success format: " + n + " -> " + s); + if (isVerbose()) { + logln(loc.getName() + names[j] + "success format: " + n + " -> " + s); + } try { // RBNF parse is extremely slow when lenient option is enabled. // non-lenient parse fmt.setLenientParseMode(false); Number num = fmt.parse(s); - logln(loc.getName() + names[j] + "success parse: " + s + " -> " + num); + if (isVerbose()) { + logln(loc.getName() + names[j] + "success parse: " + s + " -> " + num); + } if (j != 0) { // TODO: Fix the ordinal rules. continue; @@ -923,7 +1031,9 @@ public class RbnfTest extends TestFmwk { for (int i = 0; i < testData.length; i++) { String number = testData[i][0]; String expectedWords = testData[i][1]; - logln("test[" + i + "] number: " + number + " target: " + expectedWords); + if (isVerbose()) { + logln("test[" + i + "] number: " + number + " target: " + expectedWords); + } Number num = decFmt.parse(number); String actualWords = formatter.format(num); @@ -950,99 +1060,21 @@ public class RbnfTest extends TestFmwk { } } - /** - * Spellout rules for U.K. English. - * I borrow the rule sets for TestRuleSetDisplayName() - */ - public static final String ukEnglish = - "%simplified:\n" - + " -x: minus >>;\n" - + " x.x: << point >>;\n" - + " zero; one; two; three; four; five; six; seven; eight; nine;\n" - + " ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen;\n" - + " seventeen; eighteen; nineteen;\n" - + " 20: twenty[->>];\n" - + " 30: thirty[->>];\n" - + " 40: forty[->>];\n" - + " 50: fifty[->>];\n" - + " 60: sixty[->>];\n" - + " 70: seventy[->>];\n" - + " 80: eighty[->>];\n" - + " 90: ninety[->>];\n" - + " 100: << hundred[ >>];\n" - + " 1000: << thousand[ >>];\n" - + " 1,000,000: << million[ >>];\n" - + " 1,000,000,000,000: << billion[ >>];\n" - + " 1,000,000,000,000,000: =#,##0=;\n" - + "%alt-teens:\n" - + " =%simplified=;\n" - + " 1000>: <%%alt-hundreds<[ >>];\n" - + " 10,000: =%simplified=;\n" - + " 1,000,000: << million[ >%simplified>];\n" - + " 1,000,000,000,000: << billion[ >%simplified>];\n" - + " 1,000,000,000,000,000: =#,##0=;\n" - + "%%alt-hundreds:\n" - + " 0: SHOULD NEVER GET HERE!;\n" - + " 10: <%simplified< thousand;\n" - + " 11: =%simplified= hundred>%%empty>;\n" - + "%%empty:\n" - + " 0:;" - + "%ordinal:\n" - + " zeroth; first; second; third; fourth; fifth; sixth; seventh;\n" - + " eighth; ninth;\n" - + " tenth; eleventh; twelfth; thirteenth; fourteenth;\n" - + " fifteenth; sixteenth; seventeenth; eighteenth;\n" - + " nineteenth;\n" - + " twentieth; twenty->>;\n" - + " 30: thirtieth; thirty->>;\n" - + " 40: fortieth; forty->>;\n" - + " 50: fiftieth; fifty->>;\n" - + " 60: sixtieth; sixty->>;\n" - + " 70: seventieth; seventy->>;\n" - + " 80: eightieth; eighty->>;\n" - + " 90: ninetieth; ninety->>;\n" - + " 100: <%simplified< hundredth; <%simplified< hundred >>;\n" - + " 1000: <%simplified< thousandth; <%simplified< thousand >>;\n" - + " 1,000,000: <%simplified< millionth; <%simplified< million >>;\n" - + " 1,000,000,000,000: <%simplified< billionth;\n" - + " <%simplified< billion >>;\n" - + " 1,000,000,000,000,000: =#,##0=;" - + "%default:\n" - + " -x: minus >>;\n" - + " x.x: << point >>;\n" - + " =%simplified=;\n" - + " 100: << hundred[ >%%and>];\n" - + " 1000: << thousand[ >%%and>];\n" - + " 100,000>>: << thousand[>%%commas>];\n" - + " 1,000,000: << million[>%%commas>];\n" - + " 1,000,000,000,000: << billion[>%%commas>];\n" - + " 1,000,000,000,000,000: =#,##0=;\n" - + "%%and:\n" - + " and =%default=;\n" - + " 100: =%default=;\n" - + "%%commas:\n" - + " ' and =%default=;\n" - + " 100: , =%default=;\n" - + " 1000: , <%default< thousand, >%default>;\n" - + " 1,000,000: , =%default=;" - + "%%lenient-parse:\n" - + " & ' ' , ',' ;\n"; - /* Tests the method * public boolean equals(Object that) */ public void TestEquals(){ // Tests when "if (!(that instanceof RuleBasedNumberFormat))" is true RuleBasedNumberFormat rbnf = new RuleBasedNumberFormat("dummy"); - if (rbnf.equals(new String("dummy")) != false || - rbnf.equals(new Character('a')) != false || - rbnf.equals(new Object()) != false || - rbnf.equals(-1) != false || - rbnf.equals(0) != false || - rbnf.equals(1) != false || - rbnf.equals(-1.0) != false || - rbnf.equals(0.0) != false || - rbnf.equals(1.0) != false) + if (rbnf.equals("dummy") || + rbnf.equals(new Character('a')) || + rbnf.equals(new Object()) || + rbnf.equals(-1) || + rbnf.equals(0) || + rbnf.equals(1) || + rbnf.equals(-1.0) || + rbnf.equals(0.0) || + rbnf.equals(1.0)) { errln("RuleBasedNumberFormat.equals(Object that) was suppose to " + "be false for an invalid object."); @@ -1056,29 +1088,29 @@ public class RbnfTest extends TestFmwk { RuleBasedNumberFormat rbnf3 = new RuleBasedNumberFormat("dummy", new Locale("sp")); RuleBasedNumberFormat rbnf4 = new RuleBasedNumberFormat("dummy", new Locale("fr")); - if(rbnf1.equals(rbnf2) != false || rbnf1.equals(rbnf3) != false || - rbnf1.equals(rbnf4) != false || rbnf2.equals(rbnf3) != false || - rbnf2.equals(rbnf4) != false || rbnf3.equals(rbnf4) != false){ + if(rbnf1.equals(rbnf2) || rbnf1.equals(rbnf3) || + rbnf1.equals(rbnf4) || rbnf2.equals(rbnf3) || + rbnf2.equals(rbnf4) || rbnf3.equals(rbnf4)){ errln("RuleBasedNumberFormat.equals(Object that) was suppose to " + "be false for an invalid object."); } - if(rbnf1.equals(rbnf1) == false){ + if(!rbnf1.equals(rbnf1)){ errln("RuleBasedNumberFormat.equals(Object that) was not suppose to " + "be false for an invalid object."); } - if(rbnf2.equals(rbnf2) == false){ + if(!rbnf2.equals(rbnf2)){ errln("RuleBasedNumberFormat.equals(Object that) was not suppose to " + "be false for an invalid object."); } - if(rbnf3.equals(rbnf3) == false){ + if(!rbnf3.equals(rbnf3)){ errln("RuleBasedNumberFormat.equals(Object that) was not suppose to " + "be false for an invalid object."); } - if(rbnf4.equals(rbnf4) == false){ + if(!rbnf4.equals(rbnf4)){ errln("RuleBasedNumberFormat.equals(Object that) was not suppose to " + "be false for an invalid object."); } @@ -1086,20 +1118,20 @@ public class RbnfTest extends TestFmwk { RuleBasedNumberFormat rbnf5 = new RuleBasedNumberFormat("dummy", new Locale("en")); RuleBasedNumberFormat rbnf6 = new RuleBasedNumberFormat("dummy", new Locale("en")); - if(rbnf5.equals(rbnf6) == false){ + if(!rbnf5.equals(rbnf6)){ errln("RuleBasedNumberFormat.equals(Object that) was not suppose to " + "be false for an invalid object."); } rbnf6.setLenientParseMode(true); - if(rbnf5.equals(rbnf6) != false){ + if(rbnf5.equals(rbnf6)){ errln("RuleBasedNumberFormat.equals(Object that) was suppose to " + "be false for an invalid object."); } // Tests when "if (!ruleSets[i].equals(that2.ruleSets[i]))" is true RuleBasedNumberFormat rbnf7 = new RuleBasedNumberFormat("not_dummy", new Locale("en")); - if(rbnf5.equals(rbnf7) != false){ + if(rbnf5.equals(rbnf7)){ errln("RuleBasedNumberFormat.equals(Object that) was suppose to " + "be false for an invalid object."); } @@ -1321,7 +1353,7 @@ public class RbnfTest extends TestFmwk { value = val; expectedResult = expRes; } - }; + } final TextContextItem[] items = { new TextContextItem( "sv", RuleBasedNumberFormat.SPELLOUT, DisplayContext.CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, 123.45, "ett\u00ADhundra\u00ADtjugo\u00ADtre komma fyra fem" ), new TextContextItem( "sv", RuleBasedNumberFormat.SPELLOUT, DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, 123.45, "Ett\u00ADhundra\u00ADtjugo\u00ADtre komma fyra fem" ),