ICU-13569 RBBI state table optimizations, ICU4J, work in progress, plus branch refresh.

X-SVN-Rev: 40914
This commit is contained in:
Andy Heninger 2018-02-14 01:31:35 +00:00
parent acae049ee1
commit ff3ebb8c32
32 changed files with 626 additions and 227 deletions

View file

@ -91,8 +91,7 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames {
CaseMap.toTitle().wholeString().noLowercase();
private static String toTitleWholeStringNoLowercase(ULocale locale, String s) {
return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(
locale.toLocale(), null, s, new StringBuilder(), null).toString();
return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, s);
}
public static LocaleDisplayNames getInstance(ULocale locale, DialectHandling dialectHandling) {

View file

@ -37,7 +37,7 @@ public class Grouper {
return GROUPER_AUTO;
case ON_ALIGNED:
return GROUPER_ON_ALIGNED;
case WESTERN:
case THOUSANDS:
return GROUPER_WESTERN;
default:
throw new AssertionError();
@ -63,9 +63,9 @@ public class Grouper {
return GROUPER_WESTERN;
} else if (grouping1 == 3 && grouping2 == 2 && minGrouping == 1) {
return GROUPER_INDIC;
} else if (grouping1 == 3 && grouping2 == 3 && minGrouping == 1) {
} else if (grouping1 == 3 && grouping2 == 3 && minGrouping == 2) {
return GROUPER_WESTERN_MIN2;
} else if (grouping1 == 3 && grouping2 == 2 && minGrouping == 1) {
} else if (grouping1 == 3 && grouping2 == 2 && minGrouping == 2) {
return GROUPER_INDIC_MIN2;
} else {
return new Grouper(grouping1, grouping2, minGrouping);

View file

@ -22,7 +22,7 @@ public class RoundingUtils {
* The maximum number of fraction places, integer numerals, or significant digits. TODO: This does
* not feel like the best home for this value.
*/
public static final int MAX_INT_FRAC_SIG = 100;
public static final int MAX_INT_FRAC_SIG = 999;
/**
* Converts a rounding mode and metadata about the quantity being rounded to a boolean determining

View file

@ -35,7 +35,6 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
return null;
}
affixPattern = ParsingUtils.maybeFold(affixPattern, parseFlags);
AffixPatternMatcher series = new AffixPatternMatcher(affixPattern);
series.factory = factory;
series.ignorables = (0 != (parseFlags & ParsingUtils.PARSE_FLAG_EXACT_AFFIX)) ? null

View file

@ -24,8 +24,8 @@ public class CodePointMatcher implements NumberParseMatcher {
@Override
public boolean match(StringSegment segment, ParsedNumber result) {
if (segment.getCodePoint() == cp) {
segment.adjustOffset(Character.charCount(cp));
if (segment.matches(cp)) {
segment.adjustOffsetByCodePoint();
result.setCharsConsumed(segment);
}
return false;

View file

@ -15,10 +15,10 @@ public class CurrencyMatcher implements NumberParseMatcher {
private final String currency1;
private final String currency2;
public static CurrencyMatcher getInstance(Currency currency, ULocale loc, int setupFlags) {
public static CurrencyMatcher getInstance(Currency currency, ULocale loc) {
return new CurrencyMatcher(currency.getSubtype(),
ParsingUtils.maybeFold(currency.getSymbol(loc), setupFlags),
ParsingUtils.maybeFold(currency.getCurrencyCode(), setupFlags));
currency.getSymbol(loc),
currency.getCurrencyCode());
}
private CurrencyMatcher(String isoCode, String currency1, String currency2) {

View file

@ -15,7 +15,6 @@ public class MatcherFactory {
DecimalFormatSymbols symbols;
IgnorablesMatcher ignorables;
ULocale locale;
int parseFlags;
public MinusSignMatcher minusSign(boolean allowTrailing) {
return MinusSignMatcher.getInstance(symbols, allowTrailing);
@ -35,7 +34,7 @@ public class MatcherFactory {
public AnyMatcher currency() {
AnyMatcher any = new AnyMatcher();
any.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags));
any.addMatcher(CurrencyMatcher.getInstance(currency, locale));
any.addMatcher(CurrencyTrieMatcher.getInstance(locale));
any.freeze();
return any;

View file

@ -2,7 +2,6 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.UnicodeSet;
@ -13,14 +12,11 @@ import com.ibm.icu.text.UnicodeSet;
public class NanMatcher extends SymbolMatcher {
private static final NanMatcher DEFAULT = new NanMatcher("NaN");
private static final NanMatcher DEFAULT_FOLDED = new NanMatcher(UCharacter.foldCase("NaN", true));
public static NanMatcher getInstance(DecimalFormatSymbols symbols, int parseFlags) {
String symbolString = ParsingUtils.maybeFold(symbols.getNaN(), parseFlags);
String symbolString = symbols.getNaN();
if (DEFAULT.string.equals(symbolString)) {
return DEFAULT;
} else if (DEFAULT_FOLDED.string.equals(symbolString)) {
return DEFAULT_FOLDED;
} else {
return new NanMatcher(symbolString);
}

View file

@ -31,6 +31,30 @@ import com.ibm.icu.util.ULocale;
*/
public class NumberParserImpl {
/**
 * Builds and freezes a parser for the given locale and pattern string.
 *
 * <p>
 * The currency is hard-coded to USD and the default ignorables matcher is always used. The parser
 * receives, in this order: whatever {@code AffixMatcher.newGenerate} registers, a decimal matcher,
 * a currency trie matcher, and a NaN matcher.
 *
 * <p>
 * NOTE(review): the method name suggests this is temporary scaffolding to be deleted after a
 * branch merge -- confirm before adding new callers.
 *
 * @param locale
 *            The locale used for the decimal format symbols, grouping data, and currency names.
 * @param pattern
 *            The pattern string, parsed with {@code PatternStringParser.parseToPatternInfo}.
 * @param parseFlags
 *            The parser settings defined in the PARSE_FLAG_* fields.
 * @return The frozen parser.
 */
@Deprecated
public static NumberParserImpl removeMeWhenMerged(ULocale locale, String pattern, int parseFlags) {
    NumberParserImpl result = new NumberParserImpl(parseFlags);
    DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale);
    IgnorablesMatcher defaultIgnorables = IgnorablesMatcher.DEFAULT;

    // Shared inputs handed to the affix matcher generation below.
    MatcherFactory matcherFactory = new MatcherFactory();
    matcherFactory.currency = Currency.getInstance("USD");
    matcherFactory.symbols = dfs;
    matcherFactory.ignorables = defaultIgnorables;
    matcherFactory.locale = locale;

    ParsedPatternInfo parsedPattern = PatternStringParser.parseToPatternInfo(pattern);
    AffixMatcher.newGenerate(parsedPattern, result, matcherFactory, defaultIgnorables, parseFlags);

    Grouper localizedGrouper = Grouper.forStrategy(GroupingStrategy.AUTO)
            .withLocaleData(locale, parsedPattern);

    // Registration order is preserved from the original implementation.
    result.addMatcher(DecimalMatcher.getInstance(dfs, localizedGrouper, parseFlags));
    result.addMatcher(CurrencyTrieMatcher.getInstance(locale));
    result.addMatcher(NanMatcher.getInstance(dfs, parseFlags));

    result.freeze();
    return result;
}
// TODO: Find a better place for this enum.
/** Controls the set of rules for parsing a string. */
public static enum ParseMode {
@ -74,12 +98,13 @@ public class NumberParserImpl {
// Temporary frontend for testing.
int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
| ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
| ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES
| ParsingUtils.PARSE_FLAG_OPTIMIZE;
if (strictGrouping) {
parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE;
}
NumberParserImpl parser = new NumberParserImpl(parseFlags, true);
NumberParserImpl parser = new NumberParserImpl(parseFlags);
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
@ -88,7 +113,6 @@ public class NumberParserImpl {
factory.symbols = symbols;
factory.ignorables = ignorables;
factory.locale = locale;
factory.parseFlags = parseFlags;
ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
@ -99,7 +123,7 @@ public class NumberParserImpl {
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
parser.addMatcher(new RequireNumberMatcher());
@ -193,16 +217,18 @@ public class NumberParserImpl {
if (parseCurrency || patternInfo.hasCurrencySign()) {
parseFlags |= ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS;
}
if (optimize) {
parseFlags |= ParsingUtils.PARSE_FLAG_OPTIMIZE;
}
IgnorablesMatcher ignorables = isStrict ? IgnorablesMatcher.STRICT : IgnorablesMatcher.DEFAULT;
NumberParserImpl parser = new NumberParserImpl(parseFlags, optimize);
NumberParserImpl parser = new NumberParserImpl(parseFlags);
MatcherFactory factory = new MatcherFactory();
factory.currency = currency;
factory.symbols = symbols;
factory.ignorables = ignorables;
factory.locale = locale;
factory.parseFlags = parseFlags;
//////////////////////
/// AFFIX MATCHERS ///
@ -216,7 +242,7 @@ public class NumberParserImpl {
////////////////////////
if (parseCurrency || patternInfo.hasCurrencySign()) {
parser.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags));
parser.addMatcher(CurrencyMatcher.getInstance(currency, locale));
parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
}
@ -239,7 +265,7 @@ public class NumberParserImpl {
parser.addMatcher(ignorables);
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
if (!properties.getParseNoExponent()) {
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
}
//////////////////
@ -281,18 +307,12 @@ public class NumberParserImpl {
/**
* Creates a new, empty parser.
*
* @param ignoreCase
* If true, perform case-folding. This parameter needs to go into the constructor because
* its value is used during the construction of the matcher chain.
* @param optimize
* If true, compute "lead chars" UnicodeSets for the matchers. This reduces parsing
* runtime but increases construction runtime. If the parser is going to be used only once
* or twice, set this to false; if it is going to be used hundreds of times, set it to
* true.
* @param parseFlags
* The parser settings defined in the PARSE_FLAG_* fields.
*/
public NumberParserImpl(int parseFlags, boolean optimize) {
public NumberParserImpl(int parseFlags) {
matchers = new ArrayList<NumberParseMatcher>();
if (optimize) {
if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_OPTIMIZE)) {
leadCodePointses = new ArrayList<UnicodeSet>();
} else {
leadCodePointses = null;
@ -306,9 +326,7 @@ public class NumberParserImpl {
assert !frozen;
this.matchers.add(matcher);
if (leadCodePointses != null) {
UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
assert leadCodePoints.isFrozen();
this.leadCodePointses.add(leadCodePoints);
addLeadCodePointsForMatcher(matcher);
}
}
@ -317,13 +335,22 @@ public class NumberParserImpl {
this.matchers.addAll(matchers);
if (leadCodePointses != null) {
for (NumberParseMatcher matcher : matchers) {
UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
assert leadCodePoints.isFrozen();
this.leadCodePointses.add(leadCodePoints);
addLeadCodePointsForMatcher(matcher);
}
}
}
/**
 * Records the lead code point set of the given matcher in {@code leadCodePointses}. When the
 * IGNORE_CASE parse flag is set, a case-closed, frozen copy of the set is recorded instead of the
 * matcher's own set.
 */
private void addLeadCodePointsForMatcher(NumberParseMatcher matcher) {
    UnicodeSet leads = matcher.getLeadCodePoints();
    assert leads.isFrozen();
    boolean ignoreCase = (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE) != 0;
    if (ignoreCase) {
        // TODO: Avoid the clone operation here.
        UnicodeSet caseClosed = leads.cloneAsThawed();
        caseClosed.closeOver(UnicodeSet.ADD_CASE_MAPPINGS);
        caseClosed.freeze();
        leads = caseClosed;
    }
    this.leadCodePointses.add(leads);
}
public void setComparator(Comparator<ParsedNumber> comparator) {
assert !frozen;
this.comparator = comparator;
@ -353,7 +380,7 @@ public class NumberParserImpl {
public void parse(String input, int start, boolean greedy, ParsedNumber result) {
assert frozen;
assert start >= 0 && start < input.length();
StringSegment segment = new StringSegment(ParsingUtils.maybeFold(input, parseFlags));
StringSegment segment = new StringSegment(input, parseFlags);
segment.adjustOffset(start);
if (greedy) {
parseGreedyRecursive(segment, result);

View file

@ -2,7 +2,6 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSet.EntryRange;
@ -23,6 +22,7 @@ public class ParsingUtils {
public static final int PARSE_FLAG_EXACT_AFFIX = 0x0200;
public static final int PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400;
public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0800;
public static final int PARSE_FLAG_OPTIMIZE = 0x1000;
public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
for (EntryRange range : input.ranges()) {
@ -39,16 +39,4 @@ public class ParsingUtils {
}
}
/**
* Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
*/
public static String maybeFold(String input, int parseFlags) {
UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
if (0 != (parseFlags & PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
return UCharacter.foldCase(input, true);
} else {
return input;
}
}
}

View file

@ -15,16 +15,13 @@ public class ScientificMatcher implements NumberParseMatcher {
private final String exponentSeparatorString;
private final DecimalMatcher exponentMatcher;
public static ScientificMatcher getInstance(
DecimalFormatSymbols symbols,
Grouper grouper,
int parseFlags) {
public static ScientificMatcher getInstance(DecimalFormatSymbols symbols, Grouper grouper) {
// TODO: Static-initialize most common instances?
return new ScientificMatcher(symbols, grouper, parseFlags);
return new ScientificMatcher(symbols, grouper);
}
private ScientificMatcher(DecimalFormatSymbols symbols, Grouper grouper, int parseFlags) {
exponentSeparatorString = ParsingUtils.maybeFold(symbols.getExponentSeparator(), parseFlags);
private ScientificMatcher(DecimalFormatSymbols symbols, Grouper grouper) {
exponentSeparatorString = symbols.getExponentSeparator();
exponentMatcher = DecimalMatcher.getInstance(symbols,
grouper,
ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC | ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
@ -47,19 +44,14 @@ public class ScientificMatcher implements NumberParseMatcher {
if (segment.length() == 0) {
return true;
}
int leadCp = segment.getCodePoint();
if (leadCp == -1) {
// Partial code point match
return true;
}
// Allow a sign, and then try to match digits.
boolean minusSign = false;
if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(leadCp)) {
if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN))) {
minusSign = true;
segment.adjustOffset(Character.charCount(leadCp));
} else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(leadCp)) {
segment.adjustOffset(Character.charCount(leadCp));
segment.adjustOffsetByCodePoint();
} else if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN))) {
segment.adjustOffsetByCodePoint();
}
int digitsOffset = segment.getOffset();

View file

@ -2,6 +2,9 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UnicodeSet;
/**
* A mutable class allowing for a String with a variable offset and length. The charAt, length, and
* subSequence methods all operate relative to the fixed offset into the String.
@ -12,11 +15,13 @@ public class StringSegment implements CharSequence {
private final String str;
private int start;
private int end;
private boolean foldCase;
public StringSegment(String str) {
public StringSegment(String str, int parseFlags) {
this.str = str;
this.start = 0;
this.end = str.length();
this.foldCase = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE);
}
public int getOffset() {
@ -42,6 +47,13 @@ public class StringSegment implements CharSequence {
start += delta;
}
/**
 * Moves the start offset forward past the code point returned by {@link #getCodePoint}: one char,
 * or two chars when that code point is supplementary.
 */
public void adjustOffsetByCodePoint() {
    int currentCp = getCodePoint();
    int width = Character.charCount(currentCp);
    start += width;
}
public void setLength(int length) {
assert length >= 0;
assert start + length <= str.length();
@ -72,28 +84,73 @@ public class StringSegment implements CharSequence {
/**
* Returns the first code point in the string segment, or -1 if the string starts with an invalid
* code point.
*
* <p>
* <strong>Important:</strong> Most of the time, you should use {@link #matches}, which handles case
* folding logic, instead of this method.
*/
public int getCodePoint() {
assert start < end;
char lead = str.charAt(start);
if (Character.isHighSurrogate(lead) && start + 1 < end) {
return Character.toCodePoint(lead, str.charAt(start + 1));
} else if (Character.isSurrogate(lead)) {
return -1;
} else {
return lead;
char trail;
if (Character.isHighSurrogate(lead)
&& start + 1 < end
&& Character.isLowSurrogate(trail = str.charAt(start + 1))) {
return Character.toCodePoint(lead, trail);
}
return lead;
}
/**
 * Tests whether the first code point of this StringSegment is the same as the given code point.
 *
 * <p>
 * The comparison uses case folding when case folding is enabled for this segment.
 *
 * @param otherCp
 *            The code point to compare against.
 * @return true if the code points are considered equal.
 */
public boolean matches(int otherCp) {
    int firstCp = getCodePoint();
    return codePointsEqual(firstCp, otherCp, foldCase);
}
/**
 * Tests whether the first code point of this StringSegment is contained in the given UnicodeSet.
 * A result of -1 from {@link #getCodePoint} never matches. No case folding is applied by this
 * method.
 *
 * @param uniset
 *            The set to test against.
 * @return true if the first code point is in the set.
 */
public boolean matches(UnicodeSet uniset) {
    // TODO: Move UnicodeSet case-folding logic here.
    // TODO: Handle string matches here instead of separately.
    final int firstCp = getCodePoint();
    return firstCp != -1 && uniset.contains(firstCp);
}
/**
 * Computes how many leading chars this StringSegment has in common with the given CharSequence.
 * For instance, a segment of "aab" compared with "aac" yields 2, because only the first two chars
 * agree.
 *
 * <p>
 * Case folding is applied when it is enabled for this segment.
 *
 * @param other
 *            The sequence to compare against.
 * @return The length of the shared prefix.
 */
public int getCommonPrefixLength(CharSequence other) {
    final boolean useFolding = foldCase;
    return getPrefixLengthInternal(other, useFolding);
}
/**
 * Variant of {@link #getCommonPrefixLength} that always compares case-sensitively, regardless of
 * whether case folding is enabled for this segment.
 *
 * @param other
 *            The sequence to compare against.
 * @return The length of the shared prefix.
 */
public int getCaseSensitivePrefixLength(CharSequence other) {
    final boolean useFolding = false;
    return getPrefixLengthInternal(other, useFolding);
}
private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
int offset = 0;
for (; offset < Math.min(length(), other.length());) {
if (charAt(offset) != other.charAt(offset)) {
// TODO: case-fold code points, not chars
char c1 = charAt(offset);
char c2 = other.charAt(offset);
if (!codePointsEqual(c1, c2, foldCase)) {
break;
}
offset++;
@ -101,6 +158,30 @@ public class StringSegment implements CharSequence {
return offset;
}
// /**
// * Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
// */
// public static String maybeFold(String input, int parseFlags) {
// UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
// if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
// return UCharacter.foldCase(input, true);
// } else {
// return input;
// }
// }
/**
 * Compares two code points, optionally ignoring case. Identical code points always compare equal;
 * differing code points compare equal only when {@code foldCase} is true and their case foldings
 * are the same.
 */
private static final boolean codePointsEqual(int cp1, int cp2, boolean foldCase) {
    if (cp1 != cp2 && foldCase) {
        // Only pay for case folding when the raw values differ and folding was requested.
        return UCharacter.foldCase(cp1, true) == UCharacter.foldCase(cp2, true);
    }
    return cp1 == cp2;
}
@Override
public String toString() {
return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end);

View file

@ -47,9 +47,8 @@ public abstract class SymbolMatcher implements NumberParseMatcher {
}
}
int cp = segment.getCodePoint();
if (cp != -1 && uniSet.contains(cp)) {
segment.adjustOffset(Character.charCount(cp));
if (segment.matches(uniSet)) {
segment.adjustOffsetByCodePoint();
accept(segment, result);
return false;
}

View file

@ -51,7 +51,7 @@ public class UnicodeSetStaticCache {
DIGITS,
NAN_LEAD,
SCIENTIFIC_LEAD,
CWCF,
CWCF, // TODO: Check if this is being used and remove it if not.
// Combined Separators with Digits (for lead code points)
DIGITS_OR_ALL_SEPARATORS,

View file

@ -5123,37 +5123,6 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
}
/**
* Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string,
* and sometimes has no effect at all; the original string is returned whenever casing
* would not be appropriate for the first word (such as for CJK characters or initial numbers).
* Initial non-letters are skipped in order to find the character to change.
* Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE.
* <p>Examples:
* <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr>
* <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr>
* <tr><td>contact us</td><td>Contact us</td></tr>
* <tr><td>49ers win!</td><td>49ers win!</td></tr>
* <tr><td>(abc)</td><td>(abc)</td></tr>
* <tr><td>«ijs»</td><td>«Ijs»</td></tr>
* <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr>
* <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr>
* </table>
* @param locale the locale for accessing exceptional behavior (eg for tr).
* @param str the source string to change
* @return the modified string, or the original if no modifications were necessary.
* @internal
* @deprecated ICU internal only
*/
@Deprecated
public static String toTitleFirst(ULocale locale, String str) {
// TODO: Remove this function. Inline it where it is called in CLDR.
return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, str);
}
private static final com.ibm.icu.text.CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE =
com.ibm.icu.text.CaseMap.toTitle().wholeString().noLowercase();
/**
* {@icu} <p>Returns the titlecase version of the argument string.
* <p>Position for titlecasing is determined by the argument break

View file

@ -15,8 +15,8 @@ import com.ibm.icu.text.PluralRules.IFixedDecimal;
import com.ibm.icu.util.ICUUncheckedIOException;
/**
* The result of a number formatting operation. This class allows the result to be exported in several data types,
* including a String, an AttributedCharacterIterator, and a BigDecimal.
* The result of a number formatting operation. This class allows the result to be exported in several
* data types, including a String, an AttributedCharacterIterator, and a BigDecimal.
*
* @draft ICU 60
* @provisional This API might change or be removed in a future release.
@ -47,12 +47,12 @@ public class FormattedNumber {
}
/**
* Append the formatted number to an Appendable, such as a StringBuilder. This may be slightly more efficient than
* creating a String.
* Append the formatted number to an Appendable, such as a StringBuilder. This may be slightly more
* efficient than creating a String.
*
* <p>
* If an IOException occurs when appending to the Appendable, an unchecked {@link ICUUncheckedIOException} is thrown
* instead.
* If an IOException occurs when appending to the Appendable, an unchecked
* {@link ICUUncheckedIOException} is thrown instead.
*
* @param appendable
* The Appendable to which to append the formatted number string.
@ -73,16 +73,18 @@ public class FormattedNumber {
}
/**
* Determine the start and end indices of the first occurrence of the given <em>field</em> in the output string.
* This allows you to determine the locations of the integer part, fraction part, and sign.
* Determine the start and end indices of the first occurrence of the given <em>field</em> in the
* output string. This allows you to determine the locations of the integer part, fraction part, and
* sign.
*
* <p>
* If multiple different field attributes are needed, this method can be called repeatedly, or if <em>all</em> field
* attributes are needed, consider using getFieldIterator().
* If multiple different field attributes are needed, this method can be called repeatedly, or if
* <em>all</em> field attributes are needed, consider using getFieldIterator().
*
* <p>
* If a field occurs multiple times in an output string, such as a grouping separator, this method will only ever
* return the first occurrence. Use getFieldIterator() to access all occurrences of an attribute.
* If a field occurs multiple times in an output string, such as a grouping separator, this method
* will only ever return the first occurrence. Use getFieldIterator() to access all occurrences of an
* attribute.
*
* @param fieldPosition
* The FieldPosition to populate with the start and end indices of the desired field.
@ -106,13 +108,15 @@ public class FormattedNumber {
}
/**
* Export the formatted number as an AttributedCharacterIterator. This allows you to determine which characters in
* the output string correspond to which <em>fields</em>, such as the integer part, fraction part, and sign.
* Export the formatted number as an AttributedCharacterIterator. This allows you to determine which
* characters in the output string correspond to which <em>fields</em>, such as the integer part,
* fraction part, and sign.
*
* <p>
* If information on only one field is needed, consider using populateFieldPosition() instead.
*
* @return An AttributedCharacterIterator, containing information on the field attributes of the number string.
* @return An AttributedCharacterIterator, containing information on the field attributes of the
* number string.
* @draft ICU 60
* @provisional This API might change or be removed in a future release.
* @see com.ibm.icu.text.NumberFormat.Field
@ -124,8 +128,9 @@ public class FormattedNumber {
}
/**
* Export the formatted number as a BigDecimal. This endpoint is useful for obtaining the exact number being printed
* after scaling and rounding have been applied by the number formatting pipeline.
* Export the formatted number as a BigDecimal. This endpoint is useful for obtaining the exact
* number being printed after scaling and rounding have been applied by the number formatting
* pipeline.
*
* @return A BigDecimal representation of the formatted number.
* @draft ICU 60
@ -138,31 +143,29 @@ public class FormattedNumber {
/**
* @internal
* @deprecated This API is ICU internal only.
* @deprecated This API is ICU internal only. Use {@link #populateFieldPosition} or
* {@link #getFieldIterator} for similar functionality.
*/
@Deprecated
public String getPrefix() {
NumberStringBuilder temp = new NumberStringBuilder();
int length = micros.modOuter.apply(temp, 0, 0);
length += micros.modMiddle.apply(temp, 0, length);
/* length += */ micros.modInner.apply(temp, 0, length);
int prefixLength = micros.modOuter.getPrefixLength() + micros.modMiddle.getPrefixLength()
+ micros.modInner.getPrefixLength();
// #13453: DecimalFormat wants the affixes from the pattern only (modMiddle).
micros.modMiddle.apply(temp, 0, 0);
int prefixLength = micros.modMiddle.getPrefixLength();
return temp.subSequence(0, prefixLength).toString();
}
/**
* @internal
* @deprecated This API is ICU internal only.
* @deprecated This API is ICU internal only. Use {@link #populateFieldPosition} or
* {@link #getFieldIterator} for similar functionality.
*/
@Deprecated
public String getSuffix() {
NumberStringBuilder temp = new NumberStringBuilder();
int length = micros.modOuter.apply(temp, 0, 0);
length += micros.modMiddle.apply(temp, 0, length);
length += micros.modInner.apply(temp, 0, length);
int prefixLength = micros.modOuter.getPrefixLength() + micros.modMiddle.getPrefixLength()
+ micros.modInner.getPrefixLength();
// #13453: DecimalFormat wants the affixes from the pattern only (modMiddle).
int length = micros.modMiddle.apply(temp, 0, 0);
int prefixLength = micros.modMiddle.getPrefixLength();
return temp.subSequence(prefixLength, length).toString();
}
@ -185,7 +188,9 @@ public class FormattedNumber {
public int hashCode() {
// NumberStringBuilder and BigDecimal are mutable, so we can't call
// #equals() or #hashCode() on them directly.
return Arrays.hashCode(nsb.toCharArray()) ^ Arrays.hashCode(nsb.toFieldArray()) ^ fq.toBigDecimal().hashCode();
return Arrays.hashCode(nsb.toCharArray())
^ Arrays.hashCode(nsb.toFieldArray())
^ fq.toBigDecimal().hashCode();
}
/**
@ -206,7 +211,7 @@ public class FormattedNumber {
// #equals() or #hashCode() on them directly.
FormattedNumber _other = (FormattedNumber) other;
return Arrays.equals(nsb.toCharArray(), _other.nsb.toCharArray())
^ Arrays.equals(nsb.toFieldArray(), _other.nsb.toFieldArray())
^ fq.toBigDecimal().equals(_other.fq.toBigDecimal());
&& Arrays.equals(nsb.toFieldArray(), _other.nsb.toFieldArray())
&& fq.toBigDecimal().equals(_other.fq.toBigDecimal());
}
}

View file

@ -171,7 +171,7 @@ public final class NumberFormatter {
* <li>MIN2: 1234 and 12,34,567
* <li>AUTO: 1,234 and 12,34,567
* <li>ON_ALIGNED: 1,234 and 12,34,567
* <li>WESTERN: 1,234 and 1,234,567
* <li>THOUSANDS: 1,234 and 1,234,567
* </ul>
*
* <p>
@ -259,7 +259,7 @@ public final class NumberFormatter {
* @provisional This API might change or be removed in a future release.
* @see NumberFormatter
*/
WESTERN
THOUSANDS
}
/**

View file

@ -1044,7 +1044,7 @@ public class MeasureFormat extends UFormat {
case TIME_UNIT_FORMAT:
return createTimeUnitFormat();
case CURRENCY_FORMAT:
return new CurrencyFormat(locale);
return MeasureFormat.getCurrencyFormat(locale);
default:
throw new InvalidObjectException("Unknown subclass: " + subClass);
}

View file

@ -18,17 +18,19 @@ import com.ibm.icu.impl.ICUBinary.Authenticate;
import com.ibm.icu.impl.Trie2;
/**
* <p>Internal class used for Rule Based Break Iterators</p>
* <p>Internal class used for Rule Based Break Iterators.</p>
* <p>This class provides access to the compiled break rule data, as
* it is stored in a .brk file.
* Not intended for public use; declared public for testing purposes only.
* @internal
*/
final class RBBIDataWrapper {
public final class RBBIDataWrapper {
//
// These fields are the ready-to-use compiled rule data, as
// read from the file.
//
RBBIDataHeader fHeader;
short fFTable[];
public RBBIDataHeader fHeader;
public short fFTable[];
short fRTable[];
short fSFTable[];
short fSRTable[];
@ -78,11 +80,16 @@ final class RBBIDataWrapper {
// Index offsets to the fields in a state table row.
// Corresponds to struct RBBIStateTableRow in the C version.
//
final static int ACCEPTING = 0;
final static int LOOKAHEAD = 1;
final static int TAGIDX = 2;
final static int RESERVED = 3;
final static int NEXTSTATES = 4;
/** @internal */
public final static int ACCEPTING = 0;
/** @internal */
public final static int LOOKAHEAD = 1;
/** @internal */
public final static int TAGIDX = 2;
/** @internal */
public final static int RESERVED = 3;
/** @internal */
public final static int NEXTSTATES = 4;
// Index offsets to header fields of a state table
// struct RBBIStateTable {... in the C version.
@ -101,13 +108,15 @@ final class RBBIDataWrapper {
/**
* Data Header. A struct-like class with the fields from the RBBI data file header.
* Not intended for public use, declared public for testing purposes only.
* @internal
*/
final static class RBBIDataHeader {
public final static class RBBIDataHeader {
int fMagic; // == 0xb1a0
byte[] fFormatVersion; // For ICU 3.4 and later.
int fLength; // Total length in bytes of this RBBI Data,
// including all sections, not just the header.
int fCatCount; // Number of character categories.
public int fCatCount; // Number of character categories.
//
// Offsets and sizes of each of the subsections within the RBBI data.
@ -139,9 +148,9 @@ final class RBBIDataWrapper {
/**
* RBBI State Table Indexing Function. Given a state number, return the
* array index of the start of the state table row for that state.
*
* @internal
*/
int getRowIndex(int state){
public int getRowIndex(int state){
return ROW_DATA + state * (fHeader.fCatCount + 4);
}
@ -311,17 +320,17 @@ final class RBBIDataWrapper {
return This;
}
///CLOVER:OFF
// Getters for fields from the state table header
//
private int getStateTableNumStates(short table[]) {
/**
* Getters for fields from the state table header
* @internal
*/
public int getStateTableNumStates(short table[]) {
if (isBigEndian) {
return (table[NUMSTATES] << 16) | (table[NUMSTATES+1] & 0xffff);
} else {
return (table[NUMSTATES+1] << 16) | (table[NUMSTATES] & 0xffff);
}
}
///CLOVER:ON
int getStateTableFlags(short table[]) {
// This works for up to 15 flags bits.

View file

@ -342,10 +342,10 @@ class RBBIRuleBuilder {
//
// UnicodeSet processing.
// Munge the Unicode Sets to create a set of character categories.
// Generate the mapping tables (TRIE) from input 32-bit characters to
// Generate the mapping tables (TRIE) from input code points to
// the character categories.
//
builder.fSetBuilder.build();
builder.fSetBuilder.buildRanges();
//
// Generate the DFA state transition table.
@ -363,10 +363,34 @@ class RBBIRuleBuilder {
builder.fForwardTables.printRuleStatusTable();
}
builder.optimizeTables();
builder.fSetBuilder.buildTrie();
//
// Package up the compiled data, writing it to an output stream
// in the serialization format. This is the same as the ICU4C runtime format.
//
builder.flattenData(os);
}
/**
 * Mutable pair of character-class (state table column) numbers.
 * optimizeTables() creates one instance, passes it to
 * fForwardTables.findDuplCharClassFrom(), and removes column {@code right}
 * from each state table for as long as that call returns true.
 * NOTE(review): the initial value 3 for {@code left} presumably skips
 * low-numbered classes that must not be merged -- confirm.
 */
static class ClassPair {
int left = 3;
int right = 0;
}
/**
 * Shrinks the generated state tables.
 *
 * First, while the forward table reports a pair of character classes with
 * duplicate columns, the pair is merged in the set builder and the redundant
 * column is removed from all four state tables. Then duplicate states are
 * removed from each of the four tables.
 */
void optimizeTables() {
    ClassPair duplPair = new ClassPair();
    while (fForwardTables.findDuplCharClassFrom(duplPair)) {
        // mergeCategories() is declared as (int left, int right); pass the two
        // class numbers rather than the pair object itself.
        fSetBuilder.mergeCategories(duplPair.left, duplPair.right);

        // The same column has to disappear from every table so that the
        // tables keep agreeing on the character class numbering.
        fForwardTables.removeColumn(duplPair.right);
        fReverseTables.removeColumn(duplPair.right);
        fSafeFwdTables.removeColumn(duplPair.right);
        fSafeRevTables.removeColumn(duplPair.right);
    }

    fForwardTables.removeDuplicateStates();
    fReverseTables.removeDuplicateStates();
    fSafeFwdTables.removeDuplicateStates();
    fSafeRevTables.removeDuplicateStates();
}

View file

@ -112,7 +112,7 @@ class RBBISetBuilder {
}
}
if (setName.equals("dictionary")) {
this.fNum |= 0x4000;
this.fNum |= DICT_BIT;
break;
}
}
@ -138,6 +138,8 @@ class RBBISetBuilder {
boolean fSawBOF;
static final int DICT_BIT = 0x4000;
//------------------------------------------------------------------------
//
@ -156,7 +158,7 @@ class RBBISetBuilder {
// from the Unicode Sets.
//
//------------------------------------------------------------------------
void build() {
void buildRanges() {
RangeDescriptor rlRange;
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("usets")>=0) {printSets();}
@ -280,6 +282,15 @@ class RBBISetBuilder {
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("rgroup")>=0) {printRangeGroups();}
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("esets")>=0) {printSets();}
}
/**
* Build the Trie table for mapping UChar32 values to the corresponding
* range group number.
*/
void buildTrie() {
RangeDescriptor rlRange;
fTrie = new Trie2Writable(0, // Initial value for all code points.
0); // Error value for out-of-range input.
@ -294,7 +305,20 @@ class RBBISetBuilder {
}
}
/**
 * Folds character category {@code right} into category {@code left}.
 *
 * Every range currently numbered {@code right} is renumbered to {@code left}
 * (its DICT_BIT flag is kept), every range numbered above {@code right} moves
 * down by one, and the group count is reduced by one.
 *
 * @param left  the category that survives; asserted to be at least 1
 * @param right the category that is eliminated; asserted to be greater than left
 */
void mergeCategories(int left, int right) {
    assert left >= 1;
    assert right > left;

    RangeDescriptor range = fRangeList;
    while (range != null) {
        // Split the stored number into the category proper and the dictionary flag.
        int dictFlag = range.fNum & DICT_BIT;
        int category = range.fNum & ~DICT_BIT;
        if (category > right) {
            range.fNum--;
        } else if (category == right) {
            range.fNum = left | dictFlag;
        }
        range = range.fNext;
    }
    fGroupCount -= 1;
}
//-----------------------------------------------------------------------------------
//
// getTrieSize() Return the size that will be required to serialize the Trie.
@ -457,7 +481,7 @@ class RBBISetBuilder {
if (groupNum<10) {System.out.print(" ");}
System.out.print(groupNum + " ");
if ((rlRange.fNum & 0x4000) != 0) { System.out.print(" <DICT> ");}
if ((rlRange.fNum & DICT_BIT) != 0) { System.out.print(" <DICT> ");}
for (i=0; i<rlRange.fIncludesSets.size(); i++) {
RBBINode usetNode = rlRange.fIncludesSets.get(i);

View file

@ -655,7 +655,7 @@ class RBBITableBuilder {
// if sd.fAccepting already had a value other than 0 or -1, leave it be.
// If the end marker node is from a look-ahead rule, set
// the fLookAhead field or this state also.
// the fLookAhead field for this state also.
if (endMarker.fLookAheadEnd) {
// TODO: don't change value if already set?
// TODO: allow for more than one active look-ahead rule in engine.
@ -832,6 +832,129 @@ class RBBITableBuilder {
//
// findDuplCharClassFrom()
//
boolean findDuplCharClassFrom(RBBIRuleBuilder.ClassPair classPair) {
int numStates = fDStates.size();
int numCols = fRB.fSetBuilder.getNumCharCategories();
uint16_t table_base;
uint16_t table_dupl;
for (; baseCategory < numCols-1; ++baseCategory) {
for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) {
for (int state=0; state<numStates; state++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates.elementAt(state);
table_base = (uint16_t)sd.fDtran.elementAti(baseCategory);
table_dupl = (uint16_t)sd.fDtran.elementAti(duplCategory);
if (table_base != table_dupl) {
break;
}
}
if (table_base == table_dupl) {
return true;
}
}
}
return false;
}
//
// removeColumn()
//
void removeColumn(int column) {
int numStates = fDStates.size();
for (int state=0; state<numStates; state++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates.elementAt(state);
U_ASSERT(column < sd.fDtran.size());
sd.fDtran.removeElementAt(column);
}
}
/*
* findDuplicateState
*/
bool findDuplicateState(int &firstState, int &duplState) {
int numStates = fDStates.size();
int numCols = fRB.fSetBuilder.getNumCharCategories();
for (; firstState<numStates-1; ++firstState) {
RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates.elementAt(firstState);
for (duplState=firstState+1; duplState<numStates; ++duplState) {
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates.elementAt(duplState);
if (firstSD.fAccepting != duplSD.fAccepting ||
firstSD.fLookAhead != duplSD.fLookAhead ||
firstSD.fTagsIdx != duplSD.fTagsIdx) {
continue;
}
bool rowsMatch = true;
for (int col=0; col < numCols; ++col) {
int firstVal = firstSD.fDtran.elementAti(col);
int duplVal = duplSD.fDtran.elementAti(col);
if (!((firstVal == duplVal) ||
((firstVal == firstState || firstVal == duplState) &&
(duplVal == firstState || duplVal == duplState)))) {
rowsMatch = false;
break;
}
}
if (rowsMatch) {
return true;
}
}
}
return false;
}
void removeState(int keepState, int duplState) {
U_ASSERT(keepState < duplState);
U_ASSERT(duplState < fDStates.size());
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates.elementAt(duplState);
fDStates.removeElementAt(duplState);
delete duplSD;
int numStates = fDStates.size();
int numCols = fRB.fSetBuilder.getNumCharCategories();
for (int state=0; state<numStates; ++state) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates.elementAt(state);
for (int col=0; col<numCols; col++) {
int existingVal = sd.fDtran.elementAti(col);
int newVal = existingVal;
if (existingVal == duplState) {
newVal = keepState;
} else if (existingVal > duplState) {
newVal = existingVal - 1;
}
sd.fDtran.setElementAt(newVal, col);
}
if (sd.fAccepting == duplState) {
sd.fAccepting = keepState;
} else if (sd.fAccepting > duplState) {
sd.fAccepting--;
}
if (sd.fLookAhead == duplState) {
sd.fLookAhead = keepState;
} else if (sd.fLookAhead > duplState) {
sd.fLookAhead--;
}
}
}
/*
* RemoveDuplicateStates
*/
void removeDuplicateStates() {
int firstState = 3;
int duplicateState = 0;
while (findDuplicateState(firstState, duplicateState)) {
// printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
removeState(firstState, duplicateState);
}
}
//-----------------------------------------------------------------------------
//

View file

@ -222,9 +222,11 @@ public class RuleBasedBreakIterator extends BreakIterator {
private CharacterIterator fText = new java.text.StringCharacterIterator("");
/**
* The rule data for this BreakIterator instance. Package private.
* The rule data for this BreakIterator instance.
* Not intended for public use. Declared public for testing purposes only.
* @internal
*/
RBBIDataWrapper fRData;
public RBBIDataWrapper fRData;
/**
* The iteration state - current position, rule status for the current position,

View file

@ -84,19 +84,12 @@ public class TimeUnitFormat extends MeasureFormat {
private static final long serialVersionUID = -3707773153184971529L;
// These fields are supposed to be the same as the fields in mf. They
// are here for serialization backward compatibility and to support parsing.
// Unlike MeasureFormat, this class is mutable and allows a new NumberFormat to be set after
// initialization. Keep a second copy of NumberFormat and use it instead of the one from the parent.
private NumberFormat format;
private ULocale locale;
private int style;
// We use this field in lieu of the super class because the super class
// is immutable while this class is mutable. The contents of the super class
// is an empty shell. Every public method of the super class is overridden to
// delegate to this field. Each time this object mutates, it replaces this field with
// a new immutable instance.
// private transient MeasureFormat mf;
private transient Map<TimeUnit, Map<String, Object[]>> timeUnitToCountToPatterns;
private transient PluralRules pluralRules;
private transient boolean isReady;

View file

@ -2521,5 +2521,25 @@ public class CalendarRegressionTest extends com.ibm.icu.dev.test.TestFmwk {
}
}
}
}
/**
 * Checks that an islamic-civil calendar set to very large Julian day numbers
 * still reports month and day-of-month values within the calendar's maxima.
 */
@Test
public void TestIslamicCalOverflow() {
    final String localeID = "ar@calendar=islamic-civil";
    final Calendar calendar = Calendar.getInstance(new ULocale(localeID));
    final int monthLimit = calendar.getMaximum(Calendar.MONTH);
    final int dayLimit = calendar.getMaximum(Calendar.DATE);

    // Julian days in year 202002; int32_t overflow if the day is >= 73530874.
    int julianDay = 73530872;
    while (julianDay <= 73530876) {
        calendar.clear();
        calendar.set(Calendar.JULIAN_DAY, julianDay);
        int gotYear = calendar.get(Calendar.YEAR);
        int gotMonth = calendar.get(Calendar.MONTH);
        int gotDay = calendar.get(Calendar.DATE);

        boolean withinLimits = gotMonth <= monthLimit && gotDay <= dayLimit;
        if (!withinLimits) {
            errln("Error: localeID " + localeID + ", julianDay " + julianDay + "; got year " + gotYear + "; maxMonth " + monthLimit +
                    ", got month " + gotMonth + "; maxDayOfMonth " + dayLimit + ", got dayOfMonth " + gotDay);
        }
        julianDay++;
    }
}
}
//eof

View file

@ -311,6 +311,36 @@ public class IntlTestDecimalFormatSymbols extends TestFmwk
errln("ERROR: Char digits should be Latin digits");
}
// Check on copy
DecimalFormatSymbols copy = (DecimalFormatSymbols) symbols.clone();
if (!Arrays.equals(copy.getDigitStrings(), osmanyaDigitStrings)) {
errln("ERROR: Osmanya digits (supplementary) should be set");
}
if (Character.codePointAt(osmanyaDigitStrings[0], 0) != copy.getCodePointZero()) {
errln("ERROR: Code point zero be Osmanya code point zero");
}
if (defZero != copy.getZeroDigit()) {
errln("ERROR: Zero digit should be 0");
}
if (!Arrays.equals(copy.getDigits(), defDigits)) {
errln("ERROR: Char digits should be Latin digits");
}
// Check on resource bundle
DecimalFormatSymbols fromData = DecimalFormatSymbols.getInstance(new ULocale("en@numbers=osma"));
if (!Arrays.equals(fromData.getDigitStrings(), osmanyaDigitStrings)) {
errln("ERROR: Osmanya digits (supplementary) should be set");
}
if (Character.codePointAt(osmanyaDigitStrings[0], 0) != fromData.getCodePointZero()) {
errln("ERROR: Code point zero be Osmanya code point zero");
}
if (defZero != fromData.getZeroDigit()) {
errln("ERROR: Zero digit should be 0");
}
if (!Arrays.equals(fromData.getDigits(), defDigits)) {
errln("ERROR: Char digits should be Latin digits");
}
symbols.setDigitStrings(differentDigitStrings);
if (!Arrays.equals(symbols.getDigitStrings(), differentDigitStrings)) {
errln("ERROR: Different digits should be set");

View file

@ -16,6 +16,7 @@ import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.lang.reflect.Field;
import java.text.FieldPosition;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -42,6 +43,7 @@ import com.ibm.icu.text.MeasureFormat;
import com.ibm.icu.text.MeasureFormat.FormatWidth;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.CurrencyAmount;
import com.ibm.icu.util.Measure;
import com.ibm.icu.util.MeasureUnit;
import com.ibm.icu.util.NoUnit;
@ -1925,6 +1927,15 @@ public class MeasureUnitTest extends TestFmwk {
assertEquals("getCurrencyFormat ULocale/Locale", mfu, mfj);
}
/**
 * Parses "GTQ 34.56" with the English currency MeasureFormat and checks both
 * the numeric value and that the ISO code from the input text is reported.
 */
@Test
public void testCurrencyFormatParseIsoCode() throws ParseException {
    MeasureFormat currencyFormat = MeasureFormat.getCurrencyFormat(ULocale.ENGLISH);
    Object parsed = currencyFormat.parseObject("GTQ 34.56");
    CurrencyAmount amount = (CurrencyAmount) parsed;

    double parsedValue = amount.getNumber().doubleValue();
    assertEquals("Parse should succeed", parsedValue, 34.56, 0.0);

    String parsedIsoCode = amount.getCurrency().getCurrencyCode();
    assertEquals("Should parse ISO code GTQ even though the currency is USD",
            "GTQ", parsedIsoCode);
}
@Test
public void testDoubleZero() {
ULocale en = new ULocale("en");

View file

@ -868,7 +868,7 @@ public class NumberFormatTest extends TestFmwk {
new ParseCurrencyItem( "en_GB", "euros4", "4,00\u00A0\u20AC", 6,400, "EUR" ),
new ParseCurrencyItem( "en_GB", "euros6", "6\u00A0\u20AC", 3, 6, "EUR" ),
new ParseCurrencyItem( "en_GB", "euros8", "\u20AC8", 2, 8, "EUR" ),
new ParseCurrencyItem( "en_GB", "dollars4", "US$4", 0, 0, "USD" ),
new ParseCurrencyItem( "en_GB", "dollars4", "US$4", 4, 4, "USD" ),
new ParseCurrencyItem( "fr_FR", "euros4", "4,00\u00A0\u20AC", 6, 4, "EUR" ),
new ParseCurrencyItem( "fr_FR", "euros6", "6\u00A0\u20AC", 3, 6, "EUR" ),
@ -2018,7 +2018,6 @@ public class NumberFormatTest extends TestFmwk {
};
@SuppressWarnings("resource") // InputStream is will be closed by the ResourceReader.
@Ignore("TODO: http://bugs.icu-project.org/trac/ticket/13571")
@Test
public void TestCases() {
String caseFileName = "NumberFormatTestCases.txt";
@ -5331,6 +5330,23 @@ public class NumberFormatTest extends TestFmwk {
assertEquals("Grouping should be off", false, df.isGroupingUsed());
}
/**
 * Checks which formatter styles expose content through the positive affix getters:
 * scientific and compact formatters report an empty positive suffix, while the ISO
 * currency and plural currency styles report the currency text for GBP.
 */
@Test
public void Test13453_AffixContent() {
    DecimalFormat scientific = (DecimalFormat) DecimalFormat.getScientificInstance();
    String scientificSuffix = scientific.getPositiveSuffix();
    assertEquals("Scientific should NOT be included", "", scientificSuffix);

    DecimalFormat compact =
            CompactDecimalFormat.getInstance(ULocale.ENGLISH, CompactDecimalFormat.CompactStyle.SHORT);
    String compactSuffix = compact.getPositiveSuffix();
    assertEquals("Compact should NOT be included", "", compactSuffix);

    DecimalFormat isoCurrency = (DecimalFormat) DecimalFormat.getInstance(NumberFormat.ISOCURRENCYSTYLE);
    isoCurrency.setCurrency(Currency.getInstance("GBP"));
    String isoPrefix = isoCurrency.getPositivePrefix();
    assertEquals("ISO currency SHOULD be included", "GBP", isoPrefix);

    DecimalFormat pluralCurrency = (DecimalFormat) DecimalFormat.getInstance(NumberFormat.PLURALCURRENCYSTYLE);
    pluralCurrency.setCurrency(Currency.getInstance("GBP"));
    String pluralSuffix = pluralCurrency.getPositiveSuffix();
    assertEquals("Plural name SHOULD be included", " British pounds", pluralSuffix);
}
@Test
public void Test11035_FormatCurrencyAmount() {
double amount = 12345.67;

View file

@ -25,7 +25,6 @@ import com.ibm.icu.impl.number.Padder;
import com.ibm.icu.impl.number.Padder.PadPosition;
import com.ibm.icu.impl.number.PatternStringParser;
import com.ibm.icu.number.CompactNotation;
import com.ibm.icu.number.FormattedNumber;
import com.ibm.icu.number.FractionRounder;
import com.ibm.icu.number.IntegerWidth;
import com.ibm.icu.number.LocalizedNumberFormatter;
@ -1176,6 +1175,21 @@ public class NumberFormatterApiTest {
"8.765",
"0");
assertFormatDescendingBig(
"Indic locale with THOUSANDS grouping",
"",
NumberFormatter.with().grouping(GroupingStrategy.THOUSANDS),
new ULocale("en-IN"),
"87,650,000",
"8,765,000",
"876,500",
"87,650",
"8,765",
"876.5",
"87.65",
"8.765",
"0");
// NOTE: Hungarian is interesting because it has minimumGroupingDigits=4 in locale data
// If this test breaks due to data changes, find another locale that has minimumGroupingDigits.
assertFormatDescendingBig(
@ -1860,29 +1874,6 @@ public class NumberFormatterApiTest {
assertNotEquals(NumberFormatter.with().locale(ULocale.ENGLISH), NumberFormatter.with().locale(Locale.FRENCH));
}
/**
 * Formats 1 and -1 with an English GBP formatter in ISO_CODE and FULL_NAME unit
 * widths and compares FormattedNumber.getPrefix() / getSuffix() with the expected
 * affix strings listed in each case row.
 */
@Test
public void getPrefixSuffix() {
// Each row: formatter, positive prefix, positive suffix, negative prefix, negative suffix.
Object[][] cases = {
{ NumberFormatter.withLocale(ULocale.ENGLISH).unit(GBP).unitWidth(UnitWidth.ISO_CODE), "GBP", "",
"-GBP", "" },
{ NumberFormatter.withLocale(ULocale.ENGLISH).unit(GBP).unitWidth(UnitWidth.FULL_NAME), "",
" British pounds", "-", " British pounds" } };
for (Object[] cas : cases) {
LocalizedNumberFormatter f = (LocalizedNumberFormatter) cas[0];
String posPrefix = (String) cas[1];
String posSuffix = (String) cas[2];
String negPrefix = (String) cas[3];
String negSuffix = (String) cas[4];
FormattedNumber positive = f.format(1);
FormattedNumber negative = f.format(-1);
assertEquals(posPrefix, positive.getPrefix());
assertEquals(posSuffix, positive.getSuffix());
assertEquals(negPrefix, negative.getPrefix());
assertEquals(negSuffix, negative.getSuffix());
}
}
@Test
public void plurals() {
// TODO: Expand this test.
@ -1921,12 +1912,12 @@ public class NumberFormatterApiTest {
Rounder.class.getDeclaredMethod("minMaxFraction", Integer.TYPE, Integer.TYPE),
Rounder.class.getDeclaredMethod("minMaxDigits", Integer.TYPE, Integer.TYPE), };
final int EXPECTED_MAX_INT_FRAC_SIG = 100;
final String expectedSubstring0 = "between 0 and 100 (inclusive)";
final String expectedSubstring1 = "between 1 and 100 (inclusive)";
final String expectedSubstringN1 = "between -1 and 100 (inclusive)";
final int EXPECTED_MAX_INT_FRAC_SIG = 999;
final String expectedSubstring0 = "between 0 and 999 (inclusive)";
final String expectedSubstring1 = "between 1 and 999 (inclusive)";
final String expectedSubstringN1 = "between -1 and 999 (inclusive)";
// We require that the upper bounds all be 100 inclusive.
// We require that the upper bounds all be 999 inclusive.
// The lower bound may be either -1, 0, or 1.
Set<String> methodsWithLowerBound1 = new HashSet();
methodsWithLowerBound1.add("fixedDigits");
@ -1936,6 +1927,12 @@ public class NumberFormatterApiTest {
methodsWithLowerBound1.add("withMinDigits");
methodsWithLowerBound1.add("withMaxDigits");
methodsWithLowerBound1.add("withMinExponentDigits");
// Methods with lower bound 0:
// fixedFraction
// minFraction
// maxFraction
// minMaxFraction
// zeroFillTo
Set<String> methodsWithLowerBoundN1 = new HashSet();
methodsWithLowerBoundN1.add("truncateAt");

View file

@ -13,6 +13,7 @@ import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
import com.ibm.icu.impl.number.parse.MinusSignMatcher;
import com.ibm.icu.impl.number.parse.NumberParserImpl;
import com.ibm.icu.impl.number.parse.ParsedNumber;
import com.ibm.icu.impl.number.parse.ParsingUtils;
import com.ibm.icu.impl.number.parse.PercentMatcher;
import com.ibm.icu.impl.number.parse.PlusSignMatcher;
import com.ibm.icu.impl.number.parse.SeriesMatcher;
@ -191,7 +192,7 @@ public class NumberParserTest {
int expectedOffset = (Integer) cas[1];
boolean expectedMaybeMore = (Boolean) cas[2];
StringSegment segment = new StringSegment(input);
StringSegment segment = new StringSegment(input, 0);
ParsedNumber result = new ParsedNumber();
boolean actualMaybeMore = series.match(segment, result);
int actualOffset = segment.getOffset();
@ -215,4 +216,39 @@ public class NumberParserTest {
result.getNumber().doubleValue(),
0.0);
}
/**
 * For each pattern/input pair, parses once with a case-sensitive parser and once
 * with PARSE_FLAG_IGNORE_CASE set, and checks how many chars each parse consumed
 * (ParsedNumber.charEnd).
 */
@Test
public void testCaseFolding() {
    // Each row: pattern, input string, case sensitive chars, case insensitive chars
    Object[][] scenarios = new Object[][] {
            { "0", "JP¥3456", 7, 7 },
            { "0", "jp¥3456", 0, 0 }, // not to be accepted, even in case insensitive mode
            { "A0", "A5", 2, 2 },
            { "A0", "a5", 0, 2 },
            { "0", "NaN", 3, 3 },
            { "0", "nan", 0, 3 } };

    final int strictFlags = ParsingUtils.PARSE_FLAG_OPTIMIZE;
    final int foldingFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE | ParsingUtils.PARSE_FLAG_OPTIMIZE;

    for (Object[] scenario : scenarios) {
        String pattern = (String) scenario[0];
        String input = (String) scenario[1];
        int strictExpected = (Integer) scenario[2];
        int foldedExpected = (Integer) scenario[3];

        NumberParserImpl strictParser =
                NumberParserImpl.removeMeWhenMerged(ULocale.ENGLISH, pattern, strictFlags);
        ParsedNumber strictResult = new ParsedNumber();
        strictParser.parse(input, true, strictResult);
        assertEquals("Case-Sensitive: " + input + " on " + pattern,
                strictExpected,
                strictResult.charEnd);

        NumberParserImpl foldingParser =
                NumberParserImpl.removeMeWhenMerged(ULocale.ENGLISH, pattern, foldingFlags);
        ParsedNumber foldedResult = new ParsedNumber();
        foldingParser.parse(input, true, foldedResult);
        assertEquals("Folded: " + input + " on " + pattern,
                foldedExpected,
                foldedResult.charEnd);
    }
}
}

View file

@ -17,7 +17,7 @@ public class StringSegmentTest {
@Test
public void testOffset() {
StringSegment segment = new StringSegment(SAMPLE_STRING);
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
assertEquals(0, segment.getOffset());
segment.adjustOffset(3);
assertEquals(3, segment.getOffset());
@ -29,7 +29,7 @@ public class StringSegmentTest {
@Test
public void testLength() {
StringSegment segment = new StringSegment(SAMPLE_STRING);
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
assertEquals(11, segment.length());
segment.adjustOffset(3);
assertEquals(8, segment.length());
@ -43,7 +43,7 @@ public class StringSegmentTest {
@Test
public void testCharAt() {
StringSegment segment = new StringSegment(SAMPLE_STRING);
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
assertCharSequenceEquals(SAMPLE_STRING, segment);
segment.adjustOffset(3);
assertCharSequenceEquals("radio 📻", segment);
@ -53,20 +53,20 @@ public class StringSegmentTest {
@Test
public void testGetCodePoint() {
StringSegment segment = new StringSegment(SAMPLE_STRING);
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
assertEquals(0x1F4FB, segment.getCodePoint());
segment.setLength(1);
assertEquals(-1, segment.getCodePoint());
assertEquals(0xD83D, segment.getCodePoint());
segment.resetLength();
segment.adjustOffset(1);
assertEquals(-1, segment.getCodePoint());
assertEquals(0xDCFB, segment.getCodePoint());
segment.adjustOffset(1);
assertEquals(0x20, segment.getCodePoint());
}
@Test
public void testCommonPrefixLength() {
StringSegment segment = new StringSegment(SAMPLE_STRING);
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
assertEquals(11, segment.getCommonPrefixLength(SAMPLE_STRING));
assertEquals(4, segment.getCommonPrefixLength("📻 r"));
assertEquals(3, segment.getCommonPrefixLength("📻 x"));

View file

@ -19,6 +19,7 @@ package com.ibm.icu.dev.test.rbbi;
import java.text.CharacterIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.junit.Test;
import org.junit.runner.RunWith;
@ -26,6 +27,7 @@ import org.junit.runners.JUnit4;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.RBBIDataWrapper;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.util.ULocale;
@ -562,4 +564,62 @@ public class RBBITest extends TestFmwk {
String rtRules = bi.toString(); // getRules() in C++
assertEquals("Break Iterator rule stripping test", "!!forward; $x = [ab#]; '#' '?'; ", rtRules);
}
/**
 * Rebuilds the English line break iterator from its source rules and scans the
 * resulting forward state table for duplicate columns (character classes) and
 * duplicate rows (states). Duplicates are currently only printed; the
 * corresponding assertions are left disabled.
 */
@Test
public void TestTableRedundancies() {
    RuleBasedBreakIterator bi = (RuleBasedBreakIterator)BreakIterator.getLineInstance(Locale.ENGLISH);
    String rules = bi.toString();
    bi = new RuleBasedBreakIterator(rules);
    // Build a break iterator from source rules.
    // Want to check the rule builder in Java, not the pre-built rules that are imported from ICU4C.
    RBBIDataWrapper dw = bi.fRData;
    short[] fwtbl = dw.fFTable;
    int numCharClasses = dw.fHeader.fCatCount;
    // The state count is a property of the table; read it once instead of in every loop test.
    int numStates = dw.getStateTableNumStates(fwtbl);

    // Check for duplicate columns (character categories).
    // Each column is captured as a string with one char per state, starting at state 1.
    List<String> columns = new ArrayList<String>();
    for (int column = 0; column < numCharClasses; column++) {
        StringBuilder s = new StringBuilder();
        for (int r = 1; r < numStates; r++) {
            int row = dw.getRowIndex(r);
            short tableVal = fwtbl[row + RBBIDataWrapper.NEXTSTATES + column];
            s.append((char)tableVal);
        }
        columns.add(s.toString());
    }
    // Ignore column (char class) 0 while checking; it's special, and may have duplicates.
    for (int c1 = 1; c1 < numCharClasses; c1++) {
        for (int c2 = c1 + 1; c2 < numCharClasses; c2++) {
            // assertFalse(String.format("Duplicate columns (%d, %d)", c1, c2), columns.get(c1).equals(columns.get(c2)));
            if (columns.get(c1).equals(columns.get(c2))) {
                System.out.printf("Duplicate columns (%d, %d)\n", c1, c2);
            }
        }
    }

    // Check for duplicate states.
    List<String> rows = new ArrayList<String>();
    for (int r = 0; r < numStates; r++) {
        StringBuilder s = new StringBuilder();
        int row = dw.getRowIndex(r);
        assertTrue("Accepting < -1", fwtbl[row + RBBIDataWrapper.ACCEPTING] >= -1);
        // The three header fields are appended as decimal text. Separate them so that
        // different field values (e.g. 1,23 versus 12,3) can never yield the same key.
        s.append(fwtbl[row + RBBIDataWrapper.ACCEPTING]).append(',');
        s.append(fwtbl[row + RBBIDataWrapper.LOOKAHEAD]).append(',');
        s.append(fwtbl[row + RBBIDataWrapper.TAGIDX]).append(',');
        for (int column = 0; column < numCharClasses; column++) {
            short tableVal = fwtbl[row + RBBIDataWrapper.NEXTSTATES + column];
            s.append((char)tableVal);
        }
        rows.add(s.toString());
    }
    for (int r1 = 0; r1 < numStates; r1++) {
        for (int r2 = r1 + 1; r2 < numStates; r2++) {
            // assertFalse(String.format("Duplicate states (%d, %d)", r1, r2), rows.get(r1).equals(rows.get(r2)));
            if (rows.get(r1).equals(rows.get(r2))) {
                System.out.printf("Duplicate states (%d, %d)\n", r1, r2);
            }
        }
    }
}
}