mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-13569 RBBI state table optimizations, ICU4J, work in progress, plus branch refresh.
X-SVN-Rev: 40914
This commit is contained in:
parent
acae049ee1
commit
ff3ebb8c32
32 changed files with 626 additions and 227 deletions
|
@ -91,8 +91,7 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames {
|
|||
CaseMap.toTitle().wholeString().noLowercase();
|
||||
|
||||
private static String toTitleWholeStringNoLowercase(ULocale locale, String s) {
|
||||
return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(
|
||||
locale.toLocale(), null, s, new StringBuilder(), null).toString();
|
||||
return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, s);
|
||||
}
|
||||
|
||||
public static LocaleDisplayNames getInstance(ULocale locale, DialectHandling dialectHandling) {
|
||||
|
|
|
@ -37,7 +37,7 @@ public class Grouper {
|
|||
return GROUPER_AUTO;
|
||||
case ON_ALIGNED:
|
||||
return GROUPER_ON_ALIGNED;
|
||||
case WESTERN:
|
||||
case THOUSANDS:
|
||||
return GROUPER_WESTERN;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
|
@ -63,9 +63,9 @@ public class Grouper {
|
|||
return GROUPER_WESTERN;
|
||||
} else if (grouping1 == 3 && grouping2 == 2 && minGrouping == 1) {
|
||||
return GROUPER_INDIC;
|
||||
} else if (grouping1 == 3 && grouping2 == 3 && minGrouping == 1) {
|
||||
} else if (grouping1 == 3 && grouping2 == 3 && minGrouping == 2) {
|
||||
return GROUPER_WESTERN_MIN2;
|
||||
} else if (grouping1 == 3 && grouping2 == 2 && minGrouping == 1) {
|
||||
} else if (grouping1 == 3 && grouping2 == 2 && minGrouping == 2) {
|
||||
return GROUPER_INDIC_MIN2;
|
||||
} else {
|
||||
return new Grouper(grouping1, grouping2, minGrouping);
|
||||
|
|
|
@ -22,7 +22,7 @@ public class RoundingUtils {
|
|||
* The maximum number of fraction places, integer numerals, or significant digits. TODO: This does
|
||||
* not feel like the best home for this value.
|
||||
*/
|
||||
public static final int MAX_INT_FRAC_SIG = 100;
|
||||
public static final int MAX_INT_FRAC_SIG = 999;
|
||||
|
||||
/**
|
||||
* Converts a rounding mode and metadata about the quantity being rounded to a boolean determining
|
||||
|
|
|
@ -35,7 +35,6 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
|
|||
return null;
|
||||
}
|
||||
|
||||
affixPattern = ParsingUtils.maybeFold(affixPattern, parseFlags);
|
||||
AffixPatternMatcher series = new AffixPatternMatcher(affixPattern);
|
||||
series.factory = factory;
|
||||
series.ignorables = (0 != (parseFlags & ParsingUtils.PARSE_FLAG_EXACT_AFFIX)) ? null
|
||||
|
|
|
@ -24,8 +24,8 @@ public class CodePointMatcher implements NumberParseMatcher {
|
|||
|
||||
@Override
|
||||
public boolean match(StringSegment segment, ParsedNumber result) {
|
||||
if (segment.getCodePoint() == cp) {
|
||||
segment.adjustOffset(Character.charCount(cp));
|
||||
if (segment.matches(cp)) {
|
||||
segment.adjustOffsetByCodePoint();
|
||||
result.setCharsConsumed(segment);
|
||||
}
|
||||
return false;
|
||||
|
|
|
@ -15,10 +15,10 @@ public class CurrencyMatcher implements NumberParseMatcher {
|
|||
private final String currency1;
|
||||
private final String currency2;
|
||||
|
||||
public static CurrencyMatcher getInstance(Currency currency, ULocale loc, int setupFlags) {
|
||||
public static CurrencyMatcher getInstance(Currency currency, ULocale loc) {
|
||||
return new CurrencyMatcher(currency.getSubtype(),
|
||||
ParsingUtils.maybeFold(currency.getSymbol(loc), setupFlags),
|
||||
ParsingUtils.maybeFold(currency.getCurrencyCode(), setupFlags));
|
||||
currency.getSymbol(loc),
|
||||
currency.getCurrencyCode());
|
||||
}
|
||||
|
||||
private CurrencyMatcher(String isoCode, String currency1, String currency2) {
|
||||
|
|
|
@ -15,7 +15,6 @@ public class MatcherFactory {
|
|||
DecimalFormatSymbols symbols;
|
||||
IgnorablesMatcher ignorables;
|
||||
ULocale locale;
|
||||
int parseFlags;
|
||||
|
||||
public MinusSignMatcher minusSign(boolean allowTrailing) {
|
||||
return MinusSignMatcher.getInstance(symbols, allowTrailing);
|
||||
|
@ -35,7 +34,7 @@ public class MatcherFactory {
|
|||
|
||||
public AnyMatcher currency() {
|
||||
AnyMatcher any = new AnyMatcher();
|
||||
any.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags));
|
||||
any.addMatcher(CurrencyMatcher.getInstance(currency, locale));
|
||||
any.addMatcher(CurrencyTrieMatcher.getInstance(locale));
|
||||
any.freeze();
|
||||
return any;
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
|
@ -13,14 +12,11 @@ import com.ibm.icu.text.UnicodeSet;
|
|||
public class NanMatcher extends SymbolMatcher {
|
||||
|
||||
private static final NanMatcher DEFAULT = new NanMatcher("NaN");
|
||||
private static final NanMatcher DEFAULT_FOLDED = new NanMatcher(UCharacter.foldCase("NaN", true));
|
||||
|
||||
public static NanMatcher getInstance(DecimalFormatSymbols symbols, int parseFlags) {
|
||||
String symbolString = ParsingUtils.maybeFold(symbols.getNaN(), parseFlags);
|
||||
String symbolString = symbols.getNaN();
|
||||
if (DEFAULT.string.equals(symbolString)) {
|
||||
return DEFAULT;
|
||||
} else if (DEFAULT_FOLDED.string.equals(symbolString)) {
|
||||
return DEFAULT_FOLDED;
|
||||
} else {
|
||||
return new NanMatcher(symbolString);
|
||||
}
|
||||
|
|
|
@ -31,6 +31,30 @@ import com.ibm.icu.util.ULocale;
|
|||
*/
|
||||
public class NumberParserImpl {
|
||||
|
||||
@Deprecated
|
||||
public static NumberParserImpl removeMeWhenMerged(ULocale locale, String pattern, int parseFlags) {
|
||||
NumberParserImpl parser = new NumberParserImpl(parseFlags);
|
||||
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
|
||||
IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
|
||||
|
||||
MatcherFactory factory = new MatcherFactory();
|
||||
factory.currency = Currency.getInstance("USD");
|
||||
factory.symbols = symbols;
|
||||
factory.ignorables = ignorables;
|
||||
factory.locale = locale;
|
||||
|
||||
ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
|
||||
AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
|
||||
|
||||
Grouper grouper = Grouper.forStrategy(GroupingStrategy.AUTO).withLocaleData(locale, patternInfo);
|
||||
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
|
||||
parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
|
||||
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
|
||||
|
||||
parser.freeze();
|
||||
return parser;
|
||||
}
|
||||
|
||||
// TODO: Find a better place for this enum.
|
||||
/** Controls the set of rules for parsing a string. */
|
||||
public static enum ParseMode {
|
||||
|
@ -74,12 +98,13 @@ public class NumberParserImpl {
|
|||
// Temporary frontend for testing.
|
||||
|
||||
int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
|
||||
| ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
|
||||
| ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES
|
||||
| ParsingUtils.PARSE_FLAG_OPTIMIZE;
|
||||
if (strictGrouping) {
|
||||
parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE;
|
||||
}
|
||||
|
||||
NumberParserImpl parser = new NumberParserImpl(parseFlags, true);
|
||||
NumberParserImpl parser = new NumberParserImpl(parseFlags);
|
||||
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
|
||||
IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
|
||||
|
||||
|
@ -88,7 +113,6 @@ public class NumberParserImpl {
|
|||
factory.symbols = symbols;
|
||||
factory.ignorables = ignorables;
|
||||
factory.locale = locale;
|
||||
factory.parseFlags = parseFlags;
|
||||
|
||||
ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
|
||||
AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
|
||||
|
@ -99,7 +123,7 @@ public class NumberParserImpl {
|
|||
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
|
||||
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
|
||||
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
|
||||
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
|
||||
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
|
||||
parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
|
||||
parser.addMatcher(new RequireNumberMatcher());
|
||||
|
||||
|
@ -193,16 +217,18 @@ public class NumberParserImpl {
|
|||
if (parseCurrency || patternInfo.hasCurrencySign()) {
|
||||
parseFlags |= ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS;
|
||||
}
|
||||
if (optimize) {
|
||||
parseFlags |= ParsingUtils.PARSE_FLAG_OPTIMIZE;
|
||||
}
|
||||
IgnorablesMatcher ignorables = isStrict ? IgnorablesMatcher.STRICT : IgnorablesMatcher.DEFAULT;
|
||||
|
||||
NumberParserImpl parser = new NumberParserImpl(parseFlags, optimize);
|
||||
NumberParserImpl parser = new NumberParserImpl(parseFlags);
|
||||
|
||||
MatcherFactory factory = new MatcherFactory();
|
||||
factory.currency = currency;
|
||||
factory.symbols = symbols;
|
||||
factory.ignorables = ignorables;
|
||||
factory.locale = locale;
|
||||
factory.parseFlags = parseFlags;
|
||||
|
||||
//////////////////////
|
||||
/// AFFIX MATCHERS ///
|
||||
|
@ -216,7 +242,7 @@ public class NumberParserImpl {
|
|||
////////////////////////
|
||||
|
||||
if (parseCurrency || patternInfo.hasCurrencySign()) {
|
||||
parser.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags));
|
||||
parser.addMatcher(CurrencyMatcher.getInstance(currency, locale));
|
||||
parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
|
||||
}
|
||||
|
||||
|
@ -239,7 +265,7 @@ public class NumberParserImpl {
|
|||
parser.addMatcher(ignorables);
|
||||
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
|
||||
if (!properties.getParseNoExponent()) {
|
||||
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
|
||||
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
|
||||
}
|
||||
|
||||
//////////////////
|
||||
|
@ -281,18 +307,12 @@ public class NumberParserImpl {
|
|||
/**
|
||||
* Creates a new, empty parser.
|
||||
*
|
||||
* @param ignoreCase
|
||||
* If true, perform case-folding. This parameter needs to go into the constructor because
|
||||
* its value is used during the construction of the matcher chain.
|
||||
* @param optimize
|
||||
* If true, compute "lead chars" UnicodeSets for the matchers. This reduces parsing
|
||||
* runtime but increases construction runtime. If the parser is going to be used only once
|
||||
* or twice, set this to false; if it is going to be used hundreds of times, set it to
|
||||
* true.
|
||||
* @param parseFlags
|
||||
* The parser settings defined in the PARSE_FLAG_* fields.
|
||||
*/
|
||||
public NumberParserImpl(int parseFlags, boolean optimize) {
|
||||
public NumberParserImpl(int parseFlags) {
|
||||
matchers = new ArrayList<NumberParseMatcher>();
|
||||
if (optimize) {
|
||||
if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_OPTIMIZE)) {
|
||||
leadCodePointses = new ArrayList<UnicodeSet>();
|
||||
} else {
|
||||
leadCodePointses = null;
|
||||
|
@ -306,9 +326,7 @@ public class NumberParserImpl {
|
|||
assert !frozen;
|
||||
this.matchers.add(matcher);
|
||||
if (leadCodePointses != null) {
|
||||
UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
|
||||
assert leadCodePoints.isFrozen();
|
||||
this.leadCodePointses.add(leadCodePoints);
|
||||
addLeadCodePointsForMatcher(matcher);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -317,13 +335,22 @@ public class NumberParserImpl {
|
|||
this.matchers.addAll(matchers);
|
||||
if (leadCodePointses != null) {
|
||||
for (NumberParseMatcher matcher : matchers) {
|
||||
UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
|
||||
assert leadCodePoints.isFrozen();
|
||||
this.leadCodePointses.add(leadCodePoints);
|
||||
addLeadCodePointsForMatcher(matcher);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void addLeadCodePointsForMatcher(NumberParseMatcher matcher) {
|
||||
UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
|
||||
assert leadCodePoints.isFrozen();
|
||||
// TODO: Avoid the clone operation here.
|
||||
if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE)) {
|
||||
leadCodePoints = leadCodePoints.cloneAsThawed().closeOver(UnicodeSet.ADD_CASE_MAPPINGS)
|
||||
.freeze();
|
||||
}
|
||||
this.leadCodePointses.add(leadCodePoints);
|
||||
}
|
||||
|
||||
public void setComparator(Comparator<ParsedNumber> comparator) {
|
||||
assert !frozen;
|
||||
this.comparator = comparator;
|
||||
|
@ -353,7 +380,7 @@ public class NumberParserImpl {
|
|||
public void parse(String input, int start, boolean greedy, ParsedNumber result) {
|
||||
assert frozen;
|
||||
assert start >= 0 && start < input.length();
|
||||
StringSegment segment = new StringSegment(ParsingUtils.maybeFold(input, parseFlags));
|
||||
StringSegment segment = new StringSegment(input, parseFlags);
|
||||
segment.adjustOffset(start);
|
||||
if (greedy) {
|
||||
parseGreedyRecursive(segment, result);
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSet.EntryRange;
|
||||
|
||||
|
@ -23,6 +22,7 @@ public class ParsingUtils {
|
|||
public static final int PARSE_FLAG_EXACT_AFFIX = 0x0200;
|
||||
public static final int PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400;
|
||||
public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0800;
|
||||
public static final int PARSE_FLAG_OPTIMIZE = 0x1000;
|
||||
|
||||
public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
|
||||
for (EntryRange range : input.ranges()) {
|
||||
|
@ -39,16 +39,4 @@ public class ParsingUtils {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
|
||||
*/
|
||||
public static String maybeFold(String input, int parseFlags) {
|
||||
UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
|
||||
if (0 != (parseFlags & PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
|
||||
return UCharacter.foldCase(input, true);
|
||||
} else {
|
||||
return input;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -15,16 +15,13 @@ public class ScientificMatcher implements NumberParseMatcher {
|
|||
private final String exponentSeparatorString;
|
||||
private final DecimalMatcher exponentMatcher;
|
||||
|
||||
public static ScientificMatcher getInstance(
|
||||
DecimalFormatSymbols symbols,
|
||||
Grouper grouper,
|
||||
int parseFlags) {
|
||||
public static ScientificMatcher getInstance(DecimalFormatSymbols symbols, Grouper grouper) {
|
||||
// TODO: Static-initialize most common instances?
|
||||
return new ScientificMatcher(symbols, grouper, parseFlags);
|
||||
return new ScientificMatcher(symbols, grouper);
|
||||
}
|
||||
|
||||
private ScientificMatcher(DecimalFormatSymbols symbols, Grouper grouper, int parseFlags) {
|
||||
exponentSeparatorString = ParsingUtils.maybeFold(symbols.getExponentSeparator(), parseFlags);
|
||||
private ScientificMatcher(DecimalFormatSymbols symbols, Grouper grouper) {
|
||||
exponentSeparatorString = symbols.getExponentSeparator();
|
||||
exponentMatcher = DecimalMatcher.getInstance(symbols,
|
||||
grouper,
|
||||
ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC | ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
|
||||
|
@ -47,19 +44,14 @@ public class ScientificMatcher implements NumberParseMatcher {
|
|||
if (segment.length() == 0) {
|
||||
return true;
|
||||
}
|
||||
int leadCp = segment.getCodePoint();
|
||||
if (leadCp == -1) {
|
||||
// Partial code point match
|
||||
return true;
|
||||
}
|
||||
|
||||
// Allow a sign, and then try to match digits.
|
||||
boolean minusSign = false;
|
||||
if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(leadCp)) {
|
||||
if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN))) {
|
||||
minusSign = true;
|
||||
segment.adjustOffset(Character.charCount(leadCp));
|
||||
} else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(leadCp)) {
|
||||
segment.adjustOffset(Character.charCount(leadCp));
|
||||
segment.adjustOffsetByCodePoint();
|
||||
} else if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN))) {
|
||||
segment.adjustOffsetByCodePoint();
|
||||
}
|
||||
|
||||
int digitsOffset = segment.getOffset();
|
||||
|
|
|
@ -2,6 +2,9 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
* A mutable class allowing for a String with a variable offset and length. The charAt, length, and
|
||||
* subSequence methods all operate relative to the fixed offset into the String.
|
||||
|
@ -12,11 +15,13 @@ public class StringSegment implements CharSequence {
|
|||
private final String str;
|
||||
private int start;
|
||||
private int end;
|
||||
private boolean foldCase;
|
||||
|
||||
public StringSegment(String str) {
|
||||
public StringSegment(String str, int parseFlags) {
|
||||
this.str = str;
|
||||
this.start = 0;
|
||||
this.end = str.length();
|
||||
this.foldCase = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE);
|
||||
}
|
||||
|
||||
public int getOffset() {
|
||||
|
@ -42,6 +47,13 @@ public class StringSegment implements CharSequence {
|
|||
start += delta;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adjusts the offset by the width of the current code point, either 1 or 2 chars.
|
||||
*/
|
||||
public void adjustOffsetByCodePoint() {
|
||||
start += Character.charCount(getCodePoint());
|
||||
}
|
||||
|
||||
public void setLength(int length) {
|
||||
assert length >= 0;
|
||||
assert start + length <= str.length();
|
||||
|
@ -72,28 +84,73 @@ public class StringSegment implements CharSequence {
|
|||
/**
|
||||
* Returns the first code point in the string segment, or -1 if the string starts with an invalid
|
||||
* code point.
|
||||
*
|
||||
* <p>
|
||||
* <strong>Important:</strong> Most of the time, you should use {@link #matches}, which handles case
|
||||
* folding logic, instead of this method.
|
||||
*/
|
||||
public int getCodePoint() {
|
||||
assert start < end;
|
||||
char lead = str.charAt(start);
|
||||
if (Character.isHighSurrogate(lead) && start + 1 < end) {
|
||||
return Character.toCodePoint(lead, str.charAt(start + 1));
|
||||
} else if (Character.isSurrogate(lead)) {
|
||||
return -1;
|
||||
} else {
|
||||
return lead;
|
||||
char trail;
|
||||
if (Character.isHighSurrogate(lead)
|
||||
&& start + 1 < end
|
||||
&& Character.isLowSurrogate(trail = str.charAt(start + 1))) {
|
||||
return Character.toCodePoint(lead, trail);
|
||||
}
|
||||
return lead;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the first code point of this StringSegment equals the given code point.
|
||||
*
|
||||
* <p>
|
||||
* This method will perform case folding if case folding is enabled for the parser.
|
||||
*/
|
||||
public boolean matches(int otherCp) {
|
||||
return codePointsEqual(getCodePoint(), otherCp, foldCase);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the first code point of this StringSegment is in the given UnicodeSet.
|
||||
*/
|
||||
public boolean matches(UnicodeSet uniset) {
|
||||
// TODO: Move UnicodeSet case-folding logic here.
|
||||
// TODO: Handle string matches here instead of separately.
|
||||
int cp = getCodePoint();
|
||||
if (cp == -1) {
|
||||
return false;
|
||||
}
|
||||
return uniset.contains(cp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
|
||||
* example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
|
||||
* since the first 2 characters are the same.
|
||||
*
|
||||
* <p>
|
||||
* This method will perform case folding if case folding is enabled for the parser.
|
||||
*/
|
||||
public int getCommonPrefixLength(CharSequence other) {
|
||||
return getPrefixLengthInternal(other, foldCase);
|
||||
}
|
||||
|
||||
/**
|
||||
* Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
|
||||
* enabled for the parser.
|
||||
*/
|
||||
public int getCaseSensitivePrefixLength(CharSequence other) {
|
||||
return getPrefixLengthInternal(other, false);
|
||||
}
|
||||
|
||||
private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
|
||||
int offset = 0;
|
||||
for (; offset < Math.min(length(), other.length());) {
|
||||
if (charAt(offset) != other.charAt(offset)) {
|
||||
// TODO: case-fold code points, not chars
|
||||
char c1 = charAt(offset);
|
||||
char c2 = other.charAt(offset);
|
||||
if (!codePointsEqual(c1, c2, foldCase)) {
|
||||
break;
|
||||
}
|
||||
offset++;
|
||||
|
@ -101,6 +158,30 @@ public class StringSegment implements CharSequence {
|
|||
return offset;
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
|
||||
// */
|
||||
// public static String maybeFold(String input, int parseFlags) {
|
||||
// UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
|
||||
// if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
|
||||
// return UCharacter.foldCase(input, true);
|
||||
// } else {
|
||||
// return input;
|
||||
// }
|
||||
// }
|
||||
|
||||
private static final boolean codePointsEqual(int cp1, int cp2, boolean foldCase) {
|
||||
if (cp1 == cp2) {
|
||||
return true;
|
||||
}
|
||||
if (!foldCase) {
|
||||
return false;
|
||||
}
|
||||
cp1 = UCharacter.foldCase(cp1, true);
|
||||
cp2 = UCharacter.foldCase(cp2, true);
|
||||
return cp1 == cp2;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end);
|
||||
|
|
|
@ -47,9 +47,8 @@ public abstract class SymbolMatcher implements NumberParseMatcher {
|
|||
}
|
||||
}
|
||||
|
||||
int cp = segment.getCodePoint();
|
||||
if (cp != -1 && uniSet.contains(cp)) {
|
||||
segment.adjustOffset(Character.charCount(cp));
|
||||
if (segment.matches(uniSet)) {
|
||||
segment.adjustOffsetByCodePoint();
|
||||
accept(segment, result);
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ public class UnicodeSetStaticCache {
|
|||
DIGITS,
|
||||
NAN_LEAD,
|
||||
SCIENTIFIC_LEAD,
|
||||
CWCF,
|
||||
CWCF, // TODO: Check if this is being used and remove it if not.
|
||||
|
||||
// Combined Separators with Digits (for lead code points)
|
||||
DIGITS_OR_ALL_SEPARATORS,
|
||||
|
|
|
@ -5123,37 +5123,6 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string,
|
||||
* and sometimes has no effect at all; the original string is returned whenever casing
|
||||
* would not be appropriate for the first word (such as for CJK characters or initial numbers).
|
||||
* Initial non-letters are skipped in order to find the character to change.
|
||||
* Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE.
|
||||
* <p>Examples:
|
||||
* <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr>
|
||||
* <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr>
|
||||
* <tr><td>“contact us”</td><td>“Contact us”</td></tr>
|
||||
* <tr><td>49ers win!</td><td>49ers win!</td></tr>
|
||||
* <tr><td>丰(abc)</td><td>丰(abc)</td></tr>
|
||||
* <tr><td>«ijs»</td><td>«Ijs»</td></tr>
|
||||
* <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr>
|
||||
* <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr>
|
||||
* </table>
|
||||
* @param locale the locale for accessing exceptional behavior (eg for tr).
|
||||
* @param str the source string to change
|
||||
* @return the modified string, or the original if no modifications were necessary.
|
||||
* @internal
|
||||
* @deprecated ICU internal only
|
||||
*/
|
||||
@Deprecated
|
||||
public static String toTitleFirst(ULocale locale, String str) {
|
||||
// TODO: Remove this function. Inline it where it is called in CLDR.
|
||||
return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, str);
|
||||
}
|
||||
|
||||
private static final com.ibm.icu.text.CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE =
|
||||
com.ibm.icu.text.CaseMap.toTitle().wholeString().noLowercase();
|
||||
|
||||
/**
|
||||
* {@icu} <p>Returns the titlecase version of the argument string.
|
||||
* <p>Position for titlecasing is determined by the argument break
|
||||
|
|
|
@ -15,8 +15,8 @@ import com.ibm.icu.text.PluralRules.IFixedDecimal;
|
|||
import com.ibm.icu.util.ICUUncheckedIOException;
|
||||
|
||||
/**
|
||||
* The result of a number formatting operation. This class allows the result to be exported in several data types,
|
||||
* including a String, an AttributedCharacterIterator, and a BigDecimal.
|
||||
* The result of a number formatting operation. This class allows the result to be exported in several
|
||||
* data types, including a String, an AttributedCharacterIterator, and a BigDecimal.
|
||||
*
|
||||
* @draft ICU 60
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
|
@ -47,12 +47,12 @@ public class FormattedNumber {
|
|||
}
|
||||
|
||||
/**
|
||||
* Append the formatted number to an Appendable, such as a StringBuilder. This may be slightly more efficient than
|
||||
* creating a String.
|
||||
* Append the formatted number to an Appendable, such as a StringBuilder. This may be slightly more
|
||||
* efficient than creating a String.
|
||||
*
|
||||
* <p>
|
||||
* If an IOException occurs when appending to the Appendable, an unchecked {@link ICUUncheckedIOException} is thrown
|
||||
* instead.
|
||||
* If an IOException occurs when appending to the Appendable, an unchecked
|
||||
* {@link ICUUncheckedIOException} is thrown instead.
|
||||
*
|
||||
* @param appendable
|
||||
* The Appendable to which to append the formatted number string.
|
||||
|
@ -73,16 +73,18 @@ public class FormattedNumber {
|
|||
}
|
||||
|
||||
/**
|
||||
* Determine the start and end indices of the first occurrence of the given <em>field</em> in the output string.
|
||||
* This allows you to determine the locations of the integer part, fraction part, and sign.
|
||||
* Determine the start and end indices of the first occurrence of the given <em>field</em> in the
|
||||
* output string. This allows you to determine the locations of the integer part, fraction part, and
|
||||
* sign.
|
||||
*
|
||||
* <p>
|
||||
* If multiple different field attributes are needed, this method can be called repeatedly, or if <em>all</em> field
|
||||
* attributes are needed, consider using getFieldIterator().
|
||||
* If multiple different field attributes are needed, this method can be called repeatedly, or if
|
||||
* <em>all</em> field attributes are needed, consider using getFieldIterator().
|
||||
*
|
||||
* <p>
|
||||
* If a field occurs multiple times in an output string, such as a grouping separator, this method will only ever
|
||||
* return the first occurrence. Use getFieldIterator() to access all occurrences of an attribute.
|
||||
* If a field occurs multiple times in an output string, such as a grouping separator, this method
|
||||
* will only ever return the first occurrence. Use getFieldIterator() to access all occurrences of an
|
||||
* attribute.
|
||||
*
|
||||
* @param fieldPosition
|
||||
* The FieldPosition to populate with the start and end indices of the desired field.
|
||||
|
@ -106,13 +108,15 @@ public class FormattedNumber {
|
|||
}
|
||||
|
||||
/**
|
||||
* Export the formatted number as an AttributedCharacterIterator. This allows you to determine which characters in
|
||||
* the output string correspond to which <em>fields</em>, such as the integer part, fraction part, and sign.
|
||||
* Export the formatted number as an AttributedCharacterIterator. This allows you to determine which
|
||||
* characters in the output string correspond to which <em>fields</em>, such as the integer part,
|
||||
* fraction part, and sign.
|
||||
*
|
||||
* <p>
|
||||
* If information on only one field is needed, consider using populateFieldPosition() instead.
|
||||
*
|
||||
* @return An AttributedCharacterIterator, containing information on the field attributes of the number string.
|
||||
* @return An AttributedCharacterIterator, containing information on the field attributes of the
|
||||
* number string.
|
||||
* @draft ICU 60
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
* @see com.ibm.icu.text.NumberFormat.Field
|
||||
|
@ -124,8 +128,9 @@ public class FormattedNumber {
|
|||
}
|
||||
|
||||
/**
|
||||
* Export the formatted number as a BigDecimal. This endpoint is useful for obtaining the exact number being printed
|
||||
* after scaling and rounding have been applied by the number formatting pipeline.
|
||||
* Export the formatted number as a BigDecimal. This endpoint is useful for obtaining the exact
|
||||
* number being printed after scaling and rounding have been applied by the number formatting
|
||||
* pipeline.
|
||||
*
|
||||
* @return A BigDecimal representation of the formatted number.
|
||||
* @draft ICU 60
|
||||
|
@ -138,31 +143,29 @@ public class FormattedNumber {
|
|||
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @deprecated This API is ICU internal only. Use {@link #populateFieldPosition} or
|
||||
* {@link #getFieldIterator} for similar functionality.
|
||||
*/
|
||||
@Deprecated
|
||||
public String getPrefix() {
|
||||
NumberStringBuilder temp = new NumberStringBuilder();
|
||||
int length = micros.modOuter.apply(temp, 0, 0);
|
||||
length += micros.modMiddle.apply(temp, 0, length);
|
||||
/* length += */ micros.modInner.apply(temp, 0, length);
|
||||
int prefixLength = micros.modOuter.getPrefixLength() + micros.modMiddle.getPrefixLength()
|
||||
+ micros.modInner.getPrefixLength();
|
||||
// #13453: DecimalFormat wants the affixes from the pattern only (modMiddle).
|
||||
micros.modMiddle.apply(temp, 0, 0);
|
||||
int prefixLength = micros.modMiddle.getPrefixLength();
|
||||
return temp.subSequence(0, prefixLength).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
* @deprecated This API is ICU internal only. Use {@link #populateFieldPosition} or
|
||||
* {@link #getFieldIterator} for similar functionality.
|
||||
*/
|
||||
@Deprecated
|
||||
public String getSuffix() {
|
||||
NumberStringBuilder temp = new NumberStringBuilder();
|
||||
int length = micros.modOuter.apply(temp, 0, 0);
|
||||
length += micros.modMiddle.apply(temp, 0, length);
|
||||
length += micros.modInner.apply(temp, 0, length);
|
||||
int prefixLength = micros.modOuter.getPrefixLength() + micros.modMiddle.getPrefixLength()
|
||||
+ micros.modInner.getPrefixLength();
|
||||
// #13453: DecimalFormat wants the affixes from the pattern only (modMiddle).
|
||||
int length = micros.modMiddle.apply(temp, 0, 0);
|
||||
int prefixLength = micros.modMiddle.getPrefixLength();
|
||||
return temp.subSequence(prefixLength, length).toString();
|
||||
}
|
||||
|
||||
|
@ -185,7 +188,9 @@ public class FormattedNumber {
|
|||
public int hashCode() {
|
||||
// NumberStringBuilder and BigDecimal are mutable, so we can't call
|
||||
// #equals() or #hashCode() on them directly.
|
||||
return Arrays.hashCode(nsb.toCharArray()) ^ Arrays.hashCode(nsb.toFieldArray()) ^ fq.toBigDecimal().hashCode();
|
||||
return Arrays.hashCode(nsb.toCharArray())
|
||||
^ Arrays.hashCode(nsb.toFieldArray())
|
||||
^ fq.toBigDecimal().hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -206,7 +211,7 @@ public class FormattedNumber {
|
|||
// #equals() or #hashCode() on them directly.
|
||||
FormattedNumber _other = (FormattedNumber) other;
|
||||
return Arrays.equals(nsb.toCharArray(), _other.nsb.toCharArray())
|
||||
^ Arrays.equals(nsb.toFieldArray(), _other.nsb.toFieldArray())
|
||||
^ fq.toBigDecimal().equals(_other.fq.toBigDecimal());
|
||||
&& Arrays.equals(nsb.toFieldArray(), _other.nsb.toFieldArray())
|
||||
&& fq.toBigDecimal().equals(_other.fq.toBigDecimal());
|
||||
}
|
||||
}
|
|
@ -171,7 +171,7 @@ public final class NumberFormatter {
|
|||
* <li>MIN2: 1234 and 12,34,567
|
||||
* <li>AUTO: 1,234 and 12,34,567
|
||||
* <li>ON_ALIGNED: 1,234 and 12,34,567
|
||||
* <li>WESTERN: 1,234 and 1,234,567
|
||||
* <li>THOUSANDS: 1,234 and 1,234,567
|
||||
* </ul>
|
||||
*
|
||||
* <p>
|
||||
|
@ -259,7 +259,7 @@ public final class NumberFormatter {
|
|||
* @provisional This API might change or be removed in a future release.
|
||||
* @see NumberFormatter
|
||||
*/
|
||||
WESTERN
|
||||
THOUSANDS
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1044,7 +1044,7 @@ public class MeasureFormat extends UFormat {
|
|||
case TIME_UNIT_FORMAT:
|
||||
return createTimeUnitFormat();
|
||||
case CURRENCY_FORMAT:
|
||||
return new CurrencyFormat(locale);
|
||||
return MeasureFormat.getCurrencyFormat(locale);
|
||||
default:
|
||||
throw new InvalidObjectException("Unknown subclass: " + subClass);
|
||||
}
|
||||
|
|
|
@ -18,17 +18,19 @@ import com.ibm.icu.impl.ICUBinary.Authenticate;
|
|||
import com.ibm.icu.impl.Trie2;
|
||||
|
||||
/**
|
||||
* <p>Internal class used for Rule Based Break Iterators</p>
|
||||
* <p>Internal class used for Rule Based Break Iterators.</p>
|
||||
* <p>This class provides access to the compiled break rule data, as
|
||||
* it is stored in a .brk file.
|
||||
* Not intended for public use; declared public for testing purposes only.
|
||||
* @internal
|
||||
*/
|
||||
final class RBBIDataWrapper {
|
||||
public final class RBBIDataWrapper {
|
||||
//
|
||||
// These fields are the ready-to-use compiled rule data, as
|
||||
// read from the file.
|
||||
//
|
||||
RBBIDataHeader fHeader;
|
||||
short fFTable[];
|
||||
public RBBIDataHeader fHeader;
|
||||
public short fFTable[];
|
||||
short fRTable[];
|
||||
short fSFTable[];
|
||||
short fSRTable[];
|
||||
|
@ -78,11 +80,16 @@ final class RBBIDataWrapper {
|
|||
// Index offsets to the fields in a state table row.
|
||||
// Corresponds to struct RBBIStateTableRow in the C version.
|
||||
//
|
||||
final static int ACCEPTING = 0;
|
||||
final static int LOOKAHEAD = 1;
|
||||
final static int TAGIDX = 2;
|
||||
final static int RESERVED = 3;
|
||||
final static int NEXTSTATES = 4;
|
||||
/** @internal */
|
||||
public final static int ACCEPTING = 0;
|
||||
/** @internal */
|
||||
public final static int LOOKAHEAD = 1;
|
||||
/** @internal */
|
||||
public final static int TAGIDX = 2;
|
||||
/** @internal */
|
||||
public final static int RESERVED = 3;
|
||||
/** @internal */
|
||||
public final static int NEXTSTATES = 4;
|
||||
|
||||
// Index offsets to header fields of a state table
|
||||
// struct RBBIStateTable {... in the C version.
|
||||
|
@ -101,13 +108,15 @@ final class RBBIDataWrapper {
|
|||
|
||||
/**
|
||||
* Data Header. A struct-like class with the fields from the RBBI data file header.
|
||||
* Not intended for public use, declared public for testing purposes only.
|
||||
* @internal
|
||||
*/
|
||||
final static class RBBIDataHeader {
|
||||
public final static class RBBIDataHeader {
|
||||
int fMagic; // == 0xb1a0
|
||||
byte[] fFormatVersion; // For ICU 3.4 and later.
|
||||
int fLength; // Total length in bytes of this RBBI Data,
|
||||
// including all sections, not just the header.
|
||||
int fCatCount; // Number of character categories.
|
||||
public int fCatCount; // Number of character categories.
|
||||
|
||||
//
|
||||
// Offsets and sizes of each of the subsections within the RBBI data.
|
||||
|
@ -139,9 +148,9 @@ final class RBBIDataWrapper {
|
|||
/**
|
||||
* RBBI State Table Indexing Function. Given a state number, return the
|
||||
* array index of the start of the state table row for that state.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
int getRowIndex(int state){
|
||||
public int getRowIndex(int state){
|
||||
return ROW_DATA + state * (fHeader.fCatCount + 4);
|
||||
}
|
||||
|
||||
|
@ -311,17 +320,17 @@ final class RBBIDataWrapper {
|
|||
return This;
|
||||
}
|
||||
|
||||
///CLOVER:OFF
|
||||
// Getters for fields from the state table header
|
||||
//
|
||||
private int getStateTableNumStates(short table[]) {
|
||||
/**
|
||||
* Getters for fields from the state table header
|
||||
* @internal
|
||||
*/
|
||||
public int getStateTableNumStates(short table[]) {
|
||||
if (isBigEndian) {
|
||||
return (table[NUMSTATES] << 16) | (table[NUMSTATES+1] & 0xffff);
|
||||
} else {
|
||||
return (table[NUMSTATES+1] << 16) | (table[NUMSTATES] & 0xffff);
|
||||
}
|
||||
}
|
||||
///CLOVER:ON
|
||||
|
||||
int getStateTableFlags(short table[]) {
|
||||
// This works for up to 15 flags bits.
|
||||
|
|
|
@ -342,10 +342,10 @@ class RBBIRuleBuilder {
|
|||
//
|
||||
// UnicodeSet processing.
|
||||
// Munge the Unicode Sets to create a set of character categories.
|
||||
// Generate the mapping tables (TRIE) from input 32-bit characters to
|
||||
// Generate the mapping tables (TRIE) from input code points to
|
||||
// the character categories.
|
||||
//
|
||||
builder.fSetBuilder.build();
|
||||
builder.fSetBuilder.buildRanges();
|
||||
|
||||
//
|
||||
// Generate the DFA state transition table.
|
||||
|
@ -363,10 +363,34 @@ class RBBIRuleBuilder {
|
|||
builder.fForwardTables.printRuleStatusTable();
|
||||
}
|
||||
|
||||
builder.optimizeTables();
|
||||
builder.fSetBuilder.buildTrie();
|
||||
//
|
||||
// Package up the compiled data, writing it to an output stream
|
||||
// in the serialization format. This is the same as the ICU4C runtime format.
|
||||
//
|
||||
builder.flattenData(os);
|
||||
}
|
||||
|
||||
static class ClassPair {
|
||||
int left = 3;
|
||||
int right = 0;
|
||||
}
|
||||
|
||||
void optimizeTables() {
|
||||
ClassPair duplPair = new ClassPair();
|
||||
|
||||
while (fForwardTables.findDuplCharClassFrom(duplPair)) {
|
||||
fSetBuilder.mergeCategories(duplPair);
|
||||
fForwardTables.removeColumn(duplPair.right);
|
||||
fReverseTables.removeColumn(duplPair.right);
|
||||
fSafeFwdTables.removeColumn(duplPair.right);
|
||||
fSafeRevTables.removeColumn(duplPair.right);
|
||||
}
|
||||
|
||||
fForwardTables.removeDuplicateStates();
|
||||
fReverseTables.removeDuplicateStates();
|
||||
fSafeFwdTables.removeDuplicateStates();
|
||||
fSafeRevTables.removeDuplicateStates();
|
||||
|
||||
}
|
||||
|
|
|
@ -112,7 +112,7 @@ class RBBISetBuilder {
|
|||
}
|
||||
}
|
||||
if (setName.equals("dictionary")) {
|
||||
this.fNum |= 0x4000;
|
||||
this.fNum |= DICT_BIT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -138,6 +138,8 @@ class RBBISetBuilder {
|
|||
|
||||
boolean fSawBOF;
|
||||
|
||||
static final int DICT_BIT = 0x4000;
|
||||
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//
|
||||
|
@ -156,7 +158,7 @@ class RBBISetBuilder {
|
|||
// from the Unicode Sets.
|
||||
//
|
||||
//------------------------------------------------------------------------
|
||||
void build() {
|
||||
void buildRanges() {
|
||||
RangeDescriptor rlRange;
|
||||
|
||||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("usets")>=0) {printSets();}
|
||||
|
@ -280,6 +282,15 @@ class RBBISetBuilder {
|
|||
|
||||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("rgroup")>=0) {printRangeGroups();}
|
||||
if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("esets")>=0) {printSets();}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Build the Trie table for mapping UChar32 values to the corresponding
|
||||
* range group number.
|
||||
*/
|
||||
void buildTrie() {
|
||||
RangeDescriptor rlRange;
|
||||
|
||||
fTrie = new Trie2Writable(0, // Initial value for all code points.
|
||||
0); // Error value for out-of-range input.
|
||||
|
@ -294,7 +305,20 @@ class RBBISetBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
void mergeCategories(int left, int right) {
|
||||
assert(left >= 1);
|
||||
assert(right > left);
|
||||
for (RangeDescriptor rd = fRangeList; rd != null; rd = rd.fNext) {
|
||||
int rangeNum = rd.fNum & ~DICT_BIT;
|
||||
int rangeDict = rd.fNum & DICT_BIT;
|
||||
if (rangeNum == right) {
|
||||
rd.fNum = left | rangeDict;
|
||||
} else if (rangeNum > right) {
|
||||
rd.fNum--;
|
||||
}
|
||||
}
|
||||
--fGroupCount;
|
||||
}
|
||||
//-----------------------------------------------------------------------------------
|
||||
//
|
||||
// getTrieSize() Return the size that will be required to serialize the Trie.
|
||||
|
@ -457,7 +481,7 @@ class RBBISetBuilder {
|
|||
if (groupNum<10) {System.out.print(" ");}
|
||||
System.out.print(groupNum + " ");
|
||||
|
||||
if ((rlRange.fNum & 0x4000) != 0) { System.out.print(" <DICT> ");}
|
||||
if ((rlRange.fNum & DICT_BIT) != 0) { System.out.print(" <DICT> ");}
|
||||
|
||||
for (i=0; i<rlRange.fIncludesSets.size(); i++) {
|
||||
RBBINode usetNode = rlRange.fIncludesSets.get(i);
|
||||
|
|
|
@ -655,7 +655,7 @@ class RBBITableBuilder {
|
|||
// if sd.fAccepting already had a value other than 0 or -1, leave it be.
|
||||
|
||||
// If the end marker node is from a look-ahead rule, set
|
||||
// the fLookAhead field or this state also.
|
||||
// the fLookAhead field for this state also.
|
||||
if (endMarker.fLookAheadEnd) {
|
||||
// TODO: don't change value if already set?
|
||||
// TODO: allow for more than one active look-ahead rule in engine.
|
||||
|
@ -832,6 +832,129 @@ class RBBITableBuilder {
|
|||
|
||||
|
||||
|
||||
//
|
||||
// findDuplCharClassFrom()
|
||||
//
|
||||
boolean findDuplCharClassFrom(RBBIRuleBuilder.ClassPair classPair) {
|
||||
int numStates = fDStates.size();
|
||||
int numCols = fRB.fSetBuilder.getNumCharCategories();
|
||||
|
||||
uint16_t table_base;
|
||||
uint16_t table_dupl;
|
||||
for (; baseCategory < numCols-1; ++baseCategory) {
|
||||
for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) {
|
||||
for (int state=0; state<numStates; state++) {
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates.elementAt(state);
|
||||
table_base = (uint16_t)sd.fDtran.elementAti(baseCategory);
|
||||
table_dupl = (uint16_t)sd.fDtran.elementAti(duplCategory);
|
||||
if (table_base != table_dupl) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (table_base == table_dupl) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// removeColumn()
|
||||
//
|
||||
void removeColumn(int column) {
|
||||
int numStates = fDStates.size();
|
||||
for (int state=0; state<numStates; state++) {
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates.elementAt(state);
|
||||
U_ASSERT(column < sd.fDtran.size());
|
||||
sd.fDtran.removeElementAt(column);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* findDuplicateState
|
||||
*/
|
||||
bool findDuplicateState(int &firstState, int &duplState) {
|
||||
int numStates = fDStates.size();
|
||||
int numCols = fRB.fSetBuilder.getNumCharCategories();
|
||||
|
||||
for (; firstState<numStates-1; ++firstState) {
|
||||
RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates.elementAt(firstState);
|
||||
for (duplState=firstState+1; duplState<numStates; ++duplState) {
|
||||
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates.elementAt(duplState);
|
||||
if (firstSD.fAccepting != duplSD.fAccepting ||
|
||||
firstSD.fLookAhead != duplSD.fLookAhead ||
|
||||
firstSD.fTagsIdx != duplSD.fTagsIdx) {
|
||||
continue;
|
||||
}
|
||||
bool rowsMatch = true;
|
||||
for (int col=0; col < numCols; ++col) {
|
||||
int firstVal = firstSD.fDtran.elementAti(col);
|
||||
int duplVal = duplSD.fDtran.elementAti(col);
|
||||
if (!((firstVal == duplVal) ||
|
||||
((firstVal == firstState || firstVal == duplState) &&
|
||||
(duplVal == firstState || duplVal == duplState)))) {
|
||||
rowsMatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rowsMatch) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void removeState(int keepState, int duplState) {
|
||||
U_ASSERT(keepState < duplState);
|
||||
U_ASSERT(duplState < fDStates.size());
|
||||
|
||||
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates.elementAt(duplState);
|
||||
fDStates.removeElementAt(duplState);
|
||||
delete duplSD;
|
||||
|
||||
int numStates = fDStates.size();
|
||||
int numCols = fRB.fSetBuilder.getNumCharCategories();
|
||||
for (int state=0; state<numStates; ++state) {
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates.elementAt(state);
|
||||
for (int col=0; col<numCols; col++) {
|
||||
int existingVal = sd.fDtran.elementAti(col);
|
||||
int newVal = existingVal;
|
||||
if (existingVal == duplState) {
|
||||
newVal = keepState;
|
||||
} else if (existingVal > duplState) {
|
||||
newVal = existingVal - 1;
|
||||
}
|
||||
sd.fDtran.setElementAt(newVal, col);
|
||||
}
|
||||
if (sd.fAccepting == duplState) {
|
||||
sd.fAccepting = keepState;
|
||||
} else if (sd.fAccepting > duplState) {
|
||||
sd.fAccepting--;
|
||||
}
|
||||
if (sd.fLookAhead == duplState) {
|
||||
sd.fLookAhead = keepState;
|
||||
} else if (sd.fLookAhead > duplState) {
|
||||
sd.fLookAhead--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* RemoveDuplicateStates
|
||||
*/
|
||||
void removeDuplicateStates() {
|
||||
int firstState = 3;
|
||||
int duplicateState = 0;
|
||||
while (findDuplicateState(firstState, duplicateState)) {
|
||||
// printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
|
||||
removeState(firstState, duplicateState);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
|
|
|
@ -222,9 +222,11 @@ public class RuleBasedBreakIterator extends BreakIterator {
|
|||
private CharacterIterator fText = new java.text.StringCharacterIterator("");
|
||||
|
||||
/**
|
||||
* The rule data for this BreakIterator instance. Package private.
|
||||
* The rule data for this BreakIterator instance.
|
||||
* Not intended for public use. Declared public for testing purposes only.
|
||||
* @internal
|
||||
*/
|
||||
RBBIDataWrapper fRData;
|
||||
public RBBIDataWrapper fRData;
|
||||
|
||||
/**
|
||||
* The iteration state - current position, rule status for the current position,
|
||||
|
|
|
@ -84,19 +84,12 @@ public class TimeUnitFormat extends MeasureFormat {
|
|||
|
||||
private static final long serialVersionUID = -3707773153184971529L;
|
||||
|
||||
// These fields are supposed to be the same as the fields in mf. They
|
||||
// are here for serialization backward compatibility and to support parsing.
|
||||
// Unlike MeasureFormat, this class is mutable and allows a new NumberFormat to be set after
|
||||
// initialization. Keep a second copy of NumberFormat and use it instead of the one from the parent.
|
||||
private NumberFormat format;
|
||||
private ULocale locale;
|
||||
private int style;
|
||||
|
||||
// We use this field in lieu of the super class because the super class
|
||||
// is immutable while this class is mutable. The contents of the super class
|
||||
// is an empty shell. Every public method of the super class is overridden to
|
||||
// delegate to this field. Each time this object mutates, it replaces this field with
|
||||
// a new immutable instance.
|
||||
// private transient MeasureFormat mf;
|
||||
|
||||
private transient Map<TimeUnit, Map<String, Object[]>> timeUnitToCountToPatterns;
|
||||
private transient PluralRules pluralRules;
|
||||
private transient boolean isReady;
|
||||
|
|
|
@ -2521,5 +2521,25 @@ public class CalendarRegressionTest extends com.ibm.icu.dev.test.TestFmwk {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestIslamicCalOverflow() {
|
||||
String localeID = "ar@calendar=islamic-civil";
|
||||
Calendar cal = Calendar.getInstance(new ULocale(localeID));
|
||||
int maxMonth = cal.getMaximum(Calendar.MONTH);
|
||||
int maxDayOfMonth = cal.getMaximum(Calendar.DATE);
|
||||
int jd, year, month, dayOfMonth;
|
||||
for (jd = 73530872; jd <= 73530876; jd++) { // year 202002, int32_t overflow if jd >= 73530874
|
||||
cal.clear();
|
||||
cal.set(Calendar.JULIAN_DAY, jd);
|
||||
year = cal.get(Calendar.YEAR);
|
||||
month = cal.get(Calendar.MONTH);
|
||||
dayOfMonth = cal.get(Calendar.DATE);
|
||||
if (month > maxMonth || dayOfMonth > maxDayOfMonth) {
|
||||
errln("Error: localeID " + localeID + ", julianDay " + jd + "; got year " + year + "; maxMonth " + maxMonth +
|
||||
", got month " + month + "; maxDayOfMonth " + maxDayOfMonth + ", got dayOfMonth " + dayOfMonth);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//eof
|
||||
|
|
|
@ -311,6 +311,36 @@ public class IntlTestDecimalFormatSymbols extends TestFmwk
|
|||
errln("ERROR: Char digits should be Latin digits");
|
||||
}
|
||||
|
||||
// Check on copy
|
||||
DecimalFormatSymbols copy = (DecimalFormatSymbols) symbols.clone();
|
||||
if (!Arrays.equals(copy.getDigitStrings(), osmanyaDigitStrings)) {
|
||||
errln("ERROR: Osmanya digits (supplementary) should be set");
|
||||
}
|
||||
if (Character.codePointAt(osmanyaDigitStrings[0], 0) != copy.getCodePointZero()) {
|
||||
errln("ERROR: Code point zero be Osmanya code point zero");
|
||||
}
|
||||
if (defZero != copy.getZeroDigit()) {
|
||||
errln("ERROR: Zero digit should be 0");
|
||||
}
|
||||
if (!Arrays.equals(copy.getDigits(), defDigits)) {
|
||||
errln("ERROR: Char digits should be Latin digits");
|
||||
}
|
||||
|
||||
// Check on resource bundle
|
||||
DecimalFormatSymbols fromData = DecimalFormatSymbols.getInstance(new ULocale("en@numbers=osma"));
|
||||
if (!Arrays.equals(fromData.getDigitStrings(), osmanyaDigitStrings)) {
|
||||
errln("ERROR: Osmanya digits (supplementary) should be set");
|
||||
}
|
||||
if (Character.codePointAt(osmanyaDigitStrings[0], 0) != fromData.getCodePointZero()) {
|
||||
errln("ERROR: Code point zero be Osmanya code point zero");
|
||||
}
|
||||
if (defZero != fromData.getZeroDigit()) {
|
||||
errln("ERROR: Zero digit should be 0");
|
||||
}
|
||||
if (!Arrays.equals(fromData.getDigits(), defDigits)) {
|
||||
errln("ERROR: Char digits should be Latin digits");
|
||||
}
|
||||
|
||||
symbols.setDigitStrings(differentDigitStrings);
|
||||
if (!Arrays.equals(symbols.getDigitStrings(), differentDigitStrings)) {
|
||||
errln("ERROR: Different digits should be set");
|
||||
|
|
|
@ -16,6 +16,7 @@ import java.io.ObjectOutputStream;
|
|||
import java.io.Serializable;
|
||||
import java.lang.reflect.Field;
|
||||
import java.text.FieldPosition;
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
@ -42,6 +43,7 @@ import com.ibm.icu.text.MeasureFormat;
|
|||
import com.ibm.icu.text.MeasureFormat.FormatWidth;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.util.Currency;
|
||||
import com.ibm.icu.util.CurrencyAmount;
|
||||
import com.ibm.icu.util.Measure;
|
||||
import com.ibm.icu.util.MeasureUnit;
|
||||
import com.ibm.icu.util.NoUnit;
|
||||
|
@ -1925,6 +1927,15 @@ public class MeasureUnitTest extends TestFmwk {
|
|||
assertEquals("getCurrencyFormat ULocale/Locale", mfu, mfj);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCurrencyFormatParseIsoCode() throws ParseException {
|
||||
MeasureFormat mf = MeasureFormat.getCurrencyFormat(ULocale.ENGLISH);
|
||||
CurrencyAmount result = (CurrencyAmount) mf.parseObject("GTQ 34.56");
|
||||
assertEquals("Parse should succeed", result.getNumber().doubleValue(), 34.56, 0.0);
|
||||
assertEquals("Should parse ISO code GTQ even though the currency is USD",
|
||||
"GTQ", result.getCurrency().getCurrencyCode());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDoubleZero() {
|
||||
ULocale en = new ULocale("en");
|
||||
|
|
|
@ -868,7 +868,7 @@ public class NumberFormatTest extends TestFmwk {
|
|||
new ParseCurrencyItem( "en_GB", "euros4", "4,00\u00A0\u20AC", 6,400, "EUR" ),
|
||||
new ParseCurrencyItem( "en_GB", "euros6", "6\u00A0\u20AC", 3, 6, "EUR" ),
|
||||
new ParseCurrencyItem( "en_GB", "euros8", "\u20AC8", 2, 8, "EUR" ),
|
||||
new ParseCurrencyItem( "en_GB", "dollars4", "US$4", 0, 0, "USD" ),
|
||||
new ParseCurrencyItem( "en_GB", "dollars4", "US$4", 4, 4, "USD" ),
|
||||
|
||||
new ParseCurrencyItem( "fr_FR", "euros4", "4,00\u00A0\u20AC", 6, 4, "EUR" ),
|
||||
new ParseCurrencyItem( "fr_FR", "euros6", "6\u00A0\u20AC", 3, 6, "EUR" ),
|
||||
|
@ -2018,7 +2018,6 @@ public class NumberFormatTest extends TestFmwk {
|
|||
};
|
||||
|
||||
@SuppressWarnings("resource") // InputStream will be closed by the ResourceReader.
|
||||
@Ignore("TODO: http://bugs.icu-project.org/trac/ticket/13571")
|
||||
@Test
|
||||
public void TestCases() {
|
||||
String caseFileName = "NumberFormatTestCases.txt";
|
||||
|
@ -5331,6 +5330,23 @@ public class NumberFormatTest extends TestFmwk {
|
|||
assertEquals("Grouping should be off", false, df.isGroupingUsed());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void Test13453_AffixContent() {
|
||||
DecimalFormat df = (DecimalFormat) DecimalFormat.getScientificInstance();
|
||||
assertEquals("Scientific should NOT be included", "", df.getPositiveSuffix());
|
||||
|
||||
df = CompactDecimalFormat.getInstance(ULocale.ENGLISH, CompactDecimalFormat.CompactStyle.SHORT);
|
||||
assertEquals("Compact should NOT be included", "", df.getPositiveSuffix());
|
||||
|
||||
df = (DecimalFormat) DecimalFormat.getInstance(NumberFormat.ISOCURRENCYSTYLE);
|
||||
df.setCurrency(Currency.getInstance("GBP"));
|
||||
assertEquals("ISO currency SHOULD be included", "GBP", df.getPositivePrefix());
|
||||
|
||||
df = (DecimalFormat) DecimalFormat.getInstance(NumberFormat.PLURALCURRENCYSTYLE);
|
||||
df.setCurrency(Currency.getInstance("GBP"));
|
||||
assertEquals("Plural name SHOULD be included", " British pounds", df.getPositiveSuffix());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void Test11035_FormatCurrencyAmount() {
|
||||
double amount = 12345.67;
|
||||
|
|
|
@ -25,7 +25,6 @@ import com.ibm.icu.impl.number.Padder;
|
|||
import com.ibm.icu.impl.number.Padder.PadPosition;
|
||||
import com.ibm.icu.impl.number.PatternStringParser;
|
||||
import com.ibm.icu.number.CompactNotation;
|
||||
import com.ibm.icu.number.FormattedNumber;
|
||||
import com.ibm.icu.number.FractionRounder;
|
||||
import com.ibm.icu.number.IntegerWidth;
|
||||
import com.ibm.icu.number.LocalizedNumberFormatter;
|
||||
|
@ -1176,6 +1175,21 @@ public class NumberFormatterApiTest {
|
|||
"8.765",
|
||||
"0");
|
||||
|
||||
assertFormatDescendingBig(
|
||||
"Indic locale with THOUSANDS grouping",
|
||||
"",
|
||||
NumberFormatter.with().grouping(GroupingStrategy.THOUSANDS),
|
||||
new ULocale("en-IN"),
|
||||
"87,650,000",
|
||||
"8,765,000",
|
||||
"876,500",
|
||||
"87,650",
|
||||
"8,765",
|
||||
"876.5",
|
||||
"87.65",
|
||||
"8.765",
|
||||
"0");
|
||||
|
||||
// NOTE: Hungarian is interesting because it has minimumGroupingDigits=4 in locale data
|
||||
// If this test breaks due to data changes, find another locale that has minimumGroupingDigits.
|
||||
assertFormatDescendingBig(
|
||||
|
@ -1860,29 +1874,6 @@ public class NumberFormatterApiTest {
|
|||
assertNotEquals(NumberFormatter.with().locale(ULocale.ENGLISH), NumberFormatter.with().locale(Locale.FRENCH));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void getPrefixSuffix() {
|
||||
Object[][] cases = {
|
||||
{ NumberFormatter.withLocale(ULocale.ENGLISH).unit(GBP).unitWidth(UnitWidth.ISO_CODE), "GBP", "",
|
||||
"-GBP", "" },
|
||||
{ NumberFormatter.withLocale(ULocale.ENGLISH).unit(GBP).unitWidth(UnitWidth.FULL_NAME), "",
|
||||
" British pounds", "-", " British pounds" } };
|
||||
|
||||
for (Object[] cas : cases) {
|
||||
LocalizedNumberFormatter f = (LocalizedNumberFormatter) cas[0];
|
||||
String posPrefix = (String) cas[1];
|
||||
String posSuffix = (String) cas[2];
|
||||
String negPrefix = (String) cas[3];
|
||||
String negSuffix = (String) cas[4];
|
||||
FormattedNumber positive = f.format(1);
|
||||
FormattedNumber negative = f.format(-1);
|
||||
assertEquals(posPrefix, positive.getPrefix());
|
||||
assertEquals(posSuffix, positive.getSuffix());
|
||||
assertEquals(negPrefix, negative.getPrefix());
|
||||
assertEquals(negSuffix, negative.getSuffix());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void plurals() {
|
||||
// TODO: Expand this test.
|
||||
|
@ -1921,12 +1912,12 @@ public class NumberFormatterApiTest {
|
|||
Rounder.class.getDeclaredMethod("minMaxFraction", Integer.TYPE, Integer.TYPE),
|
||||
Rounder.class.getDeclaredMethod("minMaxDigits", Integer.TYPE, Integer.TYPE), };
|
||||
|
||||
final int EXPECTED_MAX_INT_FRAC_SIG = 100;
|
||||
final String expectedSubstring0 = "between 0 and 100 (inclusive)";
|
||||
final String expectedSubstring1 = "between 1 and 100 (inclusive)";
|
||||
final String expectedSubstringN1 = "between -1 and 100 (inclusive)";
|
||||
final int EXPECTED_MAX_INT_FRAC_SIG = 999;
|
||||
final String expectedSubstring0 = "between 0 and 999 (inclusive)";
|
||||
final String expectedSubstring1 = "between 1 and 999 (inclusive)";
|
||||
final String expectedSubstringN1 = "between -1 and 999 (inclusive)";
|
||||
|
||||
// We require that the upper bounds all be 100 inclusive.
|
||||
// We require that the upper bounds all be 999 inclusive.
|
||||
// The lower bound may be either -1, 0, or 1.
|
||||
Set<String> methodsWithLowerBound1 = new HashSet();
|
||||
methodsWithLowerBound1.add("fixedDigits");
|
||||
|
@ -1936,6 +1927,12 @@ public class NumberFormatterApiTest {
|
|||
methodsWithLowerBound1.add("withMinDigits");
|
||||
methodsWithLowerBound1.add("withMaxDigits");
|
||||
methodsWithLowerBound1.add("withMinExponentDigits");
|
||||
// Methods with lower bound 0:
|
||||
// fixedFraction
|
||||
// minFraction
|
||||
// maxFraction
|
||||
// minMaxFraction
|
||||
// zeroFillTo
|
||||
Set<String> methodsWithLowerBoundN1 = new HashSet();
|
||||
methodsWithLowerBoundN1.add("truncateAt");
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
|
|||
import com.ibm.icu.impl.number.parse.MinusSignMatcher;
|
||||
import com.ibm.icu.impl.number.parse.NumberParserImpl;
|
||||
import com.ibm.icu.impl.number.parse.ParsedNumber;
|
||||
import com.ibm.icu.impl.number.parse.ParsingUtils;
|
||||
import com.ibm.icu.impl.number.parse.PercentMatcher;
|
||||
import com.ibm.icu.impl.number.parse.PlusSignMatcher;
|
||||
import com.ibm.icu.impl.number.parse.SeriesMatcher;
|
||||
|
@ -191,7 +192,7 @@ public class NumberParserTest {
|
|||
int expectedOffset = (Integer) cas[1];
|
||||
boolean expectedMaybeMore = (Boolean) cas[2];
|
||||
|
||||
StringSegment segment = new StringSegment(input);
|
||||
StringSegment segment = new StringSegment(input, 0);
|
||||
ParsedNumber result = new ParsedNumber();
|
||||
boolean actualMaybeMore = series.match(segment, result);
|
||||
int actualOffset = segment.getOffset();
|
||||
|
@ -215,4 +216,39 @@ public class NumberParserTest {
|
|||
result.getNumber().doubleValue(),
|
||||
0.0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCaseFolding() {
|
||||
Object[][] cases = new Object[][] {
|
||||
// pattern, input string, case sensitive chars, case insensitive chars
|
||||
{ "0", "JP¥3456", 7, 7 },
|
||||
{ "0", "jp¥3456", 0, 0 }, // not to be accepted, even in case insensitive mode
|
||||
{ "A0", "A5", 2, 2 },
|
||||
{ "A0", "a5", 0, 2 },
|
||||
{ "0", "NaN", 3, 3 },
|
||||
{ "0", "nan", 0, 3 } };
|
||||
for (Object[] cas : cases) {
|
||||
String patternString = (String) cas[0];
|
||||
String inputString = (String) cas[1];
|
||||
int expectedCaseSensitiveChars = (Integer) cas[2];
|
||||
int expectedCaseFoldingChars = (Integer) cas[3];
|
||||
|
||||
NumberParserImpl caseSensitiveParser = NumberParserImpl
|
||||
.removeMeWhenMerged(ULocale.ENGLISH, patternString, ParsingUtils.PARSE_FLAG_OPTIMIZE);
|
||||
ParsedNumber result = new ParsedNumber();
|
||||
caseSensitiveParser.parse(inputString, true, result);
|
||||
assertEquals("Case-Sensitive: " + inputString + " on " + patternString,
|
||||
expectedCaseSensitiveChars,
|
||||
result.charEnd);
|
||||
|
||||
NumberParserImpl caseFoldingParser = NumberParserImpl.removeMeWhenMerged(ULocale.ENGLISH,
|
||||
patternString,
|
||||
ParsingUtils.PARSE_FLAG_IGNORE_CASE | ParsingUtils.PARSE_FLAG_OPTIMIZE);
|
||||
result = new ParsedNumber();
|
||||
caseFoldingParser.parse(inputString, true, result);
|
||||
assertEquals("Folded: " + inputString + " on " + patternString,
|
||||
expectedCaseFoldingChars,
|
||||
result.charEnd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@ public class StringSegmentTest {
|
|||
|
||||
@Test
|
||||
public void testOffset() {
|
||||
StringSegment segment = new StringSegment(SAMPLE_STRING);
|
||||
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
|
||||
assertEquals(0, segment.getOffset());
|
||||
segment.adjustOffset(3);
|
||||
assertEquals(3, segment.getOffset());
|
||||
|
@ -29,7 +29,7 @@ public class StringSegmentTest {
|
|||
|
||||
@Test
|
||||
public void testLength() {
|
||||
StringSegment segment = new StringSegment(SAMPLE_STRING);
|
||||
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
|
||||
assertEquals(11, segment.length());
|
||||
segment.adjustOffset(3);
|
||||
assertEquals(8, segment.length());
|
||||
|
@ -43,7 +43,7 @@ public class StringSegmentTest {
|
|||
|
||||
@Test
|
||||
public void testCharAt() {
|
||||
StringSegment segment = new StringSegment(SAMPLE_STRING);
|
||||
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
|
||||
assertCharSequenceEquals(SAMPLE_STRING, segment);
|
||||
segment.adjustOffset(3);
|
||||
assertCharSequenceEquals("radio 📻", segment);
|
||||
|
@ -53,20 +53,20 @@ public class StringSegmentTest {
|
|||
|
||||
@Test
|
||||
public void testGetCodePoint() {
|
||||
StringSegment segment = new StringSegment(SAMPLE_STRING);
|
||||
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
|
||||
assertEquals(0x1F4FB, segment.getCodePoint());
|
||||
segment.setLength(1);
|
||||
assertEquals(-1, segment.getCodePoint());
|
||||
assertEquals(0xD83D, segment.getCodePoint());
|
||||
segment.resetLength();
|
||||
segment.adjustOffset(1);
|
||||
assertEquals(-1, segment.getCodePoint());
|
||||
assertEquals(0xDCFB, segment.getCodePoint());
|
||||
segment.adjustOffset(1);
|
||||
assertEquals(0x20, segment.getCodePoint());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCommonPrefixLength() {
|
||||
StringSegment segment = new StringSegment(SAMPLE_STRING);
|
||||
StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
|
||||
assertEquals(11, segment.getCommonPrefixLength(SAMPLE_STRING));
|
||||
assertEquals(4, segment.getCommonPrefixLength("📻 r"));
|
||||
assertEquals(3, segment.getCommonPrefixLength("📻 x"));
|
||||
|
|
|
@ -19,6 +19,7 @@ package com.ibm.icu.dev.test.rbbi;
|
|||
import java.text.CharacterIterator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
@ -26,6 +27,7 @@ import org.junit.runners.JUnit4;
|
|||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.RBBIDataWrapper;
|
||||
import com.ibm.icu.text.RuleBasedBreakIterator;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
|
@ -562,4 +564,62 @@ public class RBBITest extends TestFmwk {
|
|||
String rtRules = bi.toString(); // getRules() in C++
|
||||
assertEquals("Break Iterator rule stripping test", "!!forward; $x = [ab#]; '#' '?'; ", rtRules);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestTableRedundancies() {
|
||||
RuleBasedBreakIterator bi = (RuleBasedBreakIterator)BreakIterator.getLineInstance(Locale.ENGLISH);
|
||||
String rules = bi.toString();
|
||||
bi = new RuleBasedBreakIterator(rules);
|
||||
// Build a break iterator from source rules.
|
||||
// Want to check the rule builder in Java, not the pre-built rules that are imported from ICU4C.
|
||||
RBBIDataWrapper dw = bi.fRData;
|
||||
short[] fwtbl = dw.fFTable;
|
||||
int numCharClasses = dw.fHeader.fCatCount;
|
||||
|
||||
// Check for duplicate columns (character categories)
|
||||
List<String> columns = new ArrayList<String>();
|
||||
for (int column=0; column<numCharClasses; column++) {
|
||||
StringBuilder s = new StringBuilder();
|
||||
for (int r = 1; r < dw.getStateTableNumStates(fwtbl); r++) {
|
||||
int row = dw.getRowIndex(r);
|
||||
short tableVal = fwtbl[row + RBBIDataWrapper.NEXTSTATES + column];
|
||||
s.append((char)tableVal);
|
||||
}
|
||||
columns.add(s.toString());
|
||||
}
|
||||
// Ignore column (char class) 0 while checking; it's special, and may have duplicates.
|
||||
for (int c1=1; c1<numCharClasses; c1++) {
|
||||
for (int c2 = c1+1; c2 < numCharClasses; c2++) {
|
||||
// assertFalse(String.format("Duplicate columns (%d, %d)", c1, c2), columns.get(c1).equals(columns.get(c2)));
|
||||
if (columns.get(c1).equals(columns.get(c2))) {
|
||||
System.out.printf("Duplicate columns (%d, %d)\n", c1, c2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for duplicate states.
|
||||
List<String> rows = new ArrayList<String>();
|
||||
for (int r=0; r<dw.getStateTableNumStates(fwtbl); r++) {
|
||||
StringBuilder s = new StringBuilder();
|
||||
int row = dw.getRowIndex(r);
|
||||
assertTrue("Accepting < -1", fwtbl[row + RBBIDataWrapper.ACCEPTING] >= -1);
|
||||
s.append(fwtbl[row + RBBIDataWrapper.ACCEPTING]);
|
||||
s.append(fwtbl[row + RBBIDataWrapper.LOOKAHEAD]);
|
||||
s.append(fwtbl[row + RBBIDataWrapper.TAGIDX]);
|
||||
for (int column=0; column<numCharClasses; column++) {
|
||||
short tableVal = fwtbl[row + RBBIDataWrapper.NEXTSTATES + column];
|
||||
s.append((char)tableVal);
|
||||
}
|
||||
rows.add(s.toString());
|
||||
}
|
||||
|
||||
for (int r1=0; r1 < dw.getStateTableNumStates(fwtbl); r1++) {
|
||||
for (int r2= r1+1; r2 < dw.getStateTableNumStates(fwtbl); r2++) {
|
||||
// assertFalse(String.format("Duplicate states (%d, %d)", r1, r2), rows.get(r1).equals(rows.get(r2)));
|
||||
if (rows.get(r1).equals(rows.get(r2))) {
|
||||
System.out.printf("Duplicate states (%d, %d)\n", r1, r2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue