ICU-13513 Removing ICU 59 parsing code.

X-SVN-Rev: 40814
This commit is contained in:
Shane Carr 2018-01-27 11:45:00 +00:00
parent f698c8814b
commit 519d2a5a86
11 changed files with 69 additions and 2817 deletions

View file

@ -14,7 +14,6 @@ import java.util.List;
import java.util.ListIterator;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
/**
@ -120,85 +119,6 @@ public class TextTrieMap<V> {
_root.putLeadCodePoints(output);
}
/**
 * Opens a stateful cursor over this trie that is fed one code point at a time and reports the
 * prefix matches seen so far. The starting code point is only inspected, not consumed: the
 * returned cursor is positioned at the root.
 *
 * @param startingCp The first code point the caller intends to feed; it is case-folded first
 *     when this map ignores case.
 * @return A new {@link ParseState}, or null when no child of the root starts with the first
 *     UTF-16 code unit of the (possibly folded) code point.
 */
public ParseState openParseState(int startingCp) {
    int cp = _ignoreCase ? UCharacter.foldCase(startingCp, true) : startingCp;
    // Only the first UTF-16 unit is checked: the BMP char itself, or the lead surrogate.
    char firstUnit;
    if (Character.charCount(cp) == 1) {
        firstUnit = (char) cp;
    } else {
        firstUnit = UTF16.getLeadSurrogate(cp);
    }
    return _root.hasChildFor(firstUnit) ? new ParseState(_root) : null;
}
/**
 * A mutable cursor that walks this trie one code point at a time. Instances are not thread-safe
 * and are intended for internal use by parsers that read values out of the trie.
 */
public class ParseState {
    private Node node;
    private int offset;
    private Node.StepResult result;

    ParseState(Node start) {
        this.node = start;
        this.offset = 0;
        this.result = start.new StepResult();
    }

    /**
     * Feeds one code point to the cursor and moves to the matching position in the trie. A
     * supplementary code point is taken as two steps: lead surrogate, then trail surrogate.
     *
     * @param cp The code point to consume.
     */
    public void accept(int cp) {
        assert node != null;
        int folded = _ignoreCase ? UCharacter.foldCase(cp, true) : cp;
        boolean twoUnits = Character.charCount(folded) == 2;
        char firstUnit = twoUnits ? UTF16.getLeadSurrogate(folded) : (char) folded;
        node.takeStep(firstUnit, offset, result);
        // The trail surrogate is only stepped when the lead surrogate matched.
        if (twoUnits && result.node != null) {
            char secondUnit = UTF16.getTrailSurrogate(folded);
            result.node.takeStep(secondUnit, result.offset, result);
        }
        node = result.node;
        offset = result.offset;
    }

    /**
     * Returns the values that exactly match everything consumed so far.
     *
     * @return The current node's values when the cursor sits at the end of that node's text;
     *     null otherwise, including after a step that found no match.
     */
    public Iterator<V> getCurrentMatches() {
        boolean atNodeEnd = node != null && offset == node.charCount();
        return atNodeEnd ? node.values() : null;
    }

    /**
     * Tells whether the cursor is exhausted.
     *
     * @return true if no more code points can be consumed; false otherwise.
     */
    public boolean atEnd() {
        if (node == null) {
            return true;
        }
        return node.charCount() == offset && node._children == null;
    }
}
public static class CharIterator implements Iterator<Character> {
private boolean _ignoreCase;
private CharSequence _text;
@ -332,17 +252,6 @@ public class TextTrieMap<V> {
return _text == null ? 0 : _text.length;
}
/**
 * Tells whether some direct child of this node has text starting with the given char.
 * NOTE(review): the early exit presumably relies on children being ordered by their first
 * char -- confirm against the insertion code.
 *
 * @param ch The UTF-16 code unit to look for.
 * @return true if a child whose text begins with {@code ch} exists; false otherwise.
 */
public boolean hasChildFor(char ch) {
    if (_children == null) {
        return false;
    }
    for (Node candidate : _children) {
        char head = candidate._text[0];
        if (ch < head) {
            break;
        }
        if (ch == head) {
            return true;
        }
    }
    return false;
}
public Iterator<V> values() {
if (_values == null) {
return null;
@ -405,37 +314,6 @@ public class TextTrieMap<V> {
}
}
/**
* Output holder filled in by {@code takeStep}: the node and in-node offset reached by a step.
* When the step finds no match, {@code node} is null and {@code offset} is -1.
*/
public class StepResult {
// Node reached by the step, or null when the char did not match.
public Node node;
// Count of chars of that node's text consumed so far, or -1 when there was no match.
public int offset;
}
/**
 * Advances by one UTF-16 code unit from position {@code offset} inside this node's text and
 * writes the destination into {@code result}.
 *
 * @param ch The code unit to consume.
 * @param offset How many chars of this node's text are already consumed; must not exceed
 *     {@code charCount()}.
 * @param result Receives the reached node and its new offset, or null and -1 when {@code ch}
 *     matches neither the next char of this node nor the first char of any child.
 */
public void takeStep(char ch, int offset, StepResult result) {
    assert offset <= charCount();
    Node reached = null;
    int reachedOffset = -1;
    if (offset != charCount()) {
        // Still inside this node's own text: the next char has to match.
        if (_text[offset] == ch) {
            reached = this;
            reachedOffset = offset + 1;
        }
    } else if (_children != null) {
        // This node's text is used up: look for a child that starts with ch.
        for (Node child : _children) {
            char head = child._text[0];
            if (ch < head) {
                break;
            }
            if (ch == head) {
                reached = child;
                reachedOffset = 1;
                break;
            }
        }
    }
    result.node = reached;
    result.offset = reachedOffset;
}
private void add(char[] text, int offset, V value) {
if (text.length == offset) {
_values = addValue(_values, value);

View file

@ -16,8 +16,7 @@ import java.util.ArrayList;
import java.util.Map;
import com.ibm.icu.impl.number.Padder.PadPosition;
import com.ibm.icu.impl.number.Parse.GroupingMode;
import com.ibm.icu.impl.number.Parse.ParseMode;
import com.ibm.icu.impl.number.parse.NumberParserImpl.ParseMode;
import com.ibm.icu.text.CompactDecimalFormat.CompactStyle;
import com.ibm.icu.text.CurrencyPluralInfo;
import com.ibm.icu.text.PluralRules;
@ -73,7 +72,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable {
private transient PadPosition padPosition;
private transient String padString;
private transient boolean parseCaseSensitive;
private transient GroupingMode parseGroupingMode;
private transient boolean parseIntegerOnly;
private transient ParseMode parseMode;
private transient boolean parseNoExponent;
@ -145,7 +143,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable {
padPosition = null;
padString = null;
parseCaseSensitive = false;
parseGroupingMode = null;
parseIntegerOnly = false;
parseMode = null;
parseNoExponent = false;
@ -191,7 +188,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable {
padPosition = other.padPosition;
padString = other.padString;
parseCaseSensitive = other.parseCaseSensitive;
parseGroupingMode = other.parseGroupingMode;
parseIntegerOnly = other.parseIntegerOnly;
parseMode = other.parseMode;
parseNoExponent = other.parseNoExponent;
@ -238,7 +234,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable {
eq = eq && _equalsHelper(padPosition, other.padPosition);
eq = eq && _equalsHelper(padString, other.padString);
eq = eq && _equalsHelper(parseCaseSensitive, other.parseCaseSensitive);
eq = eq && _equalsHelper(parseGroupingMode, other.parseGroupingMode);
eq = eq && _equalsHelper(parseIntegerOnly, other.parseIntegerOnly);
eq = eq && _equalsHelper(parseMode, other.parseMode);
eq = eq && _equalsHelper(parseNoExponent, other.parseNoExponent);
@ -301,7 +296,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable {
hashCode ^= _hashCodeHelper(padPosition);
hashCode ^= _hashCodeHelper(padString);
hashCode ^= _hashCodeHelper(parseCaseSensitive);
hashCode ^= _hashCodeHelper(parseGroupingMode);
hashCode ^= _hashCodeHelper(parseIntegerOnly);
hashCode ^= _hashCodeHelper(parseMode);
hashCode ^= _hashCodeHelper(parseNoExponent);
@ -484,10 +478,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable {
return parseCaseSensitive;
}
/**
* Returns the grouping mode currently stored in this property bag.
*
* @return The stored {@link GroupingMode}; may be null.
*/
public GroupingMode getParseGroupingMode() {
return parseGroupingMode;
}
public boolean getParseIntegerOnly() {
return parseIntegerOnly;
}
@ -1082,34 +1072,6 @@ public class DecimalFormatProperties implements Cloneable, Serializable {
return this;
}
/**
* Sets the strategy used during parsing when a code point needs to be interpreted as either a
* decimal separator or a grouping separator.
*
* <p>
* The comma, period, space, and apostrophe have different meanings in different locales. For
* example, in <em>en-US</em> and most American locales, the period is used as a decimal separator,
* but in <em>es-PY</em> and most European locales, it is used as a grouping separator.
*
* <p>
* Suppose you are in <em>fr-FR</em> and the parser encounters the string "1.234". In <em>fr-FR</em>,
* the grouping is a space and the decimal is a comma. The <em>grouping mode</em> is a mechanism to let
* you specify whether to accept the string as 1234 (GroupingMode.DEFAULT) or whether to reject it
* since the separators don't match (GroupingMode.RESTRICTED).
*
* <p>
* When resolving grouping separators, it is the <em>equivalence class</em> of separators that is
* considered. For example, a period is seen as equal to a fixed set of other period-like characters.
*
* @param parseGroupingMode
* The {@link GroupingMode} to use; either DEFAULT or RESTRICTED.
* @return The property bag, for chaining.
*/
public DecimalFormatProperties setParseGroupingMode(GroupingMode parseGroupingMode) {
this.parseGroupingMode = parseGroupingMode;
return this;
}
/**
* Whether to ignore the fractional part of numbers. For example, parses "123.4" to "123" instead of
* "123.4".
@ -1150,8 +1112,8 @@ public class DecimalFormatProperties implements Cloneable, Serializable {
}
/**
* Whether to always return a BigDecimal from {@link Parse#parse} and all other parse methods. By
* default, a Long or a BigInteger are returned when possible.
* Whether to always return a BigDecimal from parse methods. By default, a Long or a BigInteger are
* returned when possible.
*
* @param parseToBigDecimal
* true to always return a BigDecimal; false to return a Long or a BigInteger when

File diff suppressed because it is too large Load diff

View file

@ -12,7 +12,6 @@ import com.ibm.icu.impl.number.AffixPatternProvider;
import com.ibm.icu.impl.number.AffixUtils;
import com.ibm.icu.impl.number.CustomSymbolCurrency;
import com.ibm.icu.impl.number.DecimalFormatProperties;
import com.ibm.icu.impl.number.Parse.ParseMode;
import com.ibm.icu.impl.number.PatternStringParser;
import com.ibm.icu.impl.number.PatternStringParser.ParsedPatternInfo;
import com.ibm.icu.impl.number.PropertiesAffixPatternProvider;
@ -31,6 +30,43 @@ import com.ibm.icu.util.ULocale;
*
*/
public class NumberParserImpl {
// TODO: Find a better place for this enum.
/**
 * Controls the set of rules for parsing a string.
 *
 * <p>Declared without the {@code static} modifier: a nested enum is implicitly static, so the
 * keyword was redundant.
 */
public enum ParseMode {
    /**
     * Lenient mode should be used if you want to accept malformed user input. It will use
     * heuristics to attempt to parse through typographical errors in the string.
     */
    LENIENT,

    /**
     * Strict mode should be used if you want to require that the input is well-formed. More
     * specifically, it differs from lenient mode in the following ways:
     *
     * <ul>
     * <li>Grouping widths must match the grouping settings. For example, "12,3,45" will fail if
     * the grouping width is 3, as in the pattern "#,##0".
     * <li>The string must contain a complete prefix and suffix. For example, if the pattern is
     * "{#};(#)", then "{123}" or "(123)" would match, but "{123", "123}", and "123" would all
     * fail. (The latter strings would be accepted in lenient mode.)
     * <li>Whitespace may not appear at arbitrary places in the string. In lenient mode,
     * whitespace is allowed to occur arbitrarily before and after prefixes and exponent
     * separators.
     * <li>Leading grouping separators are not allowed, as in ",123".
     * <li>Minus and plus signs can only appear if specified in the pattern. In lenient mode, a
     * plus or minus sign can always precede a number.
     * <li>The set of characters that can be interpreted as a decimal or grouping separator is
     * smaller.
     * <li><strong>If currency parsing is enabled,</strong> currencies must only appear where
     * specified in either the current pattern string or in a valid pattern string for the
     * current locale. For example, if the pattern is "¤0.00", then "$1.23" would match, but
     * "1.23$" would fail to match.
     * </ul>
     */
    STRICT,
}
@Deprecated
public static NumberParserImpl createParserFromPattern(
ULocale locale,

View file

@ -15,10 +15,10 @@ import java.text.ParsePosition;
import com.ibm.icu.impl.number.AffixUtils;
import com.ibm.icu.impl.number.DecimalFormatProperties;
import com.ibm.icu.impl.number.Padder.PadPosition;
import com.ibm.icu.impl.number.Parse;
import com.ibm.icu.impl.number.PatternStringParser;
import com.ibm.icu.impl.number.PatternStringUtils;
import com.ibm.icu.impl.number.parse.NumberParserImpl;
import com.ibm.icu.impl.number.parse.NumberParserImpl.ParseMode;
import com.ibm.icu.impl.number.parse.ParsedNumber;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.math.BigDecimal;
@ -2162,7 +2162,7 @@ public class DecimalFormat extends NumberFormat {
*/
@Override
public synchronized boolean isParseStrict() {
return properties.getParseMode() == Parse.ParseMode.STRICT;
return properties.getParseMode() == ParseMode.STRICT;
}
/**
@ -2173,7 +2173,7 @@ public class DecimalFormat extends NumberFormat {
*/
@Override
public synchronized void setParseStrict(boolean parseStrict) {
Parse.ParseMode mode = parseStrict ? Parse.ParseMode.STRICT : Parse.ParseMode.LENIENT;
ParseMode mode = parseStrict ? ParseMode.STRICT : ParseMode.LENIENT;
properties.setParseMode(mode);
refreshFormatter();
}

View file

@ -13,7 +13,7 @@ import java.text.AttributedCharacterIterator.Attribute;
import java.text.CharacterIterator;
import java.util.Map;
import com.ibm.icu.impl.number.Parse;
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.ULocale;
@ -230,14 +230,14 @@ public final class ScientificNumberFormatter {
int start = iterator.getRunStart(NumberFormat.Field.EXPONENT_SIGN);
int limit = iterator.getRunLimit(NumberFormat.Field.EXPONENT_SIGN);
int aChar = char32AtAndAdvance(iterator);
if (Parse.UNISET_MINUS.contains(aChar)) {
if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(aChar)) {
append(
iterator,
copyFromOffset,
start,
result);
result.append(SUPERSCRIPT_MINUS_SIGN);
} else if (Parse.UNISET_PLUS.contains(aChar)) {
} else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(aChar)) {
append(
iterator,
copyFromOffset,

View file

@ -756,21 +756,6 @@ public class Currency extends MeasureUnit {
}
}
/**
 * Opens a trie parse state over the currency strings of the given locale.
 *
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
public static TextTrieMap<CurrencyStringInfo>.ParseState openParseState(
        ULocale locale, int startingCp, int type) {
    // The trie at index 1 is used for LONG_NAME; every other type uses the trie at index 0.
    int trieIndex = (type == Currency.LONG_NAME) ? 1 : 0;
    return getCurrencyTrieVec(locale).get(trieIndex).openParseState(startingCp);
}
private static List<TextTrieMap<CurrencyStringInfo>> getCurrencyTrieVec(ULocale locale) {
List<TextTrieMap<CurrencyStringInfo>> currencyTrieVec = CURRENCY_NAME_CACHE.get(locale);
if (currencyTrieVec == null) {

View file

@ -754,9 +754,9 @@ parse output breaks
+1,234,567.8901 1234567.8901
+1,23,4567.8901 1234567.8901
// P supports grouping separators in the fraction; none of the others do.
+1,23,4567.89,01 1234567.8901 CJKS
+1,23,4567.89,01 1234567.8901 CJK
+1,23,456.78.9 123456.78
+12.34,56 12.3456 CJKS
+12.34,56 12.3456 CJK
+79,,20,3 79203
+79 20 3 79203 K
// Parsing stops at comma as it is different from other separators
@ -862,7 +862,7 @@ parse output breaks
+1,234.5 1234.5
// Comma after decimal means a fractional grouping separator
// P fails since it finds an invalid grouping size
+1,23,456.78,9 123456.789 JKPS
+1,23,456.78,9 123456.789 JKP
// C and J fail upon seeing the second decimal point
+1,23,456.78.9 123456.78 CJ
+79 79
@ -997,8 +997,8 @@ parse output breaks
123.456 123456
123,456 123.456
// The separator after the comma can be interpreted as a fractional grouping
987,654.321 987.654321 CJKS
987,654 321 987.654321 CJKS
987,654.321 987.654321 CJK
987,654 321 987.654321 CJK
987.654,321 987654.321
test select
@ -1188,17 +1188,17 @@ $53.45 fail USD J
USD 53.45 53.45 USD J
53.45USD 53.45 USD CJ
USD53.45 53.45 USD
// S fails these because '(' is an incomplete prefix.
(7.92) USD -7.92 USD CJSP
(7.92) GBP -7.92 GBP CJSP
(7.926) USD -7.926 USD CJSP
(7.926 USD) -7.926 USD CJSP
// P fails these because '(' is an incomplete prefix.
(7.92) USD -7.92 USD CJP
(7.92) GBP -7.92 GBP CJP
(7.926) USD -7.926 USD CJP
(7.926 USD) -7.926 USD CJP
(USD 7.926) -7.926 USD J
USD (7.926) -7.926 USD CJSP
USD (7.92) -7.92 USD CJSP
(7.92)USD -7.92 USD CJSP
USD(7.92) -7.92 USD CJSP
(8) USD -8 USD CJSP
USD (7.926) -7.926 USD CJP
USD (7.92) -7.92 USD CJP
(7.92)USD -7.92 USD CJP
USD(7.92) -7.92 USD CJP
(8) USD -8 USD CJP
-8 USD -8 USD C
67 USD 67 USD C
53.45$ fail USD
@ -1429,8 +1429,8 @@ NaN NaN K
1E2147483646 1E2147483646
1E-2147483649 0
1E-2147483648 0
// S and P return zero here
1E-2147483647 1E-2147483647 SP
// P returns zero here
1E-2147483647 1E-2147483647 P
1E-2147483646 1E-2147483646
test format push limits
@ -1460,7 +1460,7 @@ set pattern #,##0
begin
parse output breaks
// K and J return null; S, C, and P return 99
9 9 9 CJKSP
9 9 9 CJKP
// K returns null
9 999 9999 K
@ -1504,7 +1504,7 @@ y g h56 -56 JK
56i jk -56 CJK
56i jk -56 CJK
// S and C get 56 (accepts ' ' as grouping); J and K get null
5 6 fail CSP
5 6 fail CP
56 5 JK
test parse spaces in grouping
@ -1515,8 +1515,8 @@ set pattern #,##0
begin
parse output breaks
// C, J, S, and P get "12" here
1 2 1 CJSP
1 23 1 CJSP
1 2 1 CJP
1 23 1 CJP
// K gets 1 here; doesn't pick up the grouping separator
1 234 1234 K

View file

@ -4,7 +4,6 @@ package com.ibm.icu.dev.test.format;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.text.ParseException;
import java.text.ParsePosition;
import org.junit.Test;
@ -12,11 +11,10 @@ import org.junit.Test;
import com.ibm.icu.dev.test.TestUtil;
import com.ibm.icu.impl.number.DecimalFormatProperties;
import com.ibm.icu.impl.number.Padder.PadPosition;
import com.ibm.icu.impl.number.Parse;
import com.ibm.icu.impl.number.Parse.ParseMode;
import com.ibm.icu.impl.number.PatternStringParser;
import com.ibm.icu.impl.number.PatternStringUtils;
import com.ibm.icu.impl.number.parse.NumberParserImpl;
import com.ibm.icu.impl.number.parse.NumberParserImpl.ParseMode;
import com.ibm.icu.number.LocalizedNumberFormatter;
import com.ibm.icu.number.NumberFormatter;
import com.ibm.icu.text.DecimalFormat;
@ -727,98 +725,6 @@ public class NumberFormatDataDrivenTest {
return null;
}
/**
* Runs a single parse test. On success, returns null. On failure, returns the error.
*
* <p>
* Builds the properties from the tuple's pattern (using "0" when the tuple has no pattern), parses
* {@code tuple.parse} starting at position 0, and compares the parsed number with
* {@code tuple.output}.
*
* @param tuple
* contains the parameters of the format test.
* @return null on success; otherwise a message describing the failure.
*/
@Override
public String parse(DataDrivenNumberFormatTestData tuple) {
String pattern = (tuple.pattern == null) ? "0" : tuple.pattern;
DecimalFormatProperties properties;
ParsePosition ppos = new ParsePosition(0);
Number actual;
try {
// Rounding from the pattern is ignored whenever the tuple specifies a currency.
properties = PatternStringParser.parseToProperties(
pattern,
tuple.currency != null ? PatternStringParser.IGNORE_ROUNDING_ALWAYS
: PatternStringParser.IGNORE_ROUNDING_NEVER);
propertiesFromTuple(tuple, properties);
actual = Parse.parse(tuple.parse, ppos, properties, DecimalFormatSymbols.getInstance(tuple.locale));
} catch (IllegalArgumentException e) {
return "parse exception: " + e.getMessage();
}
// Sanity check: a null result must leave the parse position untouched.
if (actual == null && ppos.getIndex() != 0) {
throw new AssertionError("Error: value is null but parse position is not zero");
}
// An unmoved parse position means nothing was parsed.
if (ppos.getIndex() == 0) {
return "Parse failed; got " + actual + ", but expected " + tuple.output;
}
// From here on something was parsed; compare against the expected output keyword or number.
if (tuple.output.equals("NaN")) {
if (!Double.isNaN(actual.doubleValue())) {
return "Expected NaN, but got: " + actual;
}
return null;
} else if (tuple.output.equals("Inf")) {
if (!Double.isInfinite(actual.doubleValue()) || Double.compare(actual.doubleValue(), 0.0) < 0) {
return "Expected Inf, but got: " + actual;
}
return null;
} else if (tuple.output.equals("-Inf")) {
if (!Double.isInfinite(actual.doubleValue()) || Double.compare(actual.doubleValue(), 0.0) > 0) {
return "Expected -Inf, but got: " + actual;
}
return null;
} else if (tuple.output.equals("fail")) {
// NOTE(review): a successful parse with expected output "fail" is reported as null here;
// presumably the caller handles the "fail" expectation itself -- confirm.
return null;
} else if (new BigDecimal(tuple.output).compareTo(new BigDecimal(actual.toString())) != 0) {
// Numeric comparison via compareTo, so differences in scale alone do not count.
return "Expected: " + tuple.output + ", got: " + actual;
} else {
return null;
}
}
/**
* Runs a single parse currency test. On success, returns null. On failure, returns the error.
*
* <p>
* Builds the properties from the tuple's pattern (using "0" when the tuple has no pattern), parses
* {@code tuple.parse} as a currency amount starting at position 0, and checks both the number and
* the currency against {@code tuple.output} and {@code tuple.outputCurrency}.
*
* @param tuple
* contains the parameters of the format test.
* @return null on success; otherwise a message describing the failure.
*/
@Override
public String parseCurrency(DataDrivenNumberFormatTestData tuple) {
String pattern = (tuple.pattern == null) ? "0" : tuple.pattern;
DecimalFormatProperties properties;
ParsePosition ppos = new ParsePosition(0);
CurrencyAmount actual;
try {
// Rounding from the pattern is ignored whenever the tuple specifies a currency.
properties = PatternStringParser.parseToProperties(
pattern,
tuple.currency != null ? PatternStringParser.IGNORE_ROUNDING_ALWAYS
: PatternStringParser.IGNORE_ROUNDING_NEVER);
propertiesFromTuple(tuple, properties);
actual = Parse
.parseCurrency(tuple.parse, ppos, properties, DecimalFormatSymbols.getInstance(tuple.locale));
} catch (ParseException e) {
e.printStackTrace();
return "parse exception: " + e.getMessage();
}
// An unmoved parse position, or a parsed currency code of "XXX", is reported as a failed parse.
if (ppos.getIndex() == 0 || actual.getCurrency().getCurrencyCode().equals("XXX")) {
return "Parse failed; got " + actual + ", but expected " + tuple.output;
}
// Numeric comparison via compareTo, so differences in scale alone do not count.
BigDecimal expectedNumber = new BigDecimal(tuple.output);
if (expectedNumber.compareTo(new BigDecimal(actual.getNumber().toString())) != 0) {
return "Wrong number: Expected: " + expectedNumber + ", got: " + actual;
}
String expectedCurrency = tuple.outputCurrency;
if (!expectedCurrency.equals(actual.getCurrency().toString())) {
return "Wrong currency: Expected: " + expectedCurrency + ", got: " + actual;
}
return null;
}
/**
* Runs a single select test. On success, returns null. On failure, returns the error. This implementation just
* returns null. Subclasses should override.

View file

@ -31,9 +31,8 @@ import org.junit.Test;
import com.ibm.icu.dev.test.serializable.SerializableTestUtility;
import com.ibm.icu.impl.number.DecimalFormatProperties;
import com.ibm.icu.impl.number.Padder.PadPosition;
import com.ibm.icu.impl.number.Parse.GroupingMode;
import com.ibm.icu.impl.number.Parse.ParseMode;
import com.ibm.icu.impl.number.PatternStringParser;
import com.ibm.icu.impl.number.parse.NumberParserImpl.ParseMode;
import com.ibm.icu.text.CompactDecimalFormat.CompactStyle;
import com.ibm.icu.text.CurrencyPluralInfo;
import com.ibm.icu.text.MeasureFormat.FormatWidth;
@ -252,12 +251,6 @@ public class PropertiesTest {
CurrencyUsage[] values = CurrencyUsage.values();
return values[seed % values.length];
} else if (type == GroupingMode.class) {
if (seed == 0)
return null;
GroupingMode[] values = GroupingMode.values();
return values[seed % values.length];
} else if (type == FormatWidth.class) {
if (seed == 0)
return null;

View file

@ -85,67 +85,6 @@ public class TextTrieMapTest extends TestFmwk {
{"l📺", null, SUP1},
};
// Test cases for checkParse. Each entry is {input text, expected results}. The expected results
// come in pairs, one pair per consumed code point: the first element of a pair is the
// case-sensitive expectation and the second is the case-insensitive one. null means no exact
// match is expected at that position.
private static final Object[][] TESTCASES_PARSE = {
{
"Sunday",
new Object[]{
new Object[]{SAT,SUN}, new Object[]{SAT,SUN}, // matches on "S"
null, null, // matches on "Su"
SUN, SUN, // matches on "Sun"
null, null, // matches on "Sund"
null, null, // matches on "Sunda"
SUN, SUN, // matches on "Sunday"
}
},
{
"sunday",
new Object[]{
null, new Object[]{SAT,SUN}, // matches on "s"
null, null, // matches on "su"
null, SUN, // matches on "sun"
null, null, // matches on "sund"
null, null, // matches on "sunda"
null, SUN, // matches on "sunday"
}
},
{
"MMM",
new Object[]{
MON, MON, // matches on "M"
// no more matches in data
}
},
{
"BBB",
new Object[]{
// no matches in data
}
},
{
"l📺12",
new Object[]{
null, null, // matches on "L"
null, SUP1, // matches on "L📺"
null, SUP2, // matches on "L📺1"
// no more matches in data
}
},
{
"L📻",
new Object[] {
null, null, // matches on "L"
SUP3, SUP3, // matches on "L📻"
}
},
{
"L🃏",
new Object[] {
null, null, // matches on "L"
SUP4, SUP4, // matches on "L🃏"
}
}
};
@Test
public void TestCaseSensitive() {
Iterator itr = null;
@ -172,13 +111,6 @@ public class TextTrieMapTest extends TestFmwk {
checkResult("get(String, int) case " + i, itr, TESTCASES[i][1]);
}
logln("Test for ParseState");
for (int i = 0; i < TESTCASES_PARSE.length; i++) {
String test = (String) TESTCASES_PARSE[i][0];
Object[] expecteds = (Object[]) TESTCASES_PARSE[i][1];
checkParse(map, test, expecteds, true);
}
logln("Test for partial match");
for (Object[] cas : TESTDATA) {
String str = (String) cas[0];
@ -239,13 +171,6 @@ public class TextTrieMapTest extends TestFmwk {
checkResult("get(String, int) case " + i, itr, TESTCASES[i][2]);
}
logln("Test for ParseState");
for (int i = 0; i < TESTCASES_PARSE.length; i++) {
String test = (String) TESTCASES_PARSE[i][0];
Object[] expecteds = (Object[]) TESTCASES_PARSE[i][1];
checkParse(map, test, expecteds, false);
}
logln("Test for partial match");
for (Object[] cas : TESTDATA) {
String str = (String) cas[0];
@ -279,54 +204,6 @@ public class TextTrieMapTest extends TestFmwk {
checkResult("Get Sunday", itr, new Object[]{SUN, FOO, BAR});
}
/**
* Feeds {@code text} to a ParseState one code point at a time and checks the matches reported
* after each code point, as well as atEnd(), against the expectations.
*
* @param map the trie under test
* @param text the input whose code points are consumed in order
* @param rawExpecteds interleaved expectations: case-sensitive at even indices, case-insensitive
* at odd indices
* @param caseSensitive selects which half of {@code rawExpecteds} is used
*/
private void checkParse(TextTrieMap map, String text, Object[] rawExpecteds, boolean caseSensitive) {
// rawExpecteds has even-valued indices for case sensitive and odd-valued indices for case insensitive
// Get out only the values that we want.
Object[] expecteds = null;
// Walk backwards so the array is sized by the last non-null expectation; trailing nulls are dropped.
for (int i=rawExpecteds.length/2-1; i>=0; i--) {
int j = i*2+(caseSensitive?0:1);
if (rawExpecteds[j] != null) {
if (expecteds == null) {
expecteds = new Object[i+1];
}
expecteds[i] = rawExpecteds[j];
}
}
if (expecteds == null) {
expecteds = new Object[0];
}
TextTrieMap.ParseState state = null;
// charOffset counts UTF-16 units in text; cpOffset counts code points consumed.
for (int charOffset=0, cpOffset=0; charOffset < text.length(); cpOffset++) {
int cp = Character.codePointAt(text, charOffset);
// The state is opened lazily with the first code point.
if (state == null) {
state = map.openParseState(cp);
}
// Still null: the trie has no entry starting with this code point.
if (state == null) {
assertEquals("Expected matches, but no matches are available", 0, expecteds.length);
break;
}
state.accept(cp);
if (cpOffset < expecteds.length - 1) {
assertFalse(
"In middle of parse sequence, but atEnd() is true: '" + text + "' offset " + charOffset,
state.atEnd());
} else if (cpOffset == expecteds.length) {
// Note: it is possible for atEnd() to be either true or false at expecteds.length - 1;
// if true, we are at the end of the input string; if false, there is still input string
// left to be consumed, but we don't know if there are remaining matches.
assertTrue(
"At end of parse sequence, but atEnd() is false: '" + text + "' offset " + charOffset,
state.atEnd());
break;
}
Object expected = expecteds[cpOffset];
Iterator actual = state.getCurrentMatches();
checkResult("ParseState '" + text + "' offset " + charOffset, actual, expected);
charOffset += Character.charCount(cp);
}
}
private boolean eql(Object o1, Object o2) {
if (o1 == null || o2 == null) {
if (o1 == null && o2 == null) {