ICU-13060 Fixing hash code iteration order issues (#13065) and signAlwaysShown parsing.

X-SVN-Rev: 39937
This commit is contained in:
Shane Carr 2017-03-25 07:41:42 +00:00
parent b58e1721a1
commit 7cfbb489bd
5 changed files with 246 additions and 102 deletions

View file

@ -95,45 +95,6 @@ public class AffixPatternUtils {
/** Represents a sequence of four or more currency symbols. */
public static final int TYPE_CURRENCY_OVERFLOW = -15;
/**
* Checks whether the specified affix pattern has any unquoted currency symbols ("¤").
*
* @param patternString The string to check for currency symbols.
* @return true if the literal has at least one unquoted currency symbol; false otherwise.
*/
public static boolean hasCurrencySymbols(CharSequence patternString) {
if (patternString == null) return false;
int offset = 0;
int state = STATE_BASE;
boolean result = false;
for (; offset < patternString.length(); ) {
int cp = Character.codePointAt(patternString, offset);
switch (state) {
case STATE_BASE:
if (cp == '¤') {
result = true;
} else if (cp == '\'') {
state = STATE_INSIDE_QUOTE;
}
break;
case STATE_INSIDE_QUOTE:
if (cp == '\'') {
state = STATE_BASE;
}
break;
default:
throw new AssertionError();
}
offset += Character.charCount(cp);
}
if (state == STATE_INSIDE_QUOTE) {
throw new IllegalArgumentException("Unterminated quote: \"" + patternString + "\"");
} else {
return result;
}
}
/**
* Estimates the number of code points present in an unescaped version of the affix pattern string
* (one that would be returned by {@link #unescape}), assuming that all interpolated symbols
@ -327,6 +288,70 @@ public class AffixPatternUtils {
}
}
/**
* Checks whether the given affix pattern contains at least one token of the given type, which is
* one of the constants "TYPE_" in {@link AffixPatternUtils}.
*
* @param affixPattern The affix pattern to check.
* @param type The token type.
* @return true if the affix pattern contains the given token type; false otherwise.
*/
public static boolean containsType(CharSequence affixPattern, int type) {
if (affixPattern == null || affixPattern.length() == 0) return false;
long tag = 0L;
while (hasNext(tag, affixPattern)) {
tag = nextToken(tag, affixPattern);
if (getTypeOrCp(tag) == type) {
return true;
}
}
return false;
}
/**
* Checks whether the specified affix pattern has any unquoted currency symbols ("¤").
*
* @param affixPattern The string to check for currency symbols.
* @return true if the literal has at least one unquoted currency symbol; false otherwise.
*/
public static boolean hasCurrencySymbols(CharSequence affixPattern) {
if (affixPattern == null || affixPattern.length() == 0) return false;
long tag = 0L;
while (hasNext(tag, affixPattern)) {
tag = nextToken(tag, affixPattern);
int typeOrCp = getTypeOrCp(tag);
if (typeOrCp == AffixPatternUtils.TYPE_CURRENCY_SINGLE
|| typeOrCp == AffixPatternUtils.TYPE_CURRENCY_DOUBLE
|| typeOrCp == AffixPatternUtils.TYPE_CURRENCY_TRIPLE
|| typeOrCp == AffixPatternUtils.TYPE_CURRENCY_OVERFLOW) {
return true;
}
}
return false;
}
/**
* Replaces all occurrences of tokens with the given type with the given replacement char.
*
* @param affixPattern The source affix pattern (does not get modified).
* @param type The token type.
* @param replacementChar The char to substitute in place of chars of the given token type.
* @return A string containing the new affix pattern.
*/
public static String replaceType(CharSequence affixPattern, int type, char replacementChar) {
if (affixPattern == null || affixPattern.length() == 0) return "";
char[] chars = affixPattern.toString().toCharArray();
long tag = 0L;
while (hasNext(tag, affixPattern)) {
tag = nextToken(tag, affixPattern);
if (getTypeOrCp(tag) == type) {
int offset = getOffset(tag);
chars[offset - 1] = replacementChar;
}
}
return new String(chars);
}
/**
* Returns the next token from the affix pattern.
*

View file

@ -116,10 +116,10 @@ public class PNAffixGenerator {
return getModifiersWithPlusSign(symbols, curr1, curr2, curr3, properties);
}
CharSequence ppp = properties.getPositivePrefixPattern();
CharSequence psp = properties.getPositiveSuffixPattern();
CharSequence npp = properties.getNegativePrefixPattern();
CharSequence nsp = properties.getNegativeSuffixPattern();
String ppp = properties.getPositivePrefixPattern();
String psp = properties.getPositiveSuffixPattern();
String npp = properties.getNegativePrefixPattern();
String nsp = properties.getNegativeSuffixPattern();
// Set sb1/sb2 to the positive prefix/suffix.
sb1.clear();
@ -151,10 +151,10 @@ public class PNAffixGenerator {
String curr3,
IProperties properties) {
CharSequence ppp = properties.getPositivePrefixPattern();
CharSequence psp = properties.getPositiveSuffixPattern();
CharSequence npp = properties.getNegativePrefixPattern();
CharSequence nsp = properties.getNegativeSuffixPattern();
String ppp = properties.getPositivePrefixPattern();
String psp = properties.getPositiveSuffixPattern();
String npp = properties.getNegativePrefixPattern();
String nsp = properties.getNegativeSuffixPattern();
// There are three cases, listed below with their expected outcomes.
// TODO: Should we handle the cases when the positive subpattern has a '+' already?

View file

@ -717,41 +717,53 @@ public class Parse {
static final AffixHolder EMPTY_POSITIVE = new AffixHolder("", "", true, false);
static final AffixHolder EMPTY_NEGATIVE = new AffixHolder("", "", true, true);
static final AffixHolder DEFAULT_POSITIVE = new AffixHolder("+", "", false, false);
static final AffixHolder DEFAULT_NEGATIVE = new AffixHolder("-", "", false, true);
static void addToState(ParserState state, IProperties properties) {
AffixHolder pp = fromPropertiesPositivePattern(properties);
AffixHolder np = fromPropertiesNegativePattern(properties);
AffixHolder ps = fromPropertiesPositiveString(properties);
AffixHolder ns = fromPropertiesNegativeString(properties);
if (pp == null && ps == null) {
if (properties.getSignAlwaysShown()) {
state.affixHolders.add(DEFAULT_POSITIVE);
} else {
state.affixHolders.add(EMPTY_POSITIVE);
}
} else {
if (pp != null) state.affixHolders.add(pp);
if (ps != null) state.affixHolders.add(ps);
}
if (np == null && ns == null) {
state.affixHolders.add(DEFAULT_NEGATIVE);
} else {
if (np != null) state.affixHolders.add(np);
if (ns != null) state.affixHolders.add(ns);
}
if (pp != null) state.affixHolders.add(pp);
if (ps != null) state.affixHolders.add(ps);
if (np != null) state.affixHolders.add(np);
if (ns != null) state.affixHolders.add(ns);
}
static AffixHolder fromPropertiesPositivePattern(IProperties properties) {
String ppp = properties.getPositivePrefixPattern();
String psp = properties.getPositiveSuffixPattern();
if (properties.getSignAlwaysShown()) {
// TODO: This logic is somewhat duplicated from PNAffixGenerator.
boolean foundSign = false;
String npp = properties.getNegativePrefixPattern();
String nsp = properties.getNegativeSuffixPattern();
if (AffixPatternUtils.containsType(npp, AffixPatternUtils.TYPE_MINUS_SIGN)) {
foundSign = true;
ppp = AffixPatternUtils.replaceType(npp, AffixPatternUtils.TYPE_MINUS_SIGN, '+');
}
if (AffixPatternUtils.containsType(nsp, AffixPatternUtils.TYPE_MINUS_SIGN)) {
foundSign = true;
psp = AffixPatternUtils.replaceType(nsp, AffixPatternUtils.TYPE_MINUS_SIGN, '+');
}
if (!foundSign) {
ppp = "+" + ppp;
}
}
return getInstance(ppp, psp, false, false);
}
static AffixHolder fromPropertiesNegativePattern(IProperties properties) {
String npp = properties.getNegativePrefixPattern();
String nsp = properties.getNegativeSuffixPattern();
if (npp == null && nsp == null) {
npp = properties.getPositivePrefixPattern();
nsp = properties.getPositiveSuffixPattern();
if (npp == null) {
npp = "-";
} else {
npp = "-" + npp;
}
}
return getInstance(npp, nsp, false, true);
}
@ -768,7 +780,7 @@ public class Parse {
}
static AffixHolder getInstance(String p, String s, boolean strings, boolean negative) {
if (p == null && s == null) return null;
if (p == null && s == null) return negative ? EMPTY_NEGATIVE : EMPTY_POSITIVE;
if (p == null) p = "";
if (s == null) s = "";
if (p.length() == 0 && s.length() == 0) return negative ? EMPTY_NEGATIVE : EMPTY_POSITIVE;
@ -1251,9 +1263,9 @@ public class Parse {
break;
case INSIDE_AFFIX_PATTERN:
acceptAffixPatternOffset(cp, state, item);
long added = acceptAffixPatternOffset(cp, state, item);
// Accept arbitrary bidi and whitespace (if lenient) in the middle of affixes.
if (state.length == 0 && isIgnorable(cp, state)) {
if (added == 0L && isIgnorable(cp, state)) {
state.getNext().copyFrom(item, item.name, cp);
}
break;
@ -1529,10 +1541,32 @@ public class Parse {
*/
private static void acceptMinusOrPlusSign(
int cp, StateName nextName, ParserState state, StateItem item, boolean exponent) {
acceptMinusOrPlusSign(cp, nextName, null, state, item, exponent);
acceptMinusSign(cp, nextName, null, state, item, exponent);
acceptPlusSign(cp, nextName, null, state, item, exponent);
}
private static void acceptMinusOrPlusSign(
private static long acceptMinusSign(
int cp,
StateName returnTo1,
StateName returnTo2,
ParserState state,
StateItem item,
boolean exponent) {
if (UNISET_MINUS.contains(cp)) {
StateItem next = state.getNext().copyFrom(item, returnTo1, -1);
next.returnTo1 = returnTo2;
if (exponent) {
next.sawNegativeExponent = true;
} else {
next.sawNegative = true;
}
return 1L << state.lastInsertedIndex();
} else {
return 0L;
}
}
private static long acceptPlusSign(
int cp,
StateName returnTo1,
StateName returnTo2,
@ -1542,14 +1576,9 @@ public class Parse {
if (UNISET_PLUS.contains(cp)) {
StateItem next = state.getNext().copyFrom(item, returnTo1, -1);
next.returnTo1 = returnTo2;
} else if (UNISET_MINUS.contains(cp)) {
StateItem next = state.getNext().copyFrom(item, returnTo1, -1);
next.returnTo1 = returnTo2;
if (exponent) {
next.sawNegativeExponent = true;
} else {
next.sawNegative = true;
}
return 1L << state.lastInsertedIndex();
} else {
return 0L;
}
}
@ -1700,29 +1729,33 @@ public class Parse {
// At most one item can be added upon consuming a string.
if (added != 0) {
int i = state.lastInsertedIndex();
// The following six lines are duplicated below; not enough for their own function.
state.getItem(i).affix = holder;
if (prefix) state.getItem(i).sawPrefix = true;
if (!prefix) state.getItem(i).sawSuffix = true;
if (holder.negative) state.getItem(i).sawNegative = true;
state.getItem(i).score++; // reward for consuming a prefix/suffix.
recordAffixHolder(holder, state.getItem(i), prefix);
}
} else {
long added = acceptAffixPattern(cp, nextName, state, item, str, 0);
// Multiple items can be added upon consuming an affix pattern.
for (int i = Long.numberOfTrailingZeros(added); (1L << i) <= added; i++) {
if (((1L << i) & added) != 0) {
// The following six lines are duplicated above; not enough for their own function.
state.getItem(i).affix = holder;
if (prefix) state.getItem(i).sawPrefix = true;
if (!prefix) state.getItem(i).sawSuffix = true;
if (holder.negative) state.getItem(i).sawNegative = true;
state.getItem(i).score++; // reward for consuming a prefix/suffix.
recordAffixHolder(holder, state.getItem(i), prefix);
}
}
}
}
private static void recordAffixHolder(AffixHolder holder, StateItem next, boolean prefix) {
next.affix = holder;
if (prefix) next.sawPrefix = true;
if (!prefix) next.sawSuffix = true;
if (holder.negative) next.sawNegative = true;
// 10 point reward for consuming a prefix/suffix:
next.score += 10;
// 1 point reward for positive holders (if there is ambiguity, we want to favor positive):
if (!holder.negative) next.score += 1;
// 5 point reward for affix holders that have an empty prefix or suffix (we won't see them again):
if (!next.sawPrefix && holder.p.isEmpty()) next.score += 5;
if (!next.sawSuffix && holder.s.isEmpty()) next.score += 5;
}
private static void acceptStringOffset(int cp, ParserState state, StateItem item) {
acceptString(
cp,
@ -1814,8 +1847,8 @@ public class Parse {
return 0L;
}
private static void acceptAffixPatternOffset(int cp, ParserState state, StateItem item) {
acceptAffixPattern(
private static long acceptAffixPatternOffset(int cp, ParserState state, StateItem item) {
return acceptAffixPattern(
cp, item.returnTo1, state, item, item.currentAffixPattern, item.currentStepwiseParserTag);
}
@ -1863,12 +1896,16 @@ public class Parse {
resolvedPlusSign = true;
break;
case AffixPatternUtils.TYPE_PERCENT:
resolvedCp = '%'; // accept ASCII percent as well as locale percent
resolvedStr = state.symbols.getPercentString();
if (resolvedStr.length() != 1 || resolvedStr.charAt(0) != '%') {
resolvedCp = '%'; // accept ASCII percent as well as locale percent
}
break;
case AffixPatternUtils.TYPE_PERMILLE:
resolvedCp = '‰'; // accept ASCII permille as well as locale permille
resolvedStr = state.symbols.getPerMillString();
if (resolvedStr.length() != 1 || resolvedStr.charAt(0) != '‰') {
resolvedCp = '‰'; // accept ASCII permille as well as locale permille
}
break;
case AffixPatternUtils.TYPE_CURRENCY_SINGLE:
case AffixPatternUtils.TYPE_CURRENCY_DOUBLE:
@ -1911,22 +1948,29 @@ public class Parse {
}
added |= 1L << state.lastInsertedIndex();
}
if (resolvedMinusSign || resolvedPlusSign) {
// Sign
if (resolvedMinusSign) {
if (hasNext) {
acceptMinusOrPlusSign(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, item, false);
added |= acceptMinusSign(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, item, false);
} else {
acceptMinusOrPlusSign(cp, returnTo, null, state, item, false);
added |= acceptMinusSign(cp, returnTo, null, state, item, false);
}
// Decide whether to accept a custom string
if (resolvedMinusSign) {
if (added == 0L) {
// Attempt to accept custom minus sign string
String mss = state.symbols.getMinusSignString();
int mssCp = Character.codePointAt(mss, 0);
if (mss.length() != Character.charCount(mssCp) || !UNISET_MINUS.contains(mssCp)) {
resolvedStr = mss;
}
}
if (resolvedPlusSign) {
}
if (resolvedPlusSign) {
if (hasNext) {
added |= acceptPlusSign(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, item, false);
} else {
added |= acceptPlusSign(cp, returnTo, null, state, item, false);
}
if (added == 0L) {
// Attempt to accept custom plus sign string
String pss = state.symbols.getPlusSignString();
int pssCp = Character.codePointAt(pss, 0);
if (pss.length() != Character.charCount(pssCp) || !UNISET_MINUS.contains(pssCp)) {

View file

@ -4302,6 +4302,10 @@ public class NumberFormatTest extends TestFmwk {
int end = iterator.getRunLimit();
Iterator it = iterator.getAttributes().keySet().iterator();
AttributedCharacterIterator.Attribute attribute = (AttributedCharacterIterator.Attribute) it.next();
// For positions with both INTEGER and GROUPING attributes, we want the GROUPING attribute.
if (it.hasNext() && attribute.equals(NumberFormat.Field.INTEGER)) {
attribute = (AttributedCharacterIterator.Attribute) it.next();
}
Object value = iterator.getAttribute(attribute);
result.add(new FieldContainer(start, end, attribute, value));
iterator.setIndex(end);
@ -5130,6 +5134,36 @@ public class NumberFormatTest extends TestFmwk {
assertEquals("Quote should be escapable in padding syntax", "a''12b", result);
}
@Test
public void testParseAmbiguousAffixes() {
BigDecimal positive = new BigDecimal("0.0567");
BigDecimal negative = new BigDecimal("-0.0567");
DecimalFormat df = new DecimalFormat();
df.setParseBigDecimal(true);
String[] patterns = { "+0.00%;-0.00%", "+0.00%;0.00%", "0.00%;-0.00%" };
String[] inputs = { "+5.67%", "-5.67%", "5.67%" };
boolean[][] expectedPositive = {
{ true, false, true },
{ true, false, false },
{ true, false, true }
};
for (int i=0; i<patterns.length; i++) {
String pattern = patterns[i];
df.applyPattern(pattern);
for (int j=0; j<inputs.length; j++) {
String input = inputs[j];
ParsePosition ppos = new ParsePosition(0);
Number actual = df.parse(input, ppos);
BigDecimal expected = expectedPositive[i][j] ? positive : negative;
String message = "Pattern " + pattern + " with input " + input;
assertEquals(message, expected, actual);
assertEquals(message, input.length(), ppos.getIndex());
}
}
}
@Test
public void testSignificantDigitsMode() {
String[][] allExpected = {
@ -5240,7 +5274,7 @@ public class NumberFormatTest extends TestFmwk {
}
@Test
public void testPlusSignAlwaysShown() {
public void testPlusSignAlwaysShown() throws ParseException {
double[] numbers = {0.012, 5.78, 0, -0.012, -5.78};
ULocale[] locs = {new ULocale("en-US"), new ULocale("ar-EG"), new ULocale("es-CL")};
String[][][] expecteds = {
@ -5296,6 +5330,19 @@ public class NumberFormatTest extends TestFmwk {
String exp = expecteds[i][j][k];
String act = df.format(d);
assertEquals("Locale " + loc + ", type " + j + ", " + d, exp, act);
BigDecimal parsedExp = BigDecimal.valueOf(d);
if (j == 1) {
// Currency-round expected parse output
int scale = (i == 2) ? 0 : 2;
parsedExp = parsedExp.setScale(scale, BigDecimal.ROUND_HALF_EVEN);
}
Number parsedNum = df.parse(exp);
BigDecimal parsedAct = (parsedNum.getClass() == BigDecimal.class)
? (BigDecimal) parsedNum
: BigDecimal.valueOf(parsedNum.doubleValue());
assertEquals(
"Locale " + loc + ", type " + j + ", " + d + ", " + parsedExp + " => " + parsedAct,
0, parsedExp.compareTo(parsedAct));
}
}
}

View file

@ -96,6 +96,34 @@ public class AffixPatternUtilsTest {
}
}
@Test
public void testContainsReplaceType() {
Object[][] cases = {
{"", false, ""},
{"-", true, "+"},
{"-a", true, "+a"},
{"a-", true, "a+"},
{"a-b", true, "a+b"},
{"--", true, "++"},
{"x", false, "x"}
};
for (Object[] cas : cases) {
String input = (String) cas[0];
boolean hasMinusSign = (Boolean) cas[1];
String output = (String) cas[2];
assertEquals(
"Contains on input " + input,
hasMinusSign,
AffixPatternUtils.containsType(input, AffixPatternUtils.TYPE_MINUS_SIGN));
assertEquals(
"Replace on input" + input,
output,
AffixPatternUtils.replaceType(input, AffixPatternUtils.TYPE_MINUS_SIGN, '+'));
}
}
@Test
public void testInvalid() {
String[] invalidExamples = {"'", "x'", "'x", "'x''", "''x'"};