ICU-13228 Adding more symbols to localized notation mapper function, including support for multi-char symbols.

X-SVN-Rev: 40185
This commit is contained in:
Shane Carr 2017-06-21 00:38:25 +00:00
parent 7351dbcf24
commit e9c5e5631b
5 changed files with 202 additions and 80 deletions

View file

@ -693,6 +693,18 @@ en #0% 0.4376 44%
// This next test breaks JDK. JDK doesn't multiply by 100.
fa \u0025\u00a0\u0023\u0030 0.4376 \u200e\u066a\u00a0\u06f4\u06f4 K
test localized pattern basic symbol coverage
begin
locale localizedPattern toPattern breaks
it #.##0,00 #,##0.00
// JDK either doesn't know sl uses this character for minus sign
// or doesn't support minus sign in localized pattern
sl #.##0;#.##0 #,##0;#,##0- K
// JDK does not have data for "×10^" in this locale
en_SE 0,00×10^0;0,00×10^0- 0.00E0;0.00E0- K
// JDK does not seem to transform the digits in localized patterns
ar_SA #\u066C##\u0660\u066B\u0660\u0660\u061Ba# #,##0.00;a#,##0.00 K
test toPattern
set locale en
begin

View file

@ -278,6 +278,11 @@ public class PatternString {
* pattern "0.000" means "decimal" in standard notation (as it does in every other locale), but it
* means "grouping" in localized notation.
*
* <p>A greedy string-substitution strategy is used to substitute locale symbols. If two symbols
* are ambiguous or have the same prefix, the result is not well-defined.
*
* <p>Locale symbols are not allowed to contain the ASCII quote character.
*
* @param input The pattern to convert.
* @param symbols The symbols corresponding to the localized pattern.
* @param toLocalized true to convert from standard to localized notation; false to convert from
@ -288,100 +293,136 @@ public class PatternString {
*/
@Deprecated
public static String convertLocalized(
CharSequence input, DecimalFormatSymbols symbols, boolean toLocalized) {
String input, DecimalFormatSymbols symbols, boolean toLocalized) {
if (input == null) return null;
/// This is not the prettiest function in the world, but it gets the job done. ///
// Construct a table of code points to be converted between localized and standard.
int[][] table = new int[6][2];
// Construct a table of strings to be converted between localized and standard.
String[][] table = new String[21][2];
int standIdx = toLocalized ? 0 : 1;
int localIdx = toLocalized ? 1 : 0;
table[0][standIdx] = '%';
table[0][localIdx] = symbols.getPercent();
table[1][standIdx] = '‰';
table[1][localIdx] = symbols.getPerMill();
table[2][standIdx] = '.';
table[2][localIdx] = symbols.getDecimalSeparator();
table[3][standIdx] = ',';
table[3][localIdx] = symbols.getGroupingSeparator();
table[4][standIdx] = '-';
table[4][localIdx] = symbols.getMinusSign();
table[5][standIdx] = '+';
table[5][localIdx] = symbols.getPlusSign();
table[0][standIdx] = "%";
table[0][localIdx] = symbols.getPercentString();
table[1][standIdx] = "‰";
table[1][localIdx] = symbols.getPerMillString();
table[2][standIdx] = ".";
table[2][localIdx] = symbols.getDecimalSeparatorString();
table[3][standIdx] = ",";
table[3][localIdx] = symbols.getGroupingSeparatorString();
table[4][standIdx] = "-";
table[4][localIdx] = symbols.getMinusSignString();
table[5][standIdx] = "+";
table[5][localIdx] = symbols.getPlusSignString();
table[6][standIdx] = ";";
table[6][localIdx] = Character.toString(symbols.getPatternSeparator());
table[7][standIdx] = "@";
table[7][localIdx] = Character.toString(symbols.getSignificantDigit());
table[8][standIdx] = "E";
table[8][localIdx] = symbols.getExponentSeparator();
table[9][standIdx] = "*";
table[9][localIdx] = Character.toString(symbols.getPadEscape());
table[10][standIdx] = "#";
table[10][localIdx] = Character.toString(symbols.getDigit());
for (int i = 0; i < 10; i++) {
table[11 + i][standIdx] = Character.toString((char) ('0' + i));
table[11 + i][localIdx] = symbols.getDigitStringsLocal()[i];
}
// Special case: localIdx characters are NOT allowed to be quotes, like in de_CH.
// Use '’' instead.
// Special case: quotes are NOT allowed to be in any localIdx strings.
// Substitute them with '’' instead.
for (int i = 0; i < table.length; i++) {
if (table[i][localIdx] == '\'') {
table[i][localIdx] = '’';
}
table[i][localIdx] = table[i][localIdx].replace('\'', '’');
}
// Iterate through the string and convert
int offset = 0;
int state = 0;
// Iterate through the string and convert.
// State table:
// 0 => base state
// 1 => first char inside a quoted sequence in input and output string
// 2 => inside a quoted sequence in input and output string
// 3 => first char after a close quote in input string;
// close quote still needs to be written to output string
// 4 => base state in input string; inside quoted sequence in output string
// 5 => first char inside a quoted sequence in input string;
// inside quoted sequence in output string
StringBuilder result = new StringBuilder();
for (; offset < input.length(); ) {
int cp = Character.codePointAt(input, offset);
int cpToAppend = cp;
int state = 0;
outer:
for (int offset = 0; offset < input.length(); offset++) {
char ch = input.charAt(offset);
if (state == 1 || state == 3 || state == 4) {
// Inside user-specified quote
if (cp == '\'') {
if (state == 1) {
state = 0;
} else if (state == 3) {
state = 2;
cpToAppend = -1;
} else {
state = 2;
}
}
} else {
// Base state or inside special character quote
if (cp == '\'') {
if (state == 2 && offset + 1 < input.length()) {
int nextCp = Character.codePointAt(input, offset + 1);
if (nextCp == '\'') {
// escaped quote
state = 4;
} else {
// begin user-specified quote sequence
// we are already in a quote sequence, so omit the opening quote
state = 3;
cpToAppend = -1;
}
} else {
state = 1;
}
// Handle a quote character (state shift)
if (ch == '\'') {
if (state == 0) {
result.append('\'');
state = 1;
continue;
} else if (state == 1) {
result.append('\'');
state = 0;
continue;
} else if (state == 2) {
state = 3;
continue;
} else if (state == 3) {
result.append('\'');
result.append('\'');
state = 1;
continue;
} else if (state == 4) {
state = 5;
continue;
} else {
boolean needsSpecialQuote = false;
for (int i = 0; i < table.length; i++) {
if (table[i][0] == cp) {
cpToAppend = table[i][1];
needsSpecialQuote = false; // in case an earlier translation triggered it
break;
} else if (table[i][1] == cp) {
needsSpecialQuote = true;
}
}
if (state == 0 && needsSpecialQuote) {
state = 2;
result.appendCodePoint('\'');
} else if (state == 2 && !needsSpecialQuote) {
state = 0;
result.appendCodePoint('\'');
}
assert state == 5;
result.append('\'');
result.append('\'');
state = 4;
continue;
}
}
if (cpToAppend != -1) {
result.appendCodePoint(cpToAppend);
if (state == 0 || state == 3 || state == 4) {
for (String[] pair : table) {
// Perform a greedy match on this symbol string
if (input.regionMatches(offset, pair[0], 0, pair[0].length())) {
// Skip ahead past this region for the next iteration
offset += pair[0].length() - 1;
if (state == 3 || state == 4) {
result.append('\'');
state = 0;
}
result.append(pair[1]);
continue outer;
}
}
// No replacement found. Check if a special quote is necessary
for (String[] pair : table) {
if (input.regionMatches(offset, pair[1], 0, pair[1].length())) {
if (state == 0) {
result.append('\'');
state = 4;
}
result.append(ch);
continue outer;
}
}
// Still nothing. Copy the char verbatim. (Add a close quote if necessary)
if (state == 3 || state == 4) {
result.append('\'');
state = 0;
}
result.append(ch);
} else {
assert state == 1 || state == 2 || state == 5;
result.append(ch);
state = 2;
}
offset += Character.charCount(cp);
}
if (state == 2) {
result.appendCodePoint('\'');
// Resolve final quotes
if (state == 3 || state == 4) {
result.append('\'');
state = 0;
}
if (state != 0) {
throw new IllegalArgumentException("Malformed localized pattern: unterminated quote");
}
return result.toString();
}

View file

@ -693,6 +693,18 @@ en #0% 0.4376 44%
// This next test breaks JDK. JDK doesn't multiply by 100.
fa \u0025\u00a0\u0023\u0030 0.4376 \u200e\u066a\u00a0\u06f4\u06f4 K
test localized pattern basic symbol coverage
begin
locale localizedPattern toPattern breaks
it #.##0,00 #,##0.00
// JDK either doesn't know sl uses this character for minus sign
// or doesn't support minus sign in localized pattern
sl #.##0;#.##0 #,##0;#,##0- K
// JDK does not have data for "×10^" in this locale
en_SE 0,00×10^0;0,00×10^0- 0.00E0;0.00E0- K
// JDK does not seem to transform the digits in localized patterns
ar_SA #\u066C##\u0660\u066B\u0660\u0660\u061Ba# #,##0.00;a#,##0.00 K
test toPattern
set locale en
begin

View file

@ -753,7 +753,10 @@ public class NumberFormatDataDrivenTest {
properties.setNegativeSuffix(tuple.negativeSuffix);
}
if (tuple.localizedPattern != null) {
// TODO
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(tuple.locale);
String converted =
PatternString.convertLocalized(tuple.localizedPattern, symbols, false);
PatternString.parseToExistingProperties(converted, properties);
}
if (tuple.lenient != null) {
properties.setParseMode(tuple.lenient == 0 ? ParseMode.STRICT : ParseMode.LENIENT);

View file

@ -51,6 +51,7 @@ import com.ibm.icu.text.DecimalFormat;
import com.ibm.icu.text.DecimalFormat.PropertySetter;
import com.ibm.icu.text.DecimalFormat.SignificantDigitsMode;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.DecimalFormat_ICU58;
import com.ibm.icu.text.DisplayContext;
import com.ibm.icu.text.MeasureFormat;
import com.ibm.icu.text.NumberFormat;
@ -1597,6 +1598,59 @@ public class NumberFormatTest extends TestFmwk {
}
}
@Test
public void TestLocalizedPatternSymbolCoverage() {
String[] standardPatterns = { "#,##0.05+%;#,##0.05-%", "* @@@E0‰" };
String[] standardPatterns58 = { "#,##0.05+%;#,##0.05-%", "* @@@E0‰;* -@@@E0‰" };
String[] localizedPatterns = { "▰⁖▰▰໐⁘໐໕†⁜⁙▰⁖▰▰໐⁘໐໕‡⁜", "⁂ ⁕⁕⁕⁑⁑໐‱" };
String[] localizedPatterns58 = { "▰⁖▰▰໐⁘໐໕+⁜⁙▰⁖▰▰໐⁘໐໕‡⁜", "⁂ ⁕⁕⁕⁑⁑໐‱⁙⁂ ‡⁕⁕⁕⁑⁑໐‱" };
DecimalFormatSymbols dfs = new DecimalFormatSymbols();
dfs.setGroupingSeparator('⁖');
dfs.setDecimalSeparator('⁘');
dfs.setPatternSeparator('⁙');
dfs.setDigit('▰');
dfs.setZeroDigit('');
dfs.setSignificantDigit('⁕');
dfs.setPlusSign('†');
dfs.setMinusSign('‡');
dfs.setPercent('⁜');
dfs.setPerMill('‱');
dfs.setExponentSeparator("⁑⁑"); // tests multi-char sequence
dfs.setPadEscape('⁂');
for (int i=0; i<2; i++) {
String standardPattern = standardPatterns[i];
String standardPattern58 = standardPatterns58[i];
String localizedPattern = localizedPatterns[i];
String localizedPattern58 = localizedPatterns58[i];
DecimalFormat df1 = new DecimalFormat("#", dfs);
df1.applyPattern(standardPattern);
DecimalFormat df2 = new DecimalFormat("#", dfs);
df2.applyLocalizedPattern(localizedPattern);
assertEquals("DecimalFormat instances should be equal",
df1, df2);
assertEquals("toPattern should match on localizedPattern instance",
standardPattern, df2.toPattern());
assertEquals("toLocalizedPattern should match on standardPattern instance",
localizedPattern, df1.toLocalizedPattern());
// Note: ICU 58 does not support plus signs in patterns
// Note: ICU 58 always prints the negative part of scientific notation patterns,
// even when the negative part is not necessary
DecimalFormat_ICU58 df3 = new DecimalFormat_ICU58("#", dfs);
df3.applyPattern(standardPattern); // Reading standardPattern is OK
DecimalFormat_ICU58 df4 = new DecimalFormat_ICU58("#", dfs);
df4.applyLocalizedPattern(localizedPattern58);
// Note: DecimalFormat#equals() is broken on ICU 58
assertEquals("toPattern should match on ICU58 localizedPattern instance",
standardPattern58, df4.toPattern());
assertEquals("toLocalizedPattern should match on ICU58 standardPattern instance",
localizedPattern58, df3.toLocalizedPattern());
}
}
@Test
public void TestParseNull() throws ParseException {
DecimalFormat df = new DecimalFormat();