mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-20 20:19:32 +00:00
ICU-13228 Adding more symbols to localized notation mapper function, including support for multi-char symbols.
X-SVN-Rev: 40185
This commit is contained in:
parent
7351dbcf24
commit
e9c5e5631b
5 changed files with 202 additions and 80 deletions
|
@ -693,6 +693,18 @@ en #0% 0.4376 44%
|
|||
// This next test breaks JDK. JDK doesn't multiply by 100.
|
||||
fa \u0025\u00a0\u0023\u0030 0.4376 \u200e\u066a\u00a0\u06f4\u06f4 K
|
||||
|
||||
test localized pattern basic symbol coverage
|
||||
begin
|
||||
locale localizedPattern toPattern breaks
|
||||
it #.##0,00 #,##0.00
|
||||
// JDK either doesn't know sl uses this character for minus sign
|
||||
// or doesn't support minus sign in localized pattern
|
||||
sl #.##0;#.##0− #,##0;#,##0- K
|
||||
// JDK does not have data for "×10^" in this locale
|
||||
en_SE 0,00×10^0;0,00×10^0- 0.00E0;0.00E0- K
|
||||
// JDK does not seem to transform the digits in localized patterns
|
||||
ar_SA #\u066C##\u0660\u066B\u0660\u0660\u061Ba# #,##0.00;a#,##0.00 K
|
||||
|
||||
test toPattern
|
||||
set locale en
|
||||
begin
|
||||
|
|
|
@ -278,6 +278,11 @@ public class PatternString {
|
|||
* pattern "0.000" means "decimal" in standard notation (as it does in every other locale), but it
|
||||
* means "grouping" in localized notation.
|
||||
*
|
||||
* <p>A greedy string-substitution strategy is used to substitute locale symbols. If two symbols
|
||||
* are ambiguous or have the same prefix, the result is not well-defined.
|
||||
*
|
||||
* <p>Locale symbols are not allowed to contain the ASCII quote character.
|
||||
*
|
||||
* @param input The pattern to convert.
|
||||
* @param symbols The symbols corresponding to the localized pattern.
|
||||
* @param toLocalized true to convert from standard to localized notation; false to convert from
|
||||
|
@ -288,100 +293,136 @@ public class PatternString {
|
|||
*/
|
||||
@Deprecated
|
||||
public static String convertLocalized(
|
||||
CharSequence input, DecimalFormatSymbols symbols, boolean toLocalized) {
|
||||
String input, DecimalFormatSymbols symbols, boolean toLocalized) {
|
||||
if (input == null) return null;
|
||||
|
||||
/// This is not the prettiest function in the world, but it gets the job done. ///
|
||||
|
||||
// Construct a table of code points to be converted between localized and standard.
|
||||
int[][] table = new int[6][2];
|
||||
// Construct a table of strings to be converted between localized and standard.
|
||||
String[][] table = new String[21][2];
|
||||
int standIdx = toLocalized ? 0 : 1;
|
||||
int localIdx = toLocalized ? 1 : 0;
|
||||
table[0][standIdx] = '%';
|
||||
table[0][localIdx] = symbols.getPercent();
|
||||
table[1][standIdx] = '‰';
|
||||
table[1][localIdx] = symbols.getPerMill();
|
||||
table[2][standIdx] = '.';
|
||||
table[2][localIdx] = symbols.getDecimalSeparator();
|
||||
table[3][standIdx] = ',';
|
||||
table[3][localIdx] = symbols.getGroupingSeparator();
|
||||
table[4][standIdx] = '-';
|
||||
table[4][localIdx] = symbols.getMinusSign();
|
||||
table[5][standIdx] = '+';
|
||||
table[5][localIdx] = symbols.getPlusSign();
|
||||
table[0][standIdx] = "%";
|
||||
table[0][localIdx] = symbols.getPercentString();
|
||||
table[1][standIdx] = "‰";
|
||||
table[1][localIdx] = symbols.getPerMillString();
|
||||
table[2][standIdx] = ".";
|
||||
table[2][localIdx] = symbols.getDecimalSeparatorString();
|
||||
table[3][standIdx] = ",";
|
||||
table[3][localIdx] = symbols.getGroupingSeparatorString();
|
||||
table[4][standIdx] = "-";
|
||||
table[4][localIdx] = symbols.getMinusSignString();
|
||||
table[5][standIdx] = "+";
|
||||
table[5][localIdx] = symbols.getPlusSignString();
|
||||
table[6][standIdx] = ";";
|
||||
table[6][localIdx] = Character.toString(symbols.getPatternSeparator());
|
||||
table[7][standIdx] = "@";
|
||||
table[7][localIdx] = Character.toString(symbols.getSignificantDigit());
|
||||
table[8][standIdx] = "E";
|
||||
table[8][localIdx] = symbols.getExponentSeparator();
|
||||
table[9][standIdx] = "*";
|
||||
table[9][localIdx] = Character.toString(symbols.getPadEscape());
|
||||
table[10][standIdx] = "#";
|
||||
table[10][localIdx] = Character.toString(symbols.getDigit());
|
||||
for (int i = 0; i < 10; i++) {
|
||||
table[11 + i][standIdx] = Character.toString((char) ('0' + i));
|
||||
table[11 + i][localIdx] = symbols.getDigitStringsLocal()[i];
|
||||
}
|
||||
|
||||
// Special case: localIdx characters are NOT allowed to be quotes, like in de_CH.
|
||||
// Use '’' instead.
|
||||
// Special case: quotes are NOT allowed to be in any localIdx strings.
|
||||
// Substitute them with '’' instead.
|
||||
for (int i = 0; i < table.length; i++) {
|
||||
if (table[i][localIdx] == '\'') {
|
||||
table[i][localIdx] = '’';
|
||||
}
|
||||
table[i][localIdx] = table[i][localIdx].replace('\'', '’');
|
||||
}
|
||||
|
||||
// Iterate through the string and convert
|
||||
int offset = 0;
|
||||
int state = 0;
|
||||
// Iterate through the string and convert.
|
||||
// State table:
|
||||
// 0 => base state
|
||||
// 1 => first char inside a quoted sequence in input and output string
|
||||
// 2 => inside a quoted sequence in input and output string
|
||||
// 3 => first char after a close quote in input string;
|
||||
// close quote still needs to be written to output string
|
||||
// 4 => base state in input string; inside quoted sequence in output string
|
||||
// 5 => first char inside a quoted sequence in input string;
|
||||
// inside quoted sequence in output string
|
||||
StringBuilder result = new StringBuilder();
|
||||
for (; offset < input.length(); ) {
|
||||
int cp = Character.codePointAt(input, offset);
|
||||
int cpToAppend = cp;
|
||||
int state = 0;
|
||||
outer:
|
||||
for (int offset = 0; offset < input.length(); offset++) {
|
||||
char ch = input.charAt(offset);
|
||||
|
||||
if (state == 1 || state == 3 || state == 4) {
|
||||
// Inside user-specified quote
|
||||
if (cp == '\'') {
|
||||
if (state == 1) {
|
||||
state = 0;
|
||||
} else if (state == 3) {
|
||||
state = 2;
|
||||
cpToAppend = -1;
|
||||
} else {
|
||||
state = 2;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Base state or inside special character quote
|
||||
if (cp == '\'') {
|
||||
if (state == 2 && offset + 1 < input.length()) {
|
||||
int nextCp = Character.codePointAt(input, offset + 1);
|
||||
if (nextCp == '\'') {
|
||||
// escaped quote
|
||||
state = 4;
|
||||
} else {
|
||||
// begin user-specified quote sequence
|
||||
// we are already in a quote sequence, so omit the opening quote
|
||||
state = 3;
|
||||
cpToAppend = -1;
|
||||
}
|
||||
} else {
|
||||
state = 1;
|
||||
}
|
||||
// Handle a quote character (state shift)
|
||||
if (ch == '\'') {
|
||||
if (state == 0) {
|
||||
result.append('\'');
|
||||
state = 1;
|
||||
continue;
|
||||
} else if (state == 1) {
|
||||
result.append('\'');
|
||||
state = 0;
|
||||
continue;
|
||||
} else if (state == 2) {
|
||||
state = 3;
|
||||
continue;
|
||||
} else if (state == 3) {
|
||||
result.append('\'');
|
||||
result.append('\'');
|
||||
state = 1;
|
||||
continue;
|
||||
} else if (state == 4) {
|
||||
state = 5;
|
||||
continue;
|
||||
} else {
|
||||
boolean needsSpecialQuote = false;
|
||||
for (int i = 0; i < table.length; i++) {
|
||||
if (table[i][0] == cp) {
|
||||
cpToAppend = table[i][1];
|
||||
needsSpecialQuote = false; // in case an earlier translation triggered it
|
||||
break;
|
||||
} else if (table[i][1] == cp) {
|
||||
needsSpecialQuote = true;
|
||||
}
|
||||
}
|
||||
if (state == 0 && needsSpecialQuote) {
|
||||
state = 2;
|
||||
result.appendCodePoint('\'');
|
||||
} else if (state == 2 && !needsSpecialQuote) {
|
||||
state = 0;
|
||||
result.appendCodePoint('\'');
|
||||
}
|
||||
assert state == 5;
|
||||
result.append('\'');
|
||||
result.append('\'');
|
||||
state = 4;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (cpToAppend != -1) {
|
||||
result.appendCodePoint(cpToAppend);
|
||||
|
||||
if (state == 0 || state == 3 || state == 4) {
|
||||
for (String[] pair : table) {
|
||||
// Perform a greedy match on this symbol string
|
||||
if (input.regionMatches(offset, pair[0], 0, pair[0].length())) {
|
||||
// Skip ahead past this region for the next iteration
|
||||
offset += pair[0].length() - 1;
|
||||
if (state == 3 || state == 4) {
|
||||
result.append('\'');
|
||||
state = 0;
|
||||
}
|
||||
result.append(pair[1]);
|
||||
continue outer;
|
||||
}
|
||||
}
|
||||
// No replacement found. Check if a special quote is necessary
|
||||
for (String[] pair : table) {
|
||||
if (input.regionMatches(offset, pair[1], 0, pair[1].length())) {
|
||||
if (state == 0) {
|
||||
result.append('\'');
|
||||
state = 4;
|
||||
}
|
||||
result.append(ch);
|
||||
continue outer;
|
||||
}
|
||||
}
|
||||
// Still nothing. Copy the char verbatim. (Add a close quote if necessary)
|
||||
if (state == 3 || state == 4) {
|
||||
result.append('\'');
|
||||
state = 0;
|
||||
}
|
||||
result.append(ch);
|
||||
} else {
|
||||
assert state == 1 || state == 2 || state == 5;
|
||||
result.append(ch);
|
||||
state = 2;
|
||||
}
|
||||
offset += Character.charCount(cp);
|
||||
}
|
||||
if (state == 2) {
|
||||
result.appendCodePoint('\'');
|
||||
// Resolve final quotes
|
||||
if (state == 3 || state == 4) {
|
||||
result.append('\'');
|
||||
state = 0;
|
||||
}
|
||||
if (state != 0) {
|
||||
throw new IllegalArgumentException("Malformed localized pattern: unterminated quote");
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
|
|
@ -693,6 +693,18 @@ en #0% 0.4376 44%
|
|||
// This next test breaks JDK. JDK doesn't multiply by 100.
|
||||
fa \u0025\u00a0\u0023\u0030 0.4376 \u200e\u066a\u00a0\u06f4\u06f4 K
|
||||
|
||||
test localized pattern basic symbol coverage
|
||||
begin
|
||||
locale localizedPattern toPattern breaks
|
||||
it #.##0,00 #,##0.00
|
||||
// JDK either doesn't know sl uses this character for minus sign
|
||||
// or doesn't support minus sign in localized pattern
|
||||
sl #.##0;#.##0− #,##0;#,##0- K
|
||||
// JDK does not have data for "×10^" in this locale
|
||||
en_SE 0,00×10^0;0,00×10^0- 0.00E0;0.00E0- K
|
||||
// JDK does not seem to transform the digits in localized patterns
|
||||
ar_SA #\u066C##\u0660\u066B\u0660\u0660\u061Ba# #,##0.00;a#,##0.00 K
|
||||
|
||||
test toPattern
|
||||
set locale en
|
||||
begin
|
||||
|
|
|
@ -753,7 +753,10 @@ public class NumberFormatDataDrivenTest {
|
|||
properties.setNegativeSuffix(tuple.negativeSuffix);
|
||||
}
|
||||
if (tuple.localizedPattern != null) {
|
||||
// TODO
|
||||
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(tuple.locale);
|
||||
String converted =
|
||||
PatternString.convertLocalized(tuple.localizedPattern, symbols, false);
|
||||
PatternString.parseToExistingProperties(converted, properties);
|
||||
}
|
||||
if (tuple.lenient != null) {
|
||||
properties.setParseMode(tuple.lenient == 0 ? ParseMode.STRICT : ParseMode.LENIENT);
|
||||
|
|
|
@ -51,6 +51,7 @@ import com.ibm.icu.text.DecimalFormat;
|
|||
import com.ibm.icu.text.DecimalFormat.PropertySetter;
|
||||
import com.ibm.icu.text.DecimalFormat.SignificantDigitsMode;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
import com.ibm.icu.text.DecimalFormat_ICU58;
|
||||
import com.ibm.icu.text.DisplayContext;
|
||||
import com.ibm.icu.text.MeasureFormat;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
|
@ -1597,6 +1598,59 @@ public class NumberFormatTest extends TestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Checks round-tripping between standard and localized pattern notation when every
 * pattern symbol in the DecimalFormatSymbols has been replaced with a custom character
 * (and, for the exponent separator, a two-character string).
 *
 * <p>For each pattern pair the test applies the standard pattern to one formatter and the
 * localized pattern to another, then compares the formatters and their toPattern() /
 * toLocalizedPattern() output. The same pattern checks are repeated against
 * DecimalFormat_ICU58 using the "58" variants of the expected strings.
 */
@Test
|
||||
public void TestLocalizedPatternSymbolCoverage() {
|
||||
// Index i of each array below describes the same pattern: standard notation, the
// standard notation expected from ICU 58, localized notation, and the localized
// notation used with ICU 58.
String[] standardPatterns = { "#,##0.05+%;#,##0.05-%", "* @@@E0‰" };
|
||||
String[] standardPatterns58 = { "#,##0.05+%;#,##0.05-%", "* @@@E0‰;* -@@@E0‰" };
|
||||
String[] localizedPatterns = { "▰⁖▰▰໐⁘໐໕†⁜⁙▰⁖▰▰໐⁘໐໕‡⁜", "⁂ ⁕⁕⁕⁑⁑໐‱" };
|
||||
String[] localizedPatterns58 = { "▰⁖▰▰໐⁘໐໕+⁜⁙▰⁖▰▰໐⁘໐໕‡⁜", "⁂ ⁕⁕⁕⁑⁑໐‱⁙⁂ ‡⁕⁕⁕⁑⁑໐‱" };
|
||||

||||
// Replace each pattern symbol with a distinctive character so that the localized
// patterns above cannot be confused with their standard-notation counterparts.
DecimalFormatSymbols dfs = new DecimalFormatSymbols();
|
||||
dfs.setGroupingSeparator('⁖');
|
||||
dfs.setDecimalSeparator('⁘');
|
||||
dfs.setPatternSeparator('⁙');
|
||||
dfs.setDigit('▰');
|
||||
dfs.setZeroDigit('໐');
|
||||
dfs.setSignificantDigit('⁕');
|
||||
dfs.setPlusSign('†');
|
||||
dfs.setMinusSign('‡');
|
||||
dfs.setPercent('⁜');
|
||||
dfs.setPerMill('‱');
|
||||
dfs.setExponentSeparator("⁑⁑"); // tests multi-char sequence
|
||||
dfs.setPadEscape('⁂');
|
||||

||||
// The bound 2 matches the length of the four pattern arrays declared above.
for (int i=0; i<2; i++) {
|
||||
String standardPattern = standardPatterns[i];
|
||||
String standardPattern58 = standardPatterns58[i];
|
||||
String localizedPattern = localizedPatterns[i];
|
||||
String localizedPattern58 = localizedPatterns58[i];
|
||||

||||
// df1 is configured from standard notation and df2 from localized notation;
// both start from the same "#" pattern and the same custom symbols.
DecimalFormat df1 = new DecimalFormat("#", dfs);
|
||||
df1.applyPattern(standardPattern);
|
||||
DecimalFormat df2 = new DecimalFormat("#", dfs);
|
||||
df2.applyLocalizedPattern(localizedPattern);
|
||||
assertEquals("DecimalFormat instances should be equal",
|
||||
df1, df2);
|
||||
assertEquals("toPattern should match on localizedPattern instance",
|
||||
standardPattern, df2.toPattern());
|
||||
assertEquals("toLocalizedPattern should match on standardPattern instance",
|
||||
localizedPattern, df1.toLocalizedPattern());
|
||||

||||
// Note: ICU 58 does not support plus signs in patterns
|
||||
// Note: ICU 58 always prints the negative part of scientific notation patterns,
|
||||
// even when the negative part is not necessary
|
||||
DecimalFormat_ICU58 df3 = new DecimalFormat_ICU58("#", dfs);
|
||||
df3.applyPattern(standardPattern); // Reading standardPattern is OK
|
||||
DecimalFormat_ICU58 df4 = new DecimalFormat_ICU58("#", dfs);
|
||||
df4.applyLocalizedPattern(localizedPattern58);
|
||||
// Note: DecimalFormat#equals() is broken on ICU 58
|
||||
// (so df3 and df4 are compared only through their pattern strings below)
assertEquals("toPattern should match on ICU58 localizedPattern instance",
|
||||
standardPattern58, df4.toPattern());
|
||||
assertEquals("toLocalizedPattern should match on ICU58 standardPattern instance",
|
||||
localizedPattern58, df3.toLocalizedPattern());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestParseNull() throws ParseException {
|
||||
DecimalFormat df = new DecimalFormat();
|
||||
|
|
Loading…
Add table
Reference in a new issue