ICU-13228 Adding more symbols to localized notation mapper function, including support for multi-char symbols.

X-SVN-Rev: 40185
2025-04-20 20:19:32 +00:00 · 2017-06-21 00:38:25 +00:00 · 2017-06-21 00:38:25 +00:00 · e9c5e5631b
commit e9c5e5631b
parent 7351dbcf24
5 changed files with 202 additions and 80 deletions
--- a/icu4c/source/test/testdata/numberformattestspecification.txt
+++ b/icu4c/source/test/testdata/numberformattestspecification.txt
@ -693,6 +693,18 @@ en	#0%	0.4376	44%
 // This next test breaks JDK. JDK doesn't multiply by 100.
 fa	\u0025\u00a0\u0023\u0030	0.4376	\u200e\u066a\u00a0\u06f4\u06f4	K

+test localized pattern basic symbol coverage
+begin
+locale	localizedPattern	toPattern	breaks
+it	#.##0,00	#,##0.00
+// JDK either doesn't know sl uses this character for minus sign
+// or doesn't support minus sign in localized pattern
+sl	#.##0;#.##0−	#,##0;#,##0-	K
+// JDK does not have data for "×10^" in this locale
+en_SE	0,00×10^0;0,00×10^0-	0.00E0;0.00E0-	K
+// JDK does not seem to transform the digits in localized patterns
+ar_SA	#\u066C##\u0660\u066B\u0660\u0660\u061Ba#	#,##0.00;a#,##0.00	K
+
 test toPattern
 set locale en
 begin
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/PatternString.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/PatternString.java
@ -278,6 +278,11 @@ public class PatternString {
   * pattern "0.000" means "decimal" in standard notation (as it does in every other locale), but it
   * means "grouping" in localized notation.
   *
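+   * <p>A greedy string-substitution strategy is used to substitute locale symbols: at each
+   * position, the first symbol in the conversion table that matches is replaced. If two symbols
+   * are identical, or one symbol is a prefix of another, the result is not well-defined.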
+   *
+   * <p>Locale symbols are not allowed to contain the ASCII quote character.
+   *
   * @param input The pattern to convert.
   * @param symbols The symbols corresponding to the localized pattern.
   * @param toLocalized true to convert from standard to localized notation; false to convert from
@ -288,100 +293,136 @@ public class PatternString {
   */
  @Deprecated
  public static String convertLocalized(
-      CharSequence input, DecimalFormatSymbols symbols, boolean toLocalized) {
+      String input, DecimalFormatSymbols symbols, boolean toLocalized) {
    if (input == null) return null;

-    /// This is not the prettiest function in the world, but it gets the job done. ///
-
-    // Construct a table of code points to be converted between localized and standard.
-    int[][] table = new int[6][2];
+    // Construct a table of strings to be converted between localized and standard.
+    String[][] table = new String[21][2];
    int standIdx = toLocalized ? 0 : 1;
    int localIdx = toLocalized ? 1 : 0;
-    table[0][standIdx] = '%';
-    table[0][localIdx] = symbols.getPercent();
-    table[1][standIdx] = '‰';
-    table[1][localIdx] = symbols.getPerMill();
-    table[2][standIdx] = '.';
-    table[2][localIdx] = symbols.getDecimalSeparator();
-    table[3][standIdx] = ',';
-    table[3][localIdx] = symbols.getGroupingSeparator();
-    table[4][standIdx] = '-';
-    table[4][localIdx] = symbols.getMinusSign();
-    table[5][standIdx] = '+';
-    table[5][localIdx] = symbols.getPlusSign();
+    table[0][standIdx] = "%";
+    table[0][localIdx] = symbols.getPercentString();
+    table[1][standIdx] = "‰";
+    table[1][localIdx] = symbols.getPerMillString();
+    table[2][standIdx] = ".";
+    table[2][localIdx] = symbols.getDecimalSeparatorString();
+    table[3][standIdx] = ",";
+    table[3][localIdx] = symbols.getGroupingSeparatorString();
+    table[4][standIdx] = "-";
+    table[4][localIdx] = symbols.getMinusSignString();
+    table[5][standIdx] = "+";
+    table[5][localIdx] = symbols.getPlusSignString();
+    table[6][standIdx] = ";";
+    table[6][localIdx] = Character.toString(symbols.getPatternSeparator());
+    table[7][standIdx] = "@";
+    table[7][localIdx] = Character.toString(symbols.getSignificantDigit());
+    table[8][standIdx] = "E";
+    table[8][localIdx] = symbols.getExponentSeparator();
+    table[9][standIdx] = "*";
+    table[9][localIdx] = Character.toString(symbols.getPadEscape());
+    table[10][standIdx] = "#";
+    table[10][localIdx] = Character.toString(symbols.getDigit());
+    for (int i = 0; i < 10; i++) {
+      table[11 + i][standIdx] = Character.toString((char) ('0' + i));
+      table[11 + i][localIdx] = symbols.getDigitStringsLocal()[i];
+    }

-    // Special case: localIdx characters are NOT allowed to be quotes, like in de_CH.
-    // Use '’' instead.
+    // Special case: quotes are NOT allowed to be in any localIdx strings.
+    // Substitute them with '’' instead.
    for (int i = 0; i < table.length; i++) {
-      if (table[i][localIdx] == '\'') {
-        table[i][localIdx] = '’';
-      }
+      table[i][localIdx] = table[i][localIdx].replace('\'', '’');
    }

-    // Iterate through the string and convert
-    int offset = 0;
-    int state = 0;
+    // Iterate through the string and convert.
+    // State table:
+    //  0 => base state
+    //  1 => first char inside a quoted sequence in input and output string
+    //  2 => inside a quoted sequence in input and output string
+    //  3 => first char after a close quote in input string;
+    //       close quote still needs to be written to output string
+    //  4 => base state in input string; inside quoted sequence in output string
+    //  5 => first char inside a quoted sequence in input string;
+    //       inside quoted sequence in output string
    StringBuilder result = new StringBuilder();
-    for (; offset < input.length(); ) {
-      int cp = Character.codePointAt(input, offset);
-      int cpToAppend = cp;
+    int state = 0;
+    outer:
+    for (int offset = 0; offset < input.length(); offset++) {
+      char ch = input.charAt(offset);

-      if (state == 1 || state == 3 || state == 4) {
-        // Inside user-specified quote
-        if (cp == '\'') {
-          if (state == 1) {
-            state = 0;
-          } else if (state == 3) {
-            state = 2;
-            cpToAppend = -1;
-          } else {
-            state = 2;
-          }
-        }
-      } else {
-        // Base state or inside special character quote
-        if (cp == '\'') {
-          if (state == 2 && offset + 1 < input.length()) {
-            int nextCp = Character.codePointAt(input, offset + 1);
-            if (nextCp == '\'') {
-              // escaped quote
-              state = 4;
-            } else {
-              // begin user-specified quote sequence
-              // we are already in a quote sequence, so omit the opening quote
-              state = 3;
-              cpToAppend = -1;
-            }
-          } else {
-            state = 1;
-          }
+      // Handle a quote character (state shift)
+      if (ch == '\'') {
+        if (state == 0) {
+          result.append('\'');
+          state = 1;
+          continue;
+        } else if (state == 1) {
+          result.append('\'');
+          state = 0;
+          continue;
+        } else if (state == 2) {
+          state = 3;
+          continue;
+        } else if (state == 3) {
+          result.append('\'');
+          result.append('\'');
+          state = 1;
+          continue;
+        } else if (state == 4) {
+          state = 5;
+          continue;
        } else {
-          boolean needsSpecialQuote = false;
-          for (int i = 0; i < table.length; i++) {
-            if (table[i][0] == cp) {
-              cpToAppend = table[i][1];
-              needsSpecialQuote = false; // in case an earlier translation triggered it
-              break;
-            } else if (table[i][1] == cp) {
-              needsSpecialQuote = true;
-            }
-          }
-          if (state == 0 && needsSpecialQuote) {
-            state = 2;
-            result.appendCodePoint('\'');
-          } else if (state == 2 && !needsSpecialQuote) {
-            state = 0;
-            result.appendCodePoint('\'');
-          }
+          assert state == 5;
+          result.append('\'');
+          result.append('\'');
+          state = 4;
+          continue;
        }
      }
-      if (cpToAppend != -1) {
-        result.appendCodePoint(cpToAppend);
+
+      if (state == 0 || state == 3 || state == 4) {
+        for (String[] pair : table) {
+          // Perform a greedy match on this symbol string
+          if (input.regionMatches(offset, pair[0], 0, pair[0].length())) {
+            // Skip ahead past this region for the next iteration
+            offset += pair[0].length() - 1;
+            if (state == 3 || state == 4) {
+              result.append('\'');
+              state = 0;
+            }
+            result.append(pair[1]);
+            continue outer;
+          }
+        }
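+        // No replacement found.  Check whether the text here is a symbol of the target
+        // notation; if so, it must be quoted in the output so that it remains a literal.
+        // NOTE(review): when state == 3 the state is left unchanged below, so a quote
+        // immediately after this char is handled as an escaped quote -- confirm intended.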
+        for (String[] pair : table) {
+          if (input.regionMatches(offset, pair[1], 0, pair[1].length())) {
+            if (state == 0) {
+              result.append('\'');
+              state = 4;
+            }
+            result.append(ch);
+            continue outer;
+          }
+        }
+        // Still nothing.  Copy the char verbatim.  (Add a close quote if necessary)
+        if (state == 3 || state == 4) {
+          result.append('\'');
+          state = 0;
+        }
+        result.append(ch);
+      } else {
+        assert state == 1 || state == 2 || state == 5;
+        result.append(ch);
+        state = 2;
      }
-      offset += Character.charCount(cp);
    }
-    if (state == 2) {
-      result.appendCodePoint('\'');
+    // Resolve final quotes
+    if (state == 3 || state == 4) {
+      result.append('\'');
+      state = 0;
+    }
+    if (state != 0) {
+      throw new IllegalArgumentException("Malformed localized pattern: unterminated quote");
    }
    return result.toString();
  }
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/numberformattestspecification.txt
@ -693,6 +693,18 @@ en	#0%	0.4376	44%
 // This next test breaks JDK. JDK doesn't multiply by 100.
 fa	\u0025\u00a0\u0023\u0030	0.4376	\u200e\u066a\u00a0\u06f4\u06f4	K

+test localized pattern basic symbol coverage
+begin
+locale	localizedPattern	toPattern	breaks
+it	#.##0,00	#,##0.00
+// JDK either doesn't know sl uses this character for minus sign
+// or doesn't support minus sign in localized pattern
+sl	#.##0;#.##0−	#,##0;#,##0-	K
+// JDK does not have data for "×10^" in this locale
+en_SE	0,00×10^0;0,00×10^0-	0.00E0;0.00E0-	K
+// JDK does not seem to transform the digits in localized patterns
+ar_SA	#\u066C##\u0660\u066B\u0660\u0660\u061Ba#	#,##0.00;a#,##0.00	K
+
 test toPattern
 set locale en
 begin
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatDataDrivenTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatDataDrivenTest.java
@ -753,7 +753,10 @@ public class NumberFormatDataDrivenTest {
            properties.setNegativeSuffix(tuple.negativeSuffix);
          }
          if (tuple.localizedPattern != null) {
-            // TODO
+            DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(tuple.locale);
+            String converted =
+                PatternString.convertLocalized(tuple.localizedPattern, symbols, false);
+            PatternString.parseToExistingProperties(converted, properties);
          }
          if (tuple.lenient != null) {
            properties.setParseMode(tuple.lenient == 0 ? ParseMode.STRICT : ParseMode.LENIENT);
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
@ -51,6 +51,7 @@ import com.ibm.icu.text.DecimalFormat;
 import com.ibm.icu.text.DecimalFormat.PropertySetter;
 import com.ibm.icu.text.DecimalFormat.SignificantDigitsMode;
 import com.ibm.icu.text.DecimalFormatSymbols;
+import com.ibm.icu.text.DecimalFormat_ICU58;
 import com.ibm.icu.text.DisplayContext;
 import com.ibm.icu.text.MeasureFormat;
 import com.ibm.icu.text.NumberFormat;
@ -1597,6 +1598,59 @@ public class NumberFormatTest extends TestFmwk {
        }
    }

+    @Test
+    public void TestLocalizedPatternSymbolCoverage() {
+        String[] standardPatterns = { "#,##0.05+%;#,##0.05-%", "* @@@E0‰" };
+        String[] standardPatterns58 = { "#,##0.05+%;#,##0.05-%", "* @@@E0‰;* -@@@E0‰" };
+        String[] localizedPatterns = { "▰⁖▰▰໐⁘໐໕†⁜⁙▰⁖▰▰໐⁘໐໕‡⁜", "⁂ ⁕⁕⁕⁑⁑໐‱" };
+        String[] localizedPatterns58 = { "▰⁖▰▰໐⁘໐໕+⁜⁙▰⁖▰▰໐⁘໐໕‡⁜", "⁂ ⁕⁕⁕⁑⁑໐‱⁙⁂ ‡⁕⁕⁕⁑⁑໐‱" };
+
+        DecimalFormatSymbols dfs = new DecimalFormatSymbols();
+        dfs.setGroupingSeparator('⁖');
+        dfs.setDecimalSeparator('⁘');
+        dfs.setPatternSeparator('⁙');
+        dfs.setDigit('▰');
+        dfs.setZeroDigit('໐');
+        dfs.setSignificantDigit('⁕');
+        dfs.setPlusSign('†');
+        dfs.setMinusSign('‡');
+        dfs.setPercent('⁜');
+        dfs.setPerMill('‱');
+        dfs.setExponentSeparator("⁑⁑"); // tests multi-char sequence
+        dfs.setPadEscape('⁂');
+
+        for (int i=0; i<2; i++) {
+            String standardPattern = standardPatterns[i];
+            String standardPattern58 = standardPatterns58[i];
+            String localizedPattern = localizedPatterns[i];
+            String localizedPattern58 = localizedPatterns58[i];
+
+            DecimalFormat df1 = new DecimalFormat("#", dfs);
+            df1.applyPattern(standardPattern);
+            DecimalFormat df2 = new DecimalFormat("#", dfs);
+            df2.applyLocalizedPattern(localizedPattern);
+            assertEquals("DecimalFormat instances should be equal",
+                    df1, df2);
+            assertEquals("toPattern should match on localizedPattern instance",
+                    standardPattern, df2.toPattern());
+            assertEquals("toLocalizedPattern should match on standardPattern instance",
+                    localizedPattern, df1.toLocalizedPattern());
+
+            // Note: ICU 58 does not support plus signs in patterns
+            // Note: ICU 58 always prints the negative part of scientific notation patterns,
+            //       even when the negative part is not necessary
+            DecimalFormat_ICU58 df3 = new DecimalFormat_ICU58("#", dfs);
+            df3.applyPattern(standardPattern); // Reading standardPattern is OK
+            DecimalFormat_ICU58 df4 = new DecimalFormat_ICU58("#", dfs);
+            df4.applyLocalizedPattern(localizedPattern58);
+            // Note: DecimalFormat#equals() is broken on ICU 58
+            assertEquals("toPattern should match on ICU58 localizedPattern instance",
+                    standardPattern58, df4.toPattern());
+            assertEquals("toLocalizedPattern should match on ICU58 standardPattern instance",
+                    localizedPattern58, df3.toLocalizedPattern());
+        }
+    }
+
    @Test
    public void TestParseNull() throws ParseException {
        DecimalFormat df = new DecimalFormat();