ICU-6654 CLDR based RBNF data

X-SVN-Rev: 25107
This commit is contained in:
John Emmons 2008-12-15 17:00:46 +00:00
parent 4700f4ae55
commit 818013ab16
5 changed files with 139 additions and 92 deletions

View file

@ -135,11 +135,11 @@ public class RBNFParseTest extends TestFmwk {
String[][] lists = {
{ "1,2", "twelve", "un virgule deux" },
{ "1,2 million", "twelve million", "un million deux cents mille" },
{ "1,2 million", "twelve million", "un virgule deux" },
{ "1,2 millions", "twelve million", "un million deux cents mille" },
{ "1.2", "one point two", "douze" },
// TODO: We'll update the English RBNF rule later
// { "1.2 million", "one million two hundred thousand", "douze million" },
{ "1.2 million", "one million, two hundred thousand, zero", "douze million" },
{ "1.2 million", "one million two hundred thousand", "douze" },
{ "1.2 millions", "one million two hundred thousand", "douze millions" },
};
Locale.setDefault(Locale.FRANCE);

View file

@ -301,19 +301,19 @@ public class RbnfTest extends TestFmwk {
{ "73", "seventy-three" },
{ "88", "eighty-eight" },
{ "100", "one hundred" },
{ "106", "one hundred and six" },
{ "127", "one hundred and twenty-seven" },
{ "106", "one hundred six" },
{ "127", "one hundred twenty-seven" },
{ "200", "two hundred" },
{ "579", "five hundred and seventy-nine" },
{ "579", "five hundred seventy-nine" },
{ "1,000", "one thousand" },
{ "2,000", "two thousand" },
{ "3,004", "three thousand and four" },
{ "4,567", "four thousand five hundred and sixty-seven" },
{ "15,943", "fifteen thousand nine hundred and forty-three" },
{ "2,345,678", "two million, three hundred and forty-five "
+ "thousand, six hundred and seventy-eight" },
{ "3,004", "three thousand four" },
{ "4,567", "four thousand five hundred sixty-seven" },
{ "15,943", "fifteen thousand nine hundred forty-three" },
{ "2,345,678", "two million three hundred forty-five "
+ "thousand six hundred seventy-eight" },
{ "-36", "minus thirty-six" },
{ "234.567", "two hundred and thirty-four point five six seven" }
{ "234.567", "two hundred thirty-four point five six seven" }
};
doTest(formatter, testData, true);
@ -418,10 +418,10 @@ public class RbnfTest extends TestFmwk {
{ "3,004", "tres mil cuatro" },
{ "4,567", "cuatro mil quinientos sesenta y siete" },
{ "15,943", "quince mil novecientos cuarenta y tres" },
{ "2,345,678", "dos mill\u00f3n trescientos cuarenta y cinco mil "
{ "2,345,678", "dos millones trescientos cuarenta y cinco mil "
+ "seiscientos setenta y ocho"},
{ "-36", "menos treinta y seis" },
{ "234.567", "doscientos treinta y cuatro punto cinco seis siete" }
{ "234.567", "doscientos treinta y cuatro coma cinco seis siete" }
};
doTest(formatter, testData, true);
@ -438,7 +438,7 @@ public class RbnfTest extends TestFmwk {
{ "1", "un" },
{ "15", "quinze" },
{ "20", "vingt" },
{ "21", "vingt-et-un" },
{ "21", "vingt et un" },
{ "23", "vingt-trois" },
{ "62", "soixante-deux" },
{ "70", "soixante-dix" },
@ -450,26 +450,26 @@ public class RbnfTest extends TestFmwk {
{ "106", "cent six" },
{ "127", "cent vingt-sept" },
{ "200", "deux cents" },
{ "579", "cinq cents soixante-dix-neuf" },
{ "579", "cinq cent soixante-dix-neuf" },
{ "1,000", "mille" },
{ "1,123", "onze cents vingt-trois" },
{ "1,594", "mille cinq cents quatre-vingt-quatorze" },
{ "1,123", "mille cent vingt-trois" },
{ "1,594", "mille cinq cent quatre-vingt-quatorze" },
{ "2,000", "deux mille" },
{ "3,004", "trois mille quatre" },
{ "4,567", "quatre mille cinq cents soixante-sept" },
{ "15,943", "quinze mille neuf cents quarante-trois" },
{ "2,345,678", "deux million trois cents quarante-cinq mille "
+ "six cents soixante-dix-huit" },
{ "4,567", "quatre mille cinq cent soixante-sept" },
{ "15,943", "quinze mille neuf cent quarante-trois" },
{ "2,345,678", "deux millions trois cent quarante-cinq mille "
+ "six cent soixante-dix-huit" },
{ "-36", "moins trente-six" },
{ "234.567", "deux cents trente-quatre virgule cinq six sept" }
{ "234.567", "deux cent trente-quatre virgule cinq six sept" }
};
doTest(formatter, testData, true);
formatter.setLenientParseMode(true);
String[][] lpTestData = {
{ "trente-un", "31" },
{ "un cents quatre vingt dix huit", "198" }
{ "trente-et-un", "31" },
{ "un cent quatre vingt dix huit", "198" }
};
doLenientParseTest(formatter, lpTestData);
}
@ -485,11 +485,11 @@ public class RbnfTest extends TestFmwk {
{ "1", "un" },
{ "15", "quinze" },
{ "20", "vingt" },
{ "21", "vingt-et-un" },
{ "21", "vingt et un" },
{ "23", "vingt-trois" },
{ "62", "soixante-deux" },
{ "70", "septante" },
{ "71", "septante-et-un" },
{ "71", "septante et un" },
{ "73", "septante-trois" },
{ "80", "huitante" },
{ "88", "huitante-huit" },
@ -497,18 +497,18 @@ public class RbnfTest extends TestFmwk {
{ "106", "cent six" },
{ "127", "cent vingt-sept" },
{ "200", "deux cents" },
{ "579", "cinq cents septante-neuf" },
{ "579", "cinq cent septante-neuf" },
{ "1,000", "mille" },
{ "1,123", "onze cents vingt-trois" },
{ "1,594", "mille cinq cents nonante-quatre" },
{ "1,123", "mille cent vingt-trois" },
{ "1,594", "mille cinq cent nonante-quatre" },
{ "2,000", "deux mille" },
{ "3,004", "trois mille quatre" },
{ "4,567", "quatre mille cinq cents soixante-sept" },
{ "15,943", "quinze mille neuf cents quarante-trois" },
{ "2,345,678", "deux million trois cents quarante-cinq mille "
+ "six cents septante-huit" },
{ "4,567", "quatre mille cinq cent soixante-sept" },
{ "15,943", "quinze mille neuf cent quarante-trois" },
{ "2,345,678", "deux millions trois cent quarante-cinq mille "
+ "six cent septante-huit" },
{ "-36", "moins trente-six" },
{ "234.567", "deux cents trente-quatre virgule cinq six sept" }
{ "234.567", "deux cent trente-quatre virgule cinq six sept" }
};
doTest(formatter, testData, true);
@ -659,51 +659,52 @@ public class RbnfTest extends TestFmwk {
RuleBasedNumberFormat formatter = new RuleBasedNumberFormat(locale, RuleBasedNumberFormat.SPELLOUT);
String[][] testDataDefault = {
{ "101", "etthundra\u00aden" },
{ "123", "etthundra\u00adtjugotre" },
{ "1,001", "ettusen en" },
{ "1,100", "ettusen etthundra" },
{ "1,101", "ettusen etthundra\u00aden" },
{ "1,234", "ettusen tv\u00e5hundra\u00adtrettiofyra" },
{ "10,001", "tio\u00adtusen en" },
{ "11,000", "elva\u00adtusen" },
{ "12,000", "tolv\u00adtusen" },
{ "20,000", "tjugo\u00adtusen" },
{ "21,000", "tjugoen\u00adtusen" },
{ "21,001", "tjugoen\u00adtusen en" },
{ "200,000", "tv\u00e5hundra\u00adtusen" },
{ "201,000", "tv\u00e5hundra\u00aden\u00adtusen" },
{ "200,200", "tv\u00e5hundra\u00adtusen tv\u00e5hundra" },
{ "2,002,000", "tv\u00e5 miljoner tv\u00e5\u00adtusen" },
{ "12,345,678", "tolv miljoner trehundra\u00adfyrtiofem\u00adtusen sexhundra\u00adsjuttio\u00e5tta" },
{ "123,456.789", "etthundra\u00adtjugotre\u00adtusen fyrahundra\u00adfemtiosex komma sju \u00e5tta nio" },
{ "-12,345.678", "minus tolv\u00adtusen trehundra\u00adfyrtiofem komma sex sju \u00e5tta" }
{ "101", "etthundraett" },
{ "123", "etthundratjugotre" },
{ "1,001", "ettusenett" },
{ "1,100", "ettusenetthundra" },
{ "1,101", "ettusenetthundraett" },
{ "1,234", "ettusentv\u00e5hundratrettiofyra" },
{ "10,001", "tiotusenett" },
{ "11,000", "elvatusen" },
{ "12,000", "tolvtusen" },
{ "20,000", "tjugotusen" },
{ "21,000", "tjugoetttusen" },
{ "21,001", "tjugoetttusenett" },
{ "200,000", "tv\u00e5hundratusen" },
{ "201,000", "tv\u00e5hundraetttusen" },
{ "200,200", "tv\u00e5hundratusentv\u00e5hundra" },
{ "2,002,000", "tv\u00e5 miljoner tv\u00e5tusen" },
{ "12,345,678", "tolv miljoner trehundrafyrtiofemtusensexhundrasjuttio\u00e5tta" },
{ "123,456.789", "etthundratjugotretusenfyrahundrafemtiosex komma sju \u00e5tta nio" },
{ "-12,345.678", "minus tolvtusentrehundrafyrtiofem komma sex sju \u00e5tta" }
};
logln("testing default rules");
doTest(formatter, testDataDefault, true);
String[][] testDataNeutrum = {
{ "101", "etthundra\u00adett" },
{ "1,001", "ettusen ett" },
{ "1,101", "ettusen etthundra\u00adett" },
{ "10,001", "tio\u00adtusen ett" },
{ "21,001", "tjugoen\u00adtusen ett" }
};
// Neutrum rules not supported in CLDR
// String[][] testDataNeutrum = {
// { "101", "etthundra\u00adett" },
// { "1,001", "ettusen ett" },
// { "1,101", "ettusen etthundra\u00adett" },
// { "10,001", "tio\u00adtusen ett" },
// { "21,001", "tjugoen\u00adtusen ett" }
// };
formatter.setDefaultRuleSet("%neutrum");
logln("testing neutrum rules");
doTest(formatter, testDataNeutrum, true);
// formatter.setDefaultRuleSet("%neutrum");
// logln("testing neutrum rules");
// doTest(formatter, testDataNeutrum, true);
String[][] testDataYear = {
{ "101", "etthundra\u00adett" },
{ "101", "etthundraett" },
{ "900", "niohundra" },
{ "1,001", "tiohundra\u00adett" },
{ "1,001", "tusenett" },
{ "1,100", "elvahundra" },
{ "1,101", "elvahundra\u00adett" },
{ "1,234", "tolvhundra\u00adtrettiofyra" },
{ "2,001", "tjugohundra\u00adett" },
{ "10,001", "tio\u00adtusen ett" }
{ "1,101", "elvahundraett" },
{ "1,234", "tolvhundratrettiofyra" },
{ "2,001", "tjugohundraett" },
{ "10,001", "tiotusenett" }
};
formatter.setDefaultRuleSet("%year");
@ -795,14 +796,14 @@ public class RbnfTest extends TestFmwk {
{ "10,000,000.00000001", "ten million point zero zero zero zero zero zero zero one" },
{ "10,000,000.000000002", "ten million point zero zero zero zero zero zero zero zero two" },
{ "10,000,000", "ten million" },
{ "1,234,567,890.0987654", "one billion, two hundred and thirty-four million, five hundred and sixty-seven thousand, eight hundred and ninety point zero nine eight seven six five four" },
{ "123,456,789.9876543", "one hundred and twenty-three million, four hundred and fifty-six thousand, seven hundred and eighty-nine point nine eight seven six five four three" },
{ "12,345,678.87654321", "twelve million, three hundred and forty-five thousand, six hundred and seventy-eight point eight seven six five four three two one" },
{ "1,234,567.7654321", "one million, two hundred and thirty-four thousand, five hundred and sixty-seven point seven six five four three two one" },
{ "123,456.654321", "one hundred and twenty-three thousand, four hundred and fifty-six point six five four three two one" },
{ "12,345.54321", "twelve thousand three hundred and forty-five point five four three two one" },
{ "1,234.4321", "one thousand two hundred and thirty-four point four three two one" },
{ "123.321", "one hundred and twenty-three point three two one" },
{ "1,234,567,890.0987654", "one billion two hundred thirty-four million five hundred sixty-seven thousand eight hundred ninety point zero nine eight seven six five four" },
{ "123,456,789.9876543", "one hundred twenty-three million four hundred fifty-six thousand seven hundred eighty-nine point nine eight seven six five four three" },
{ "12,345,678.87654321", "twelve million three hundred forty-five thousand six hundred seventy-eight point eight seven six five four three two one" },
{ "1,234,567.7654321", "one million two hundred thirty-four thousand five hundred sixty-seven point seven six five four three two one" },
{ "123,456.654321", "one hundred twenty-three thousand four hundred fifty-six point six five four three two one" },
{ "12,345.54321", "twelve thousand three hundred forty-five point five four three two one" },
{ "1,234.4321", "one thousand two hundred thirty-four point four three two one" },
{ "123.321", "one hundred twenty-three point three two one" },
{ "0.0000000011754944", "zero point zero zero zero zero zero zero zero zero one one seven five four nine four four" },
{ "0.000001175494351", "zero point zero zero zero zero zero one one seven five four nine four three five one" },
};

View file

@ -131,6 +131,9 @@ public class CompatibilityTest extends TestFmwk
// to fix the root cause, it will likely break the backward compatibility.
// For now, we're skipping this case even though it works OK on JRE5. See ticket#6550.
{"ICU_4.0", "com.ibm.icu.text.PluralRules.dat"},
{"ICU_3.6", "com.ibm.icu.text.RuleBasedNumberFormat.dat"},
{"ICU_3.8.1", "com.ibm.icu.text.RuleBasedNumberFormat.dat"},
{"ICU_4.0", "com.ibm.icu.text.RuleBasedNumberFormat.dat"},
};
private Target getFileTargets(URL fileURL)

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c2d3205d75a6d00c6aa7d865cf5bc5b61c675f1faf66e88e74ecc4612346fe4c
size 6604559
oid sha256:241852ceca57f6c26628eab9d94ecb126541cb6d6dbbc2742b286ed8184a9db6
size 6637164

View file

@ -14,6 +14,7 @@ import com.ibm.icu.impl.UCharacterProperty;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
import com.ibm.icu.util.UResourceBundleIterator;
import java.math.BigInteger;
import java.text.FieldPosition;
@ -740,7 +741,22 @@ public class RuleBasedNumberFormat extends NumberFormat {
String[][] localizations = null;
try {
// For backwards compatibility - If we have a pre-4.2 style RBNF resource, attempt to read it.
description = bundle.getString(rulenames[format-1]);
}
catch (MissingResourceException e) {
try {
ICUResourceBundle rules = bundle.getWithFallback("RBNFRules/"+rulenames[format-1]);
UResourceBundleIterator it = rules.getIterator();
while (it.hasNext()) {
description = description.concat(it.nextString());
}
}
catch (MissingResourceException e1) {
}
}
try {
UResourceBundle locb = bundle.get(locnames[format-1]);
localizations = new String[locb.getSize()][];
for (int i = 0; i < localizations.length; ++i) {
@ -1256,14 +1272,23 @@ public class RuleBasedNumberFormat extends NumberFormat {
if (publicRuleSetNames.length > 0) {
defaultRuleSet = findRuleSet(publicRuleSetNames[0]);
} else {
defaultRuleSet = null;
int n = ruleSets.length;
while (--n >= 0) {
if (ruleSets[n].isPublic()) {
defaultRuleSet = ruleSets[n];
break;
}
}
defaultRuleSet = null;
int n = ruleSets.length;
while (--n >= 0) {
String currentName = ruleSets[n].getName();
if (currentName.equals("%spellout") || currentName.equals("%ordinal") || currentName.equals("%duration")) {
defaultRuleSet = ruleSets[n];
return;
}
}
n = ruleSets.length;
while (--n >= 0) {
if (ruleSets[n].isPublic()) {
defaultRuleSet = ruleSets[n];
break;
}
}
}
} else if (ruleSetName.startsWith("%%")) {
throw new IllegalArgumentException("cannot use private rule set: " + ruleSetName);
@ -1472,11 +1497,29 @@ public class RuleBasedNumberFormat extends NumberFormat {
// {dlf} Initialization of a fraction rule set requires the default rule
// set to be known. For purposes of initialization, this is always the
// last public rule set, no matter what the localization data says.
// Set the default ruleset to the last public ruleset, unless one of the predefined
// ruleset names %spellout, %ordinal, or %duration is found
boolean defaultNameFound = false;
int n = ruleSets.length;
defaultRuleSet = ruleSets[ruleSets.length - 1];
for (int i = ruleSets.length - 1; i >= 0; --i) {
if (!ruleSets[i].getName().startsWith("%%")) {
defaultRuleSet = ruleSets[i];
while (--n >= 0) {
String currentName = ruleSets[n].getName();
if (currentName.equals("%spellout") || currentName.equals("%ordinal") || currentName.equals("%duration")) {
defaultRuleSet = ruleSets[n];
defaultNameFound = true;
break;
}
}
if ( !defaultNameFound ) {
for (int i = ruleSets.length - 1; i >= 0; --i) {
if (!ruleSets[i].getName().startsWith("%%")) {
defaultRuleSet = ruleSets[i];
break;
}
}
}