diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PluralRulesFactory.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PluralRulesFactory.java index 2b9f2281218..3e54c8f1a58 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PluralRulesFactory.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PluralRulesFactory.java @@ -6,10 +6,13 @@ */ package com.ibm.icu.dev.test.format; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import com.ibm.icu.dev.util.Relation; import com.ibm.icu.text.PluralRules; @@ -61,97 +64,16 @@ public abstract class PluralRulesFactory { } static class PluralRulesFactoryWithOverrides extends PluralRulesFactory { - static Map OVERRIDES = new HashMap(); - static Relation EXTRA_SAMPLES = Relation.of(new HashMap>(), HashSet.class); - static { - String[][] overrides = { - {"bn", "one: n within 0..1"}, - {"en,ca,de,et,fi,gl,it,nl,sv,sw,ta,te,ur", "one: j is 1"}, - {"pt", "one: n is 1 or f is 1"}, - {"cs,sk", "one: j is 1; few: j in 2..4; many: v is not 0"}, - {"cy", "one: n is 1; two: n is 2; few: n is 3; many: n is 6"}, - //{"el", "one: j is 1 or i is 0 and f is 1"}, - {"da,is", "one: j is 1 or f is 1"}, - {"fil", "one: j in 0..1"}, - {"he", "one: j is 1; two: j is 2", "10,20"}, - {"hi", "one: n within 0..1"}, - {"hy", "one: n within 0..2 and n is not 2"}, -// {"hr", "one: j mod 10 is 1 and j mod 100 is not 11; few: j mod 10 in 2..4 and j mod 100 not in 12..14; many: j mod 10 is 0 or j mod 10 in 5..9 or j mod 100 in 11..14"}, - {"lv", "zero: n mod 10 is 0" + - " or n mod 10 in 11..19" + - " or v is 2 and f mod 10 in 11..19;" + - "one: n mod 10 is 1 and n mod 100 is not 11" + - " or v is 2 and f mod 10 is 1 and f mod 100 is not 11" + - " or v is not 2 and f mod 10 is 1"}, -// {"lv", "zero: n mod 10 is 0" + -// " or n mod 10 in 11..19" + -// " or v in 1..6 and f is not 0 and f mod 10 is 0" + -// " or v in 1..6 and f mod 10 in 11..19;" + -// "one: n mod 10 is 1 and n mod 100 is not 11" + -// " or v in 1..6 and f mod 10 is 1 and f mod 100 is not 11" + -// " or v not in 0..6 and f mod 10 is 1"}, - {"pl", "one: j is 1; few: j mod 10 in 2..4 and j mod 100 not in 12..14; many: j is not 1 and j mod 10 in 0..1 or j mod 10 in 5..9 or j mod 100 in 12..14"}, - {"sl", "one: j mod 100 is 1; two: j mod 100 is 2; few: j mod 100 in 3..4 or v is not 0"}, -// {"sr", "one: j mod 10 is 1 and j mod 100 is not 11" + -// " or v in 1..6 and f mod 10 is 1 and f mod 100 is not 11" + -// " or v not in 0..6 and f mod 10 is 1;" + -// "few: j mod 10 in 2..4 and j mod 100 not in 12..14" + -// " or v in 1..6 and f mod 10 in 2..4 and f mod 100 not in 12..14" + -// " or v not in 0..6 and f mod 10 in 2..4" -// }, - {"sr,hr", "one: j mod 10 is 1 and j mod 100 is not 11" + - " or f mod 10 is 1 and f mod 100 is not 11;" + - "few: j mod 10 in 2..4 and j mod 100 not in 12..14" + - " or f mod 10 in 2..4 and f mod 100 not in 12..14" - }, - // + - // " ; many: j mod 10 is 0 " + - // " or j mod 10 in 5..9 " + - // " or j mod 100 in 11..14" + - // " or v in 1..6 and f mod 10 is 0" + - // " or v in 1..6 and f mod 10 in 5..9" + - // " or v in 1..6 and f mod 100 in 11..14" + - // " or v not in 0..6 and f mod 10 in 5..9" - {"ro", "one: j is 1; few: v is not 0 or n is 0 or n is not 1 and n mod 100 in 1..19"}, - {"ru", "one: j mod 10 is 1 and j mod 100 is not 11;" + - " many: j mod 10 is 0 or j mod 10 in 5..9 or j mod 100 in 11..14" -// + "; many: j mod 10 is 0 or j mod 10 in 5..9 or j mod 100 in 11..14" - }, - {"uk", "one: j mod 10 is 1 and j mod 100 is not 11; " + - "few: j mod 10 in 2..4 and j mod 100 not in 12..14; " + - "many: j mod 10 is 0 or j mod 10 in 5..9 or j mod 100 in 11..14"}, - {"zu", "one: n within 0..1"}, - }; - for (String[] pair : overrides) { - for (String locale : pair[0].split("\\s*,\\s*")) { - ULocale uLocale = new ULocale(locale); - if (OVERRIDES.containsKey(uLocale)) { - throw new IllegalArgumentException("Duplicate locale: " + uLocale); - } - try { - PluralRules rules = PluralRules.parseDescription(pair[1]); - OVERRIDES.put(uLocale, rules); - } catch (Exception e) { - throw new IllegalArgumentException(locale + "\t" + pair[1], e); - } - if (pair.length==3) { - for (String item : pair[2].split("\\s*,\\s*")) { - EXTRA_SAMPLES.put(uLocale, new PluralRules.NumberInfo(item)); - } - } - } - } - } @Override boolean hasOverride(ULocale locale) { - return OVERRIDES.containsKey(locale); + return getPluralOverrides().containsKey(locale); } @Override PluralRules forLocale(ULocale locale, PluralType ordinal) { PluralRules override = ordinal != PluralType.CARDINAL ? null - : OVERRIDES.get(locale); + : getPluralOverrides().get(locale); return override != null ? override : PluralRules.forLocale(locale, ordinal); @@ -180,7 +102,110 @@ public abstract class PluralRulesFactory { return result == null ? ULocale.ROOT : result; } }; - + + static class SamplePatterns { + final Map keywordToPattern = new TreeMap(PluralRules.KEYWORD_COMPARATOR); + final Map keywordToErrors = new HashMap(); + public void put(String keyword, String sample) { + if (keywordToPattern.containsKey(keyword)) { + throw new IllegalArgumentException("Duplicate keyword <" + keyword + ">"); + } else { + keywordToPattern.put(keyword, sample.replace(" ", "\u00A0")); + } + } + public void checkErrors(Set set) { + final Map skeletonToKeyword = new HashMap(); + for (String keyword : set) { + String error = ""; + String sample = keywordToPattern.get(keyword); + String skeleton = sample.replace(" ", "").replaceAll("\\s*\\{0\\}\\s*", ""); + String oldSkeletonKeyword = skeletonToKeyword.get(skeleton); + if (oldSkeletonKeyword != null) { + if (error.length() != 0) { + error += ", "; + } + error += "Duplicate keyword skeleton <" + keyword + ", " + skeleton + ">, same as for: <" + oldSkeletonKeyword + ">"; + } else { + skeletonToKeyword.put(skeleton, keyword); + } + if (error.length() == 0) { + keywordToErrors.put(keyword, ""); + } else { + keywordToErrors.put(keyword, "\tERROR: " + error); + } + } + } + } + + + public static Map getLocaleToSamplePatterns() { + if (LOCALE_TO_SAMPLE_PATTERNS == null) { + loadData(); + } + return LOCALE_TO_SAMPLE_PATTERNS; + } + public static Map getPluralOverrides() { + if (OVERRIDES == null) { + loadData(); + } + return OVERRIDES; + } + public static Relation getExtraSamples() { + if (EXTRA_SAMPLES == null) { + loadData(); + } + return EXTRA_SAMPLES; + } + + private static Map LOCALE_TO_SAMPLE_PATTERNS = null; + private static Map OVERRIDES = null; + private static Relation EXTRA_SAMPLES = null; + + private static void loadData() { + LinkedHashMap temp = new LinkedHashMap(); + HashMap tempOverrides = new HashMap(); + Relation tempSamples = Relation.of(new HashMap>(), HashSet.class); + for (String[] row : SAMPLE_PATTERNS) { + ULocale locale = new ULocale(row[0]); + String keyword = row[1]; + String sample = row[2]; + SamplePatterns samplePatterns = temp.get(locale); + if (samplePatterns == null) { + temp.put(locale, samplePatterns = new SamplePatterns()); + } + samplePatterns.put(keyword, sample); + } + for (String[] pair : overrides) { + for (String locale : pair[0].split("\\s*,\\s*")) { + ULocale uLocale = new ULocale(locale); + if (tempOverrides.containsKey(uLocale)) { + throw new IllegalArgumentException("Duplicate locale: " + uLocale); + } + try { + PluralRules rules = PluralRules.parseDescription(pair[1]); + tempOverrides.put(uLocale, rules); + } catch (Exception e) { + throw new IllegalArgumentException(locale + "\t" + pair[1], e); + } + } + } + for (String[] pair : EXTRA_SAMPLE_SOURCE) { + for (String locale : pair[0].split("\\s*,\\s*")) { + ULocale uLocale = new ULocale(locale); + if (tempSamples.containsKey(uLocale)) { + throw new IllegalArgumentException("Duplicate locale: " + uLocale); + } + for (String item : pair[1].split("\\s*,\\s*")) { + tempSamples.put(uLocale, new PluralRules.NumberInfo(item)); + } + } + } + LOCALE_TO_SAMPLE_PATTERNS = Collections.unmodifiableMap(temp); + OVERRIDES = Collections.unmodifiableMap(tempOverrides); + EXTRA_SAMPLES = (Relation) tempSamples.freeze(); + } + + static String[][] OLDRULES = { {"af", "one: n is 1"}, {"am", "one: n in 0..1"}, @@ -255,4 +280,231 @@ public abstract class PluralRulesFactory { {"zh", "other: null"}, {"zu", "one: n is 1"}, }; + + static String[][] SAMPLE_PATTERNS = { + {"und", "zero", "{0} ADD-SAMPLE-ZERO"}, + {"und", "one", "{0} ADD-SAMPLE-ONE"}, + {"und", "two", "{0} ADD-SAMPLE-TWO"}, + {"und", "few", "{0} ADD-SAMPLE-FEW"}, + {"und", "many", "{0} ADD-SAMPLE-MANY"}, + {"und", "other", "{0} ADD-SAMPLE-OTHER"}, + {"af", "one", "{0} dag"}, + {"af", "other", "{0} dae"}, + {"am", "one", "{0} ቀን"}, + {"am", "other", "{0} ቀናት"}, // fixed to 'other' + {"ar", "few", "{0} ساعات"}, + {"ar", "many", "{0} ساعة"}, + {"ar", "one", "ساعة"}, + {"ar", "other", "{0} ساعة"}, + {"ar", "two", "ساعتان"}, + {"ar", "zero", "{0} ساعة"}, + {"bg", "one", "{0} ден"}, + {"bg", "other", "{0} дена"}, + {"bn", "one", "সসে {0}টি আপেল নিয়ে সেটা খেল"}, + {"bn", "other", "সসে {0}টি আপেল নিয়ে সেগুলি খেল"}, + {"br", "few", "{0} deiz"}, + {"br", "many", "{0} a zeizioù"}, + {"br", "one", "{0} deiz"}, + {"br", "other", "{0} deiz"}, + {"br", "two", "{0} zeiz"}, + {"ca", "one", "{0} dia"}, + {"ca", "other", "{0} dies"}, + {"cs", "few", "{0} dny"}, + {"cs", "one", "{0} den"}, + {"cs", "other", "{0} dní"}, + {"cs", "many", "{0} dne"}, // added from spreadsheet + {"cy", "zero", "{0} cadair, {0} peint"}, + {"cy", "one", "{0} gadair, {0} peint"}, + {"cy", "two", "{0} gadair, {0} beint"}, + {"cy", "few", "{0} cadair, {0} pheint"}, + {"cy", "many", "{0} chadair, {0} pheint"}, + {"cy", "other", "{0} cadair, {0} peint"}, + {"da", "one", "{0} dag"}, + {"da", "other", "{0} dage"}, + {"de", "one", "{0} Tag"}, + {"de", "other", "{0} Tage"}, + {"dz", "other", "ཉིནམ་ {0} "}, + {"el", "one", "{0} ημέρα"}, + {"el", "other", "{0} ημέρες"}, + {"es", "one", "{0} día"}, + {"es", "other", "{0} días"}, + {"et", "one", "{0} ööpäev"}, + {"et", "other", "{0} ööpäeva"}, + {"eu", "one", "Nire {0} lagunarekin nago"}, + {"eu", "other", "Nire {0} lagunekin nago"}, + {"fa", "other", "{0} روز"}, + {"fi", "one", "{0} päivä"}, + {"fi", "other", "{0} päivää"}, + {"fil", "one", "sa {0} araw"}, + {"fil", "other", "sa {0} (na) araw"}, + {"fr", "one", "{0} jour"}, + {"fr", "other", "{0} jours"}, + {"gl", "one", "{0} día"}, + {"gl", "other", "{0} días"}, + {"gu", "one", "{0} કિલોગ્રામ"}, + {"gu", "other", "{0} કિલોગ્રામ્સ"}, + {"he", "many", "{0} ימים"}, + {"he", "one", " יום {0}"}, + {"he", "other", "{0} ימים"}, + {"he", "two", "יומיים"}, + {"hi", "one", "{0} घंटा"}, + {"hi", "other", "{0} घंटे"}, + {"hr", "few", "za {0} mjeseca"}, + {"hr", "many", "za {0} mjeseci"}, + {"hr", "one", "za {0} mjesec"}, + {"hr", "other", "za {0} mjeseci"}, + {"hu", "other", "{0} nap"}, + {"hy", "one", "այդ {0} ժամը"}, + {"hy", "other", "այդ {0} ժամերը"}, + {"id", "other", "{0} hari"}, + {"is", "one", "{0} dagur"}, + {"is", "other", "{0} dagar"}, + {"it", "one", "{0} giorno"}, + {"it", "other", "{0} giorni"}, + {"ja", "other", "{0}日"}, + {"km", "other", "{0} ថ្ងៃ"}, // + {"kn", "one", "{0} ದಿನ"}, + {"kn", "other", "{0} ದಿನಗಳು"}, + {"ko", "other", "{0}일"}, + {"lo", "other", "{0} ມື້"}, + {"lt", "few", "{0} dienos"}, + {"lt", "one", "{0} diena"}, + {"lt", "other", "{0} dienų"}, + {"lv", "one", "{0} diennakts"}, + {"lv", "other", "{0} diennaktis"}, + {"lv", "zero", "{0} diennakšu"}, + {"ml", "one", "{0} വ്യക്തി"}, + {"ml", "other", "{0} വ്യക്തികൾ"}, + {"mr", "one", "{0} घर"}, + {"mr", "other", "{0} घरे"}, + {"ms", "other", "{0} hari"}, + {"nb", "one", "{0} dag"}, + {"nb", "other", "{0} dager"}, + {"ne", "one", "तपाईंसँग {0} निमन्त्रणा छ"}, + {"ne", "other", "तपाईँसँग {0} निमन्त्रणाहरू छन्"}, + // {"ne", "", "{0} दिन बाँकी छ ।"}, + // {"ne", "", "{0} दिन बाँकी छ ।"}, + // {"ne", "", "{0} दिन बाँकी छ ।"}, + // {"ne", "", "{0} जनाहरू पाहुना बाँकी छ ।"}, + {"nl", "one", "{0} dag"}, + {"nl", "other", "{0} dagen"}, + {"pl", "few", "{0} miesiące"}, + {"pl", "many", "{0} miesięcy"}, + {"pl", "one", "{0} miesiąc"}, + {"pl", "other", "{0} miesiąca"}, + {"pt", "one", "{0} ponto"}, + {"pt", "other", "{0} pontos"}, + // {"pt_PT", "one", "{0} dia"}, + // {"pt_PT", "other", "{0} dias"}, + {"ro", "few", "{0} zile"}, + {"ro", "one", "{0} zi"}, + {"ro", "other", "{0} de zile"}, + {"ru", "few", "{0} года"}, + {"ru", "many", "{0} лет"}, + {"ru", "one", "{0} год"}, + {"ru", "other", "{0} года"}, + {"si", "other", "දින {0}ක්"}, + {"sk", "few", "{0} dni"}, + {"sk", "one", "{0} deň"}, + {"sk", "other", "{0} dní"}, + {"sk", "many", "{0} dňa"}, // added from spreadsheet + {"sl", "few", "{0} ure"}, + {"sl", "one", "{0} ura"}, + {"sl", "other", "{0} ur"}, + {"sl", "two", "{0} uri"}, + {"sr", "few", "{0} сата"}, + {"sr", "many", "{0} сати"}, + {"sr", "one", "{0} сат"}, + {"sr", "other", "{0} сати"}, + {"sv", "one", "om {0} dag"}, + {"sv", "other", "om {0} dagar"}, + {"sw", "one", "siku {0} iliyopita"}, + {"sw", "other", "siku {0} zilizopita"}, + {"ta", "one", "{0} நாள்"}, + {"ta", "other", "{0} நாட்கள்"}, + {"te", "one", "{0} రోజు"}, + {"te", "other", "{0} రోజులు"}, + {"th", "other", "{0} วัน"}, + {"tr", "other", "{0} gün"}, + {"uk", "few", "{0} дні"}, + {"uk", "many", "{0} днів"}, + {"uk", "one", "{0} день"}, + {"uk", "other", "{0} дня"}, + {"ur", "one", "{0} گھنٹہ"}, + {"ur", "other", "{0} گھنٹے"}, + {"vi", "other", "{0} ngày"}, + {"zh", "other", "{0} 天"}, + {"zh_Hant", "other", "{0} 日"}, + {"en", "one", "{0} day"}, // added from spreadsheet + {"en", "other", "{0} days"}, // added from spreadsheet + {"zu", "one", "{0} usuku"}, // added from spreadsheet + {"zu", "other", "{0} izinsuku"}, // added from spreadsheet + }; + + static String[][] EXTRA_SAMPLE_SOURCE = { + {"he,iw","10,20"}, + {"und,az,ka,kk,ky,mk,mn,my,pa,ps,sq,uz","0,0.0,0.1,1,1.0,1.1,2.0,2.1,3,4,5,10,11"}, + }; + + static String[][] overrides = { + {"gu,mr,kn,am", "one: n within 0..1"}, + {"ta,te", "one: n is 1"}, + {"bn", "one: n within 0..1"}, + {"en,ca,de,et,fi,gl,it,nl,sv,sw,ur", "one: j is 1"}, + {"pt", "one: n is 1 or f is 1"}, + {"cs,sk", "one: j is 1; few: j in 2..4; many: v is not 0"}, + //{"cy", "one: n is 1; two: n is 2; few: n is 3; many: n is 6"}, + //{"el", "one: j is 1 or i is 0 and f is 1"}, + {"da,is", "one: j is 1 or f is 1"}, + {"fil,tl", "one: j in 0..1"}, + {"he,iw", "one: j is 1; two: j is 2; many: j is not 0 and j mod 10 is 0", "10,20"}, + {"hi", "one: n within 0..1"}, + {"hy", "one: n within 0..2 and n is not 2"}, + // {"hr", "one: j mod 10 is 1 and j mod 100 is not 11; few: j mod 10 in 2..4 and j mod 100 not in 12..14; many: j mod 10 is 0 or j mod 10 in 5..9 or j mod 100 in 11..14"}, + {"lv", "zero: n mod 10 is 0" + + " or n mod 10 in 11..19" + + " or v is 2 and f mod 10 in 11..19;" + + "one: n mod 10 is 1 and n mod 100 is not 11" + + " or v is 2 and f mod 10 is 1 and f mod 100 is not 11" + + " or v is not 2 and f mod 10 is 1"}, + // {"lv", "zero: n mod 10 is 0" + + // " or n mod 10 in 11..19" + + // " or v in 1..6 and f is not 0 and f mod 10 is 0" + + // " or v in 1..6 and f mod 10 in 11..19;" + + // "one: n mod 10 is 1 and n mod 100 is not 11" + + // " or v in 1..6 and f mod 10 is 1 and f mod 100 is not 11" + + // " or v not in 0..6 and f mod 10 is 1"}, + {"pl", "one: j is 1; few: j mod 10 in 2..4 and j mod 100 not in 12..14; many: j is not 1 and j mod 10 in 0..1 or j mod 10 in 5..9 or j mod 100 in 12..14"}, + {"sl", "one: j mod 100 is 1; two: j mod 100 is 2; few: j mod 100 in 3..4 or v is not 0"}, + // {"sr", "one: j mod 10 is 1 and j mod 100 is not 11" + + // " or v in 1..6 and f mod 10 is 1 and f mod 100 is not 11" + + // " or v not in 0..6 and f mod 10 is 1;" + + // "few: j mod 10 in 2..4 and j mod 100 not in 12..14" + + // " or v in 1..6 and f mod 10 in 2..4 and f mod 100 not in 12..14" + + // " or v not in 0..6 and f mod 10 in 2..4" + // }, + {"sr,hr,sh,bs", "one: j mod 10 is 1 and j mod 100 is not 11" + + " or f mod 10 is 1 and f mod 100 is not 11;" + + "few: j mod 10 in 2..4 and j mod 100 not in 12..14" + + " or f mod 10 in 2..4 and f mod 100 not in 12..14" + }, + // + + // " ; many: j mod 10 is 0 " + + // " or j mod 10 in 5..9 " + + // " or j mod 100 in 11..14" + + // " or v in 1..6 and f mod 10 is 0" + + // " or v in 1..6 and f mod 10 in 5..9" + + // " or v in 1..6 and f mod 100 in 11..14" + + // " or v not in 0..6 and f mod 10 in 5..9" + {"mo,ro", "one: j is 1; few: v is not 0 or n is 0 or n is not 1 and n mod 100 in 1..19"}, + {"ru", "one: j mod 10 is 1 and j mod 100 is not 11;" + + " many: j mod 10 is 0 or j mod 10 in 5..9 or j mod 100 in 11..14" + // + "; many: j mod 10 is 0 or j mod 10 in 5..9 or j mod 100 in 11..14" + }, + {"uk", "one: j mod 10 is 1 and j mod 100 is not 11; " + + "few: j mod 10 in 2..4 and j mod 100 not in 12..14; " + + "many: j mod 10 is 0 or j mod 10 in 5..9 or j mod 100 in 11..14"}, + {"zu", "one: n within 0..1"}, + }; + } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/WritePluralRulesData.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/WritePluralRulesData.java index 2d89452d693..cf9fb1e957b 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/WritePluralRulesData.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/WritePluralRulesData.java @@ -34,6 +34,7 @@ import java.util.Map.Entry; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.ibm.icu.dev.test.format.PluralRulesFactory.SamplePatterns; import com.ibm.icu.dev.test.format.PluralRulesTest.StandardPluralCategories; import com.ibm.icu.dev.util.CollectionUtilities; import com.ibm.icu.dev.util.Relation; @@ -55,14 +56,12 @@ public class WritePluralRulesData { args = new String[] {"rules"}; } for (String arg : args) { - if (arg.equalsIgnoreCase("samples")) { - generateSamples(SampleStyle.modified); - } else if (arg.equalsIgnoreCase("original")) { + if (arg.equalsIgnoreCase("original")) { generateSamples(SampleStyle.original); } else if (arg.startsWith("verify")) { - generateSamples(SampleStyle.verify); - } else if (arg.equalsIgnoreCase("rules")) { showRules(); + generateSamples(SampleStyle.samples); + generateSamples(SampleStyle.verify); } else if (arg.equalsIgnoreCase("oldSnap")) { generateLOCALE_SNAPSHOT(PluralRulesFactory.NORMAL); } else if (arg.equalsIgnoreCase("newSnap")) { @@ -79,172 +78,6 @@ public class WritePluralRulesData { "hi,hr,hu,hy,id,is,it,he,ja,ka,kk,km,kn,ko,ky,lo,lt,lv,mk,ml,mn,mr,ms,my,ne,nl,nb," + "pa,pl,ps,pt,ro,ru,si,sk,sl,sq,sr,sv,sw,ta,te,th,tr,uk,ur,uz,vi,zh,zu").split("\\s*,\\s*"); - static String[][] SAMPLE_PATTERNS = { - {"af", "one", "{0} dag"}, - {"af", "other", "{0} dae"}, - {"am", "one", "{0} ቀን"}, - {"am", "other", "{0} ቀናት"}, // fixed to 'other' - {"ar", "few", "{0} ساعات"}, - {"ar", "many", "{0} ساعة"}, - {"ar", "one", "ساعة"}, - {"ar", "other", "{0} ساعة"}, - {"ar", "two", "ساعتان"}, - {"ar", "zero", "{0} ساعة"}, - {"bg", "one", "{0} ден"}, - {"bg", "other", "{0} дена"}, - {"bn", "one", "সসে {0}টি আপেল নিয়ে সেটা খেল"}, - {"bn", "other", "সসে {0}টি আপেল নিয়ে সেগুলি খেল"}, - {"br", "few", "{0} deiz"}, - {"br", "many", "{0} a zeizioù"}, - {"br", "one", "{0} deiz"}, - {"br", "other", "{0} deiz"}, - {"br", "two", "{0} zeiz"}, - {"ca", "one", "{0} dia"}, - {"ca", "other", "{0} dies"}, - {"cs", "few", "{0} dny"}, - {"cs", "one", "{0} den"}, - {"cs", "other", "{0} dní"}, - {"cs", "many", "{0} dne"}, // added from spreadsheet - {"cy", "zero", "{0} cadair (f) {0} peint (m)"}, - {"cy", "one", "{0} gadair (f) {0} peint (m)"}, - {"cy", "two", "{0} gadair (f) {0} beint (m)"}, - {"cy", "few", "{0} cadair (f) {0} pheint (m)"}, - {"cy", "many", "{0} chadair (f) {0} pheint (m)"}, - {"cy", "other", "{0} cadair (f) {0} peint (m)"}, - {"da", "one", "{0} dag"}, - {"da", "other", "{0} dage"}, - {"de", "one", "{0} Tag"}, - {"de", "other", "{0} Tage"}, - {"dz", "other", "ཉིནམ་ {0} "}, - {"el", "one", "{0} ημέρα"}, - {"el", "other", "{0} ημέρες"}, - {"es", "one", "{0} día"}, - {"es", "other", "{0} días"}, - {"et", "one", "{0} ööpäev"}, - {"et", "other", "{0} ööpäeva"}, - {"eu", "one", "Nire {0} lagunarekin nago"}, - {"eu", "other", "Nire {0} lagunekin nago"}, - {"fa", "other", "{0} روز"}, - {"fi", "one", "{0} päivä"}, - {"fi", "other", "{0} päivää"}, - {"fil", "one", "sa {0} araw"}, - {"fil", "other", "sa {0} (na) araw"}, - {"fr", "one", "{0} jour"}, - {"fr", "other", "{0} jours"}, - {"gl", "one", "{0} día"}, - {"gl", "other", "{0} días"}, - {"gu", "one", "{0} અઠવાડિયું"}, - {"gu", "other", "{0} અઠવાડિયા"}, - {"he", "many", "{0} ימים"}, - {"he", "one", " יום {0}"}, - {"he", "other", "{0} ימים"}, - {"he", "two", "יומיים"}, - {"hi", "one", "{0} घंटा"}, - {"hi", "other", "{0} घंटे"}, - {"hr", "few", "za {0} mjeseca"}, - {"hr", "many", "za {0} mjeseci"}, - {"hr", "one", "za {0} mjesec"}, - {"hr", "other", "za {0} mjeseci"}, - {"hu", "other", "{0} nap"}, - {"hy", "one", "այդ {0} ժամը"}, - {"hy", "other", "այդ {0} ժամերը"}, - {"id", "other", "{0} hari"}, - {"is", "one", "{0} dagur"}, - {"is", "other", "{0} dagar"}, - {"it", "one", "{0} giorno"}, - {"it", "other", "{0} giorni"}, - {"ja", "other", "{0}日"}, - {"km", "other", "{0} ថ្ងៃ"}, - {"kn", "other", "{0} ದಿನಗಳು"}, - {"ko", "other", "{0}일"}, - {"lo", "other", "{0} ມື້"}, - {"lt", "few", "{0} dienos"}, - {"lt", "one", "{0} diena"}, - {"lt", "other", "{0} dienų"}, - {"lv", "one", "{0} diennakts"}, - {"lv", "other", "{0} diennaktis"}, - {"lv", "zero", "{0} diennakšu"}, - {"ml", "one", "{0} വ്യക്തി"}, - {"ml", "other", "{0} വ്യക്തികൾ"}, - {"mr", "one", "{0} घर"}, - {"mr", "other", "{0} घरे"}, - {"ms", "other", "{0} hari"}, - {"nb", "one", "{0} dag"}, - {"nb", "other", "{0} dager"}, - {"ne", "one", "तपाईंसँग {0} निमन्त्रणा छ"}, - {"ne", "other", "तपाईँसँग {0} निमन्त्रणाहरू छन्"}, - // {"ne", "", "{0} दिन बाँकी छ ।"}, - // {"ne", "", "{0} दिन बाँकी छ ।"}, - // {"ne", "", "{0} दिन बाँकी छ ।"}, - // {"ne", "", "{0} जनाहरू पाहुना बाँकी छ ।"}, - {"nl", "one", "{0} dag"}, - {"nl", "other", "{0} dagen"}, - {"pl", "few", "{0} miesiące"}, - {"pl", "many", "{0} miesięcy"}, - {"pl", "one", "{0} miesiąc"}, - {"pl", "other", "{0} miesiąca"}, - {"pt", "one", "{0} ponto"}, - {"pt", "other", "{0} pontos"}, -// {"pt_PT", "one", "{0} dia"}, -// {"pt_PT", "other", "{0} dias"}, - {"ro", "few", "{0} zile"}, - {"ro", "one", "{0} zi"}, - {"ro", "other", "{0} de zile"}, - {"ru", "few", "{0} года"}, - {"ru", "many", "{0} лет"}, - {"ru", "one", "{0} год"}, - {"ru", "other", "{0} года"}, - {"si", "other", "දින {0}ක්"}, - {"sk", "few", "{0} dni"}, - {"sk", "one", "{0} deň"}, - {"sk", "other", "{0} dní"}, - {"sk", "many", "{0} dňa"}, // added from spreadsheet - {"sl", "few", "{0} ure"}, - {"sl", "one", "{0} ura"}, - {"sl", "other", "{0} ur"}, - {"sl", "two", "{0} uri"}, - {"sr", "few", "{0} сата"}, - {"sr", "many", "{0} сати"}, - {"sr", "one", "{0} сат"}, - {"sr", "other", "{0} сати"}, - {"sv", "one", "om {0} dag"}, - {"sv", "other", "om {0} dagar"}, - {"sw", "one", "siku {0} iliyopita"}, - {"sw", "other", "siku {0} zilizopita"}, - {"ta", "one", "{0} நாள்"}, - {"ta", "other", "{0} நாட்கள்"}, - {"te", "one", "{0} రోజు"}, - {"te", "other", "{0} రోజులు"}, - {"th", "other", "{0} วัน"}, - {"tr", "other", "{0} gün"}, - {"uk", "few", "{0} дні"}, - {"uk", "many", "{0} днів"}, - {"uk", "one", "{0} день"}, - {"uk", "other", "{0} дня"}, - {"ur", "one", "{0} گھنٹہ"}, - {"ur", "other", "{0} گھنٹے"}, - {"vi", "other", "{0} ngày"}, - {"zh", "other", "{0} 天"}, - {"zh_Hant", "other", "{0} 日"}, - {"en", "one", "{0} day"}, // added from spreadsheet - {"en", "other", "{0} days"}, // added from spreadsheet - {"zu", "one", "{0} usuku"}, // added from spreadsheet - {"zu", "other", "{0} izinsuku"}, // added from spreadsheet - }; - - static final Map localeToSamplePatterns = new LinkedHashMap(); - static { - for (String[] row : SAMPLE_PATTERNS) { - ULocale locale = new ULocale(row[0]); - String keyword = row[1]; - String sample = row[2]; - SamplePatterns samplePatterns = localeToSamplePatterns.get(locale); - if (samplePatterns == null) { - localeToSamplePatterns.put(locale, samplePatterns = new SamplePatterns()); - } - samplePatterns.put(keyword, sample); - } - } static final String[][] ORIGINAL_SAMPLES = { {"af", "0, 0.00, 0.000, 0.001, 0.002, 0.01, 0.010, 0.011, 0.02, 0.1, 0.10, 0.11, 0.2, 1.0, 1.00, 1.000, 1.002, 1.010, 1.011, 1.02, 1.10, 1.11, 1.2, 2.0, 2.00, 2.000, 2.001, 2.01, 2.1"}, @@ -431,7 +264,9 @@ public class WritePluralRulesData { } } - public static void showRules() { + public static void showRules() throws IOException { + BufferedWriter writer = getWriter("all-" + SampleStyle.rules + ".tsv"); + if (true) { // for debugging PluralRules rules = PluralRulesFactory.ALTERNATE.forLocale(new ULocale("lv")); @@ -515,11 +350,11 @@ public class WritePluralRulesData { } rulesToLocale.put(temp, locale.toString()); } - System.out.println("Locales\tPC\tOld Rules\tOld Sample Numbers\tNew Rules\tNew Sample Numbers\tInt-Diff"); + writer.write("Locales\tPC\tOld Rules\tOld Sample Numbers\tNew Rules\tNew Sample Numbers\tInt-Diff\n"); for (Entry, Set> entry : rulesToLocale.keyValuesSet()) { String localeList = CollectionUtilities.join(entry.getValue(), " "); for (Entry keywordRulesSamples : entry.getKey().entrySet()) { - System.out.println( + writer.write( localeList // locale + "\t" + keywordRulesSamples.getKey() // keyword + "\t" + keywordRulesSamples.getValue().get0() // rules @@ -527,6 +362,7 @@ public class WritePluralRulesData { + "\t" + keywordRulesSamples.getValue().get2() // rules + "\t" + keywordRulesSamples.getValue().get3() // samples + "\t" + keywordRulesSamples.getValue().get4() // int diff + + "\n" ); localeList = ""; } @@ -540,6 +376,7 @@ public class WritePluralRulesData { System.out.println("{\"" + locale.toString() + "\", \"" + oldRules.toString() + "\"},"); } } + writer.close(); } /** @@ -575,85 +412,56 @@ public class WritePluralRulesData { static final Set NEW_LOCALES = new HashSet(Arrays.asList("az,ka,kk,ky,mk,mn,my,pa,ps,sq,uz".split("\\s*,\\s*"))); - static class SamplePatterns { - final Map keywordToPattern = new TreeMap(PluralRules.KEYWORD_COMPARATOR); - final Map keywordToErrors = new HashMap(); - public void put(String keyword, String sample) { - if (keywordToPattern.containsKey(keyword)) { - throw new IllegalArgumentException("Duplicate keyword <" + keyword + ">"); - } else { - keywordToPattern.put(keyword, sample.replace(" ", "\u00A0")); - } - } - public void checkErrors(Set set) { - final Map skeletonToKeyword = new HashMap(); - for (String keyword : set) { - String error = ""; - String sample = keywordToPattern.get(keyword); - String skeleton = sample.replace(" ", "").replaceAll("\\s*\\{0\\}\\s*", ""); - String oldSkeletonKeyword = skeletonToKeyword.get(skeleton); - if (oldSkeletonKeyword != null) { - if (error.length() != 0) { - error += ", "; - } - error += "Duplicate keyword skeleton <" + keyword + ", " + skeleton + ">, same as for: <" + oldSkeletonKeyword + ">"; - } else { - skeletonToKeyword.put(skeleton, keyword); - } - if (error.length() == 0) { - keywordToErrors.put(keyword, ""); - } else { - keywordToErrors.put(keyword, "\tERROR: " + error); - } - } - } - } - enum SampleStyle {original, modified, verify} + enum SampleStyle {original, samples, rules, verify} static void generateSamples(SampleStyle sampleStyle) throws IOException { LinkedHashSet skippedLocales = new LinkedHashSet(); - System.out.println("Locale\tPC\tPattern\tSamples\tRules\tErrors (" + sampleStyle + ")"); BufferedWriter writer = null; + if (sampleStyle != SampleStyle.verify) { + writer = getWriter("all-" + sampleStyle + ".tsv"); + //writer.write("Plural Category\tEnglish Number\tFormatted Sample\tAcceptable?\tReplacement\n"); + writer.write("Locale\tPC\tPattern\tSamples\tRules\tErrors (" + sampleStyle + ")\n"); + } + for (String localeString : FOCUS_LOCALES) { ULocale locale = new ULocale(localeString); if (sampleStyle == SampleStyle.verify) { - String fileName = TARGETDIR + "fraction-" + locale + ".tsv"; - System.out.println(new File(fileName).getCanonicalPath()); - writer = new BufferedWriter( - new OutputStreamWriter( - new FileOutputStream(fileName), Charset.forName("UTF-8"))); + writer = getWriter("fraction-" + locale + ".tsv"); writer.write("Plural Category\tEnglish Number\tFormatted Sample\tAcceptable?\tReplacement\n"); } NumberFormat nf = NumberFormat.getInstance(new ULocale(locale.toString()+"@numbers=latn")); PluralRules newRules = PluralRulesFactory.ALTERNATE.forLocale(locale); - SamplePatterns samplePatterns = localeToSamplePatterns.get(locale); - if (samplePatterns == null && NEW_LOCALES.contains(localeString)) { - skippedLocales.add(locale); - continue; + SamplePatterns samplePatterns = PluralRulesFactory.getLocaleToSamplePatterns().get(locale); + if (samplePatterns == null) { + samplePatterns = PluralRulesFactory.getLocaleToSamplePatterns().get(new ULocale("und")); } + + // if (samplePatterns == null && NEW_LOCALES.contains(localeString)) { + // skippedLocales.add(locale); + // continue; + // } // check for errors. samplePatterns.checkErrors(newRules.getKeywords()); // now print. for (String keyword : newRules.getKeywords()) { - if (sampleStyle != SampleStyle.modified) { - Collection samples = getSamples(newRules, keyword, - sampleStyle == SampleStyle.verify ? null : locale); + if (sampleStyle != SampleStyle.samples) { + Collection samples = getSamples(newRules, keyword, locale, sampleStyle); for (NumberInfo sample : samples) { String pattern = samplePatterns.keywordToPattern.get(keyword); String str = format(pattern, nf, sample); if (sampleStyle == SampleStyle.verify) { writer.write(keyword + "\t'" + sample + "\t" + str + "\n"); } else { - System.out.println(locale + "\t" + keyword + "\t" + sample + "\t" + str); + writer.write(locale + "\t" + keyword + "\t" + sample + "\t" + str + "\n"); } } continue; } String pattern = null; String error = null; - Collection samples = getSamples(newRules, keyword, null); + Collection samples = getSamples(newRules, keyword, locale, sampleStyle); NumberInfo first = samples.iterator().next(); String sample = "??? " + first.toString(); String rule = ""; @@ -679,32 +487,64 @@ public class WritePluralRulesData { sample = buffer.toString(); } } - System.out.println(locale + "\t" + keyword + writer.write(locale + "\t" + keyword + "\t" + pattern + "\t" + sample + "\t" + rule + error + + "\n" ); } if (sampleStyle == SampleStyle.verify) { writer.close(); } } - System.out.println("SKIP:\t\t\t" + skippedLocales); + if (sampleStyle != SampleStyle.verify) { + if (skippedLocales.size() != 0) { + writer.write("SKIP:\t\t\t" + skippedLocales + "\n"); + } + writer.close(); + } } - private static Collection getSamples(PluralRules newRules, String keyword, ULocale locale) { - if (locale == null) { - return newRules.getFractionSamples(keyword); + private static BufferedWriter getWriter(String filename) { + try { + BufferedWriter writer; + String fileName = TARGETDIR + filename; + System.out.println(new File(fileName).getCanonicalPath()); + writer = new BufferedWriter( + new OutputStreamWriter( + new FileOutputStream(fileName), Charset.forName("UTF-8"))); + return writer; + } catch (FileNotFoundException e) { + throw new IllegalArgumentException(e); + } catch (IOException e) { + throw new IllegalArgumentException(e); } - Collection result = new ArrayList(); - List originals = LOCALE_TO_ORIGINALS.get(locale); - if (originals == null) { - return newRules.getFractionSamples(keyword); + } + + private static Collection getSamples(PluralRules newRules, String keyword, ULocale locale, SampleStyle sampleStyle) { + Set result = new TreeSet(); + Collection extras; + if (sampleStyle == SampleStyle.original) { + extras = LOCALE_TO_ORIGINALS.get(locale); + if (extras != null) { + for (NumberInfo s : extras) { + if (keyword.equals(newRules.select(s))) { + result.add(s); + } + } + } } - for (NumberInfo s : originals) { - if (keyword.equals(newRules.select(s))) { - result.add(s); + extras = PluralRulesFactory.getExtraSamples().get(locale); + if (extras == null) { + extras = PluralRulesFactory.getExtraSamples().get(new ULocale("und")); + } + if (extras != null) { + for (NumberInfo s : extras) { + if (keyword.equals(newRules.select(s))) { + result.add(s); + } } } if (result.size() == 0) {