mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-0 (cldrbug 339) fixed test generation
X-SVN-Rev: 16641
This commit is contained in:
parent
436facf32a
commit
0a3710fa7a
3 changed files with 492 additions and 50 deletions
|
@ -795,8 +795,11 @@ public class BagFormatter {
|
|||
log.println("Creating File: "
|
||||
+ file.getCanonicalPath());
|
||||
}
|
||||
File parent = new File(file.getParent());
|
||||
parent.mkdirs();
|
||||
String parentName = file.getParent();
|
||||
if (parentName != null) {
|
||||
File parent = new File(parentName);
|
||||
parent.mkdirs();
|
||||
}
|
||||
return new PrintWriter(
|
||||
new BufferedWriter(
|
||||
new OutputStreamWriter(
|
||||
|
|
|
@ -8,7 +8,10 @@
|
|||
*/
|
||||
package com.ibm.icu.dev.tool.cldr;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.io.Writer;
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
@ -25,8 +28,16 @@ import java.util.TreeSet;
|
|||
import java.util.regex.Pattern;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NamedNodeMap;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.w3c.dom.Text;
|
||||
|
||||
import com.ibm.icu.dev.test.util.BagFormatter;
|
||||
import com.ibm.icu.dev.test.util.UnicodeMap;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.DateFormat;
|
||||
|
@ -45,6 +56,8 @@ import com.ibm.icu.util.ULocale;
|
|||
import com.ibm.icu.dev.test.util.Relation;
|
||||
import com.ibm.icu.dev.test.util.SortedBag;
|
||||
import com.ibm.icu.dev.tool.UOption;
|
||||
import com.ibm.icu.dev.tool.cldr.ICUResourceWriter.Resource;
|
||||
import com.ibm.icu.dev.tool.cldr.ICUResourceWriter.ResourceTable;
|
||||
|
||||
/**
|
||||
* Generated tests for CLDR. Currently, these are driven off of a version of ICU4J, and just
|
||||
|
@ -59,29 +72,35 @@ public class GenerateCldrTests {
|
|||
private static final int
|
||||
HELP1 = 0,
|
||||
HELP2 = 1,
|
||||
DESTDIR = 3,
|
||||
LOGDIR = 3;
|
||||
DESTDIR = 2,
|
||||
LOGDIR = 3,
|
||||
SOURCEDIR =4,
|
||||
MATCH = 5;
|
||||
|
||||
private static final UOption[] options = {
|
||||
UOption.HELP_H(),
|
||||
UOption.HELP_QUESTION_MARK(),
|
||||
UOption.DESTDIR().setDefault("C:\\ICU4C\\locale\\common\\test\\"),
|
||||
UOption.create("log", 'l', UOption.REQUIRES_ARG).setDefault("")
|
||||
UOption.DESTDIR().setDefault("C:\\DATA\\GEN\\cldr\\test\\"),
|
||||
UOption.create("log", 'l', UOption.REQUIRES_ARG).setDefault("C:\\DATA\\GEN\\cldr\\test\\"),
|
||||
UOption.SOURCEDIR().setDefault("C:\\ICU4C\\locale\\common\\"),
|
||||
UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
|
||||
};
|
||||
|
||||
CldrCollations cldrCollations;
|
||||
static String logDir = null, destDir = null;
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
UOption.parseArgs(args, options);
|
||||
|
||||
log = BagFormatter.openUTF8Writer(options[LOGDIR].value, "log.txt");
|
||||
try {
|
||||
|
||||
//compareAvailable();
|
||||
//if (true) return;
|
||||
//System.out.println(createCaseClosure(new UnicodeSet("[a{bc}{def}{oss}]")));
|
||||
//System.out.println(createCaseClosure(new UnicodeSet("[a-zß{aa}]")));
|
||||
GenerateCldrTests t = new GenerateCldrTests();
|
||||
//t.generate(new ULocale("hu"), null);
|
||||
t.generate(".*");
|
||||
t.generate(options[MATCH].value);
|
||||
/*
|
||||
t.generate(new ULocale("da"));
|
||||
t.generate(new ULocale("hu"));
|
||||
|
@ -90,12 +109,14 @@ public class GenerateCldrTests {
|
|||
*/
|
||||
} finally {
|
||||
log.close();
|
||||
System.out.println("Done");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
/*
|
||||
private static void compareAvailable() {
|
||||
ULocale[] cols = Collator.getAvailableULocales();
|
||||
Locale[] alocs = NumberFormat.getAvailableLocales();
|
||||
|
@ -110,6 +131,7 @@ public class GenerateCldrTests {
|
|||
log.println("collation - main");
|
||||
showLocales(sCols);
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @param sLocs
|
||||
|
@ -147,12 +169,18 @@ public class GenerateCldrTests {
|
|||
ULocale parent = new ULocale(lang);
|
||||
//System.out.println(item + ", " + parent);
|
||||
parentToLocales.add(parent, item);
|
||||
String rules = ((RuleBasedCollator)Collator.getInstance(item)).getRules();
|
||||
/*
|
||||
RuleBasedCollator col = cldrCollations.getInstance(item);
|
||||
if (col == null) {
|
||||
System.out.println("No collator for: " + item);
|
||||
}
|
||||
String rules = col.getRules(); // ((RuleBasedCollator)Collator.getInstance(item)).getRules();
|
||||
rulesToLocales.add(rules, item);
|
||||
localesToRules.put(item, rules);
|
||||
*/
|
||||
}
|
||||
|
||||
Set collationLocales = addULocales(Collator.getAvailableULocales(), new TreeSet(ULocaleComparator));
|
||||
Set collationLocales; // = addULocales(Collator.getAvailableULocales(), new TreeSet(ULocaleComparator));
|
||||
Set numberLocales = addULocales(NumberFormat.getAvailableLocales(), new TreeSet(ULocaleComparator));
|
||||
Set dateLocales = addULocales(DateFormat.getAvailableLocales(), new TreeSet(ULocaleComparator));
|
||||
Set allLocales = new TreeSet(ULocaleComparator);
|
||||
|
@ -162,7 +190,10 @@ public class GenerateCldrTests {
|
|||
Relation rulesToLocales = new Relation(new TreeMap(), cm);
|
||||
Relation parentToLocales = new Relation(new TreeMap(ULocaleComparator), cm);
|
||||
|
||||
{
|
||||
void getLocaleList() {
|
||||
collationLocales = new TreeSet(ULocaleComparator);
|
||||
collationLocales.addAll(cldrCollations.getAvailableSet());
|
||||
/*
|
||||
collationLocales = addULocales(new String[] { // HACK
|
||||
"ga",
|
||||
"nl",
|
||||
|
@ -174,6 +205,7 @@ public class GenerateCldrTests {
|
|||
"zh@collation=stroke",
|
||||
"zh@collation=traditional",
|
||||
}, collationLocales);
|
||||
*/
|
||||
allLocales.addAll(collationLocales);
|
||||
allLocales.addAll(numberLocales);
|
||||
allLocales.addAll(dateLocales);
|
||||
|
@ -198,11 +230,16 @@ public class GenerateCldrTests {
|
|||
*/
|
||||
}
|
||||
|
||||
/**
|
||||
* @param item
|
||||
*/
|
||||
|
||||
CldrOthers cldrOthers;
|
||||
|
||||
void generate(String pat) throws Exception {
|
||||
cldrOthers = new CldrOthers(options[SOURCEDIR].value + "main" + File.separator, pat);
|
||||
cldrOthers.show();
|
||||
//if (true) return;
|
||||
cldrCollations = new CldrCollations(options[SOURCEDIR].value + "collation" + File.separator, pat);
|
||||
cldrCollations.show();
|
||||
getLocaleList();
|
||||
|
||||
Matcher m = Pattern.compile(pat).matcher("");
|
||||
for (Iterator it = parentToLocales.keySet().iterator(); it.hasNext();) {
|
||||
String p = it.next().toString();
|
||||
|
@ -214,15 +251,15 @@ public class GenerateCldrTests {
|
|||
private void generate(ULocale locale) throws Exception {
|
||||
out = BagFormatter.openUTF8Writer(options[DESTDIR].value, locale + ".xml");
|
||||
out.println("<?xml version='1.0' encoding='UTF-8' ?>");
|
||||
out.println("<!DOCTYPE ldml SYSTEM 'cldrTest.dtd'>");
|
||||
out.println("<!DOCTYPE ldml SYSTEM 'http://www.unicode.org/cldr/dtd/1.2/beta/cldrTest.dtd'>");
|
||||
out.println("<!-- For information, see readme.html -->");
|
||||
out.println(" <cldrTest version='1.2α' base='" + locale + "'>");
|
||||
out.println(" <cldrTest version='1.2' base='" + locale + "'>");
|
||||
out.println(" <!-- " + BagFormatter.toHTML.transliterate(
|
||||
locale.getDisplayName(ULocale.ENGLISH) + " ["
|
||||
+ locale.getDisplayName(locale))
|
||||
+ "] -->");
|
||||
generateItems(locale, numberLocales, NumberEquator, NumberShower);
|
||||
generateItems(locale, dateLocales, DateEquator, DateShower);
|
||||
//generateItems(locale, numberLocales, NumberEquator, NumberShower);
|
||||
//generateItems(locale, dateLocales, DateEquator, DateShower);
|
||||
generateItems(locale, collationLocales, CollationEquator, CollationShower);
|
||||
out.println(" </cldrTest>");
|
||||
out.close();
|
||||
|
@ -272,7 +309,7 @@ public class GenerateCldrTests {
|
|||
*/
|
||||
void add(ULocale locale, Map uniqueLocales) {
|
||||
try {
|
||||
RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(locale);
|
||||
RuleBasedCollator col = cldrCollations.getInstance(locale); // (RuleBasedCollator) Collator.getInstance(locale);
|
||||
// for our purposes, separate locales if we are using different exemplars
|
||||
String key = col.getRules() + "\uFFFF" + getExemplarSet(locale, 0);
|
||||
Set s = (Set) uniqueLocales.get(key);
|
||||
|
@ -297,7 +334,8 @@ public class GenerateCldrTests {
|
|||
String n = locale.toString();
|
||||
int pos = n.indexOf('@');
|
||||
if (pos >= 0) locale = new ULocale(n.substring(0,pos));
|
||||
UnicodeSet result = LocaleData.getExemplarSet(locale, options);
|
||||
UnicodeSet result = cldrOthers.getExemplarSet(locale); // LocaleData.getExemplarSet(locale, options);
|
||||
if (options == 0) result.closeOver(UnicodeSet.CASE);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -547,15 +585,14 @@ public class GenerateCldrTests {
|
|||
|
||||
// ========== COLLATION ==========
|
||||
|
||||
static Equator CollationEquator = new Equator() {
|
||||
Equator CollationEquator = new Equator() {
|
||||
/**
|
||||
* Must both be ULocales
|
||||
*/
|
||||
public boolean equals(Object o1, Object o2) {
|
||||
ULocale loc1 = (ULocale) o1;
|
||||
ULocale loc2 = (ULocale) o2;
|
||||
return Collator.getInstance(loc1).equals(
|
||||
Collator.getInstance(loc2));
|
||||
return cldrCollations.getInstance(loc1).equals(cldrCollations.getInstance(loc2)); // Collator.getInstance(loc1).equals(Collator.getInstance(loc2));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -563,7 +600,7 @@ public class GenerateCldrTests {
|
|||
public void show(ULocale locale, Collection others) {
|
||||
showLocales("collation", others);
|
||||
|
||||
Collator col = Collator.getInstance(locale);
|
||||
Collator col = cldrCollations.getInstance(locale); // Collator.getInstance(locale);
|
||||
|
||||
UnicodeSet tailored = col.getTailoredSet();
|
||||
if (locale.getLanguage().equals("zh")) {
|
||||
|
@ -584,21 +621,23 @@ public class GenerateCldrTests {
|
|||
exemplars = nfc(exemplars);
|
||||
System.out.println(exemplars.toPattern(true));
|
||||
tailored.addAll(exemplars);
|
||||
UnicodeSet tailoredMinusHan = new UnicodeSet(tailored).removeAll(
|
||||
new UnicodeSet("[:script=han:]"));
|
||||
if (!exemplars.containsAll(tailoredMinusHan)) {
|
||||
//UnicodeSet tailoredMinusHan = new UnicodeSet(tailored).removeAll(SKIP_COLLATION_SET);
|
||||
if (!exemplars.containsAll(tailored)) {
|
||||
//BagFormatter bf = new BagFormatter();
|
||||
log.println("In Tailored, but not Exemplar; Locale: " + locale + "\t" + locale.getDisplayName());
|
||||
log.println(new UnicodeSet(tailoredMinusHan).removeAll(exemplars).toPattern(false));
|
||||
log.println(new UnicodeSet(tailored).removeAll(exemplars).toPattern(false));
|
||||
//bf.(log,"tailored", tailored, "exemplars", exemplars);
|
||||
log.flush();
|
||||
}
|
||||
tailored.addAll(new UnicodeSet("[\\ .02{12}]"));
|
||||
tailored.removeAll(SKIP_COLLATION_SET);
|
||||
|
||||
SortedBag bag = new SortedBag(col);
|
||||
doCollationResult(col, tailored, bag);
|
||||
out.println(" </collation>");
|
||||
}};
|
||||
static final UnicodeSet SKIP_COLLATION_SET = new UnicodeSet(
|
||||
"[[:script=han:][:script=hangul:]-[\u4e00-\u4eff \u9f00-\u9fff \uac00-\uacff \ud700-\ud7ff]]");
|
||||
|
||||
/**
|
||||
* @param col
|
||||
|
@ -638,6 +677,272 @@ public class GenerateCldrTests {
|
|||
out.println(" </result>");
|
||||
}
|
||||
|
||||
static public Set getMatchingXMLFiles(String sourceDir, String localeRegex) {
|
||||
Matcher m = Pattern.compile(localeRegex).matcher("");
|
||||
Set s = new TreeSet();
|
||||
File[] files = new File(sourceDir).listFiles();
|
||||
for (int i = 0; i < files.length; ++i) {
|
||||
String name = files[i].getName();
|
||||
if (!name.endsWith(".xml")) continue;
|
||||
String locale = name.substring(0,name.length()-4); // drop .xml
|
||||
if (!locale.equals("root") && !m.reset(locale).matches()) continue;
|
||||
s.add(locale);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
static class CldrOthers {
|
||||
Map ulocale_exemplars = new TreeMap(ULocaleComparator);
|
||||
Map uniqueExemplars = new HashMap();
|
||||
String sourceDir;
|
||||
Set locales = new TreeSet(ULocaleComparator);
|
||||
|
||||
UnicodeSet getExemplarSet(ULocale locale) {
|
||||
return (UnicodeSet) ulocale_exemplars.get(locale);
|
||||
}
|
||||
|
||||
void show() {
|
||||
log.println("Showing Locales");
|
||||
log.println("Unique Exemplars: " + uniqueExemplars.size());
|
||||
for (Iterator it2 = ulocale_exemplars.keySet().iterator(); it2.hasNext();) {
|
||||
ULocale locale = (ULocale) it2.next();
|
||||
UnicodeSet us = getExemplarSet(locale);
|
||||
log.println("\t" + locale + ", " + us);
|
||||
}
|
||||
}
|
||||
static final ULocale ROOT = new ULocale("root"); // since CLDR has different root.
|
||||
|
||||
CldrOthers(String sourceDir, String localeRegex) {
|
||||
this.sourceDir = sourceDir;
|
||||
Set s = getMatchingXMLFiles(sourceDir, localeRegex);
|
||||
for (Iterator it = s.iterator(); it.hasNext();) {
|
||||
getInfo((String) it.next());
|
||||
}
|
||||
// now do inheritance manually
|
||||
for (Iterator it = locales.iterator(); it.hasNext();) {
|
||||
ULocale locale = (ULocale) it.next();
|
||||
UnicodeSet ex = (UnicodeSet) ulocale_exemplars.get(locale);
|
||||
if (ex != null) continue;
|
||||
for (ULocale parent = locale.getFallback(); parent != null; parent = parent.getFallback()) {
|
||||
ULocale fixedParent = parent.getLanguage().length() == 0 ? ROOT : parent;
|
||||
ex = (UnicodeSet) ulocale_exemplars.get(fixedParent);
|
||||
if (ex == null) continue;
|
||||
ulocale_exemplars.put(locale, ex);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
void getInfo(String locale) {
|
||||
//System.out.println(locale);
|
||||
locales.add(new ULocale(locale));
|
||||
// Document doc = LDMLUtilities.getFullyResolvedLDML(sourceDir, locale, false, false, false);
|
||||
Document doc = LDMLUtilities.parse(sourceDir + locale + ".xml", false);
|
||||
Node node = LDMLUtilities.getNode(doc, "//ldml/characters/exemplarCharacters");
|
||||
if (node == null) return;
|
||||
if (isDraft(node)) System.out.println("Skipping draft: " + locale + ", " + getXPath(node));
|
||||
String exemplars = LDMLUtilities.getNodeValue(node);
|
||||
UnicodeSet exemplarSet = new UnicodeSet(exemplars);
|
||||
UnicodeSet fixed = (UnicodeSet) uniqueExemplars.get(exemplarSet);
|
||||
if (fixed == null) {
|
||||
uniqueExemplars.put(exemplarSet, exemplarSet);
|
||||
fixed = exemplarSet;
|
||||
}
|
||||
ulocale_exemplars.put(new ULocale(locale), fixed);
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean isDraft(Node node) {
|
||||
for (; node.getNodeType() != Node.DOCUMENT_NODE; node = node.getParentNode()){
|
||||
NamedNodeMap attributes = node.getAttributes();
|
||||
if (attributes == null) continue;
|
||||
for (int i = 0; i < attributes.getLength(); ++i) {
|
||||
Node attribute = attributes.item(i);
|
||||
if (attribute.getNodeName().equals("draft") && attribute.getNodeValue().equals("true")) return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static String getXPath(Node node) {
|
||||
StringBuffer xpathFragment = new StringBuffer();
|
||||
StringBuffer xpath = new StringBuffer();
|
||||
for (; node.getNodeType() != Node.DOCUMENT_NODE; node = node.getParentNode()){
|
||||
xpathFragment.setLength(0);
|
||||
xpathFragment.append('/').append(node.getNodeName());
|
||||
NamedNodeMap attributes = node.getAttributes();
|
||||
if (attributes != null) {
|
||||
for (int i = 0; i < attributes.getLength(); ++i) {
|
||||
Node attribute = attributes.item(i);
|
||||
xpathFragment.append("[@").append(attribute.getNodeName()).append('=')
|
||||
.append(attribute.getNodeValue()).append(']');
|
||||
}
|
||||
}
|
||||
xpath.insert(0, xpathFragment);
|
||||
}
|
||||
xpath.insert(0, '/');
|
||||
return xpath.toString();
|
||||
}
|
||||
|
||||
public static String getParent(String locale) {
|
||||
int pos = locale.lastIndexOf('_');
|
||||
if (pos >= 0) {
|
||||
return locale.substring(0,pos);
|
||||
}
|
||||
if (!locale.equals("root")) return "root";
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
static class CldrCollations {
|
||||
Set validLocales = new TreeSet();
|
||||
Map ulocale_rules = new TreeMap(ULocaleComparator);
|
||||
Map locale_types_rules = new TreeMap();
|
||||
String sourceDir;
|
||||
Map collation_collation = new HashMap();
|
||||
RuleBasedCollator emptyCollator = (RuleBasedCollator) Collator.getInstance(new ULocale(""));
|
||||
|
||||
public Set getAvailableSet() {
|
||||
return ulocale_rules.keySet();
|
||||
}
|
||||
|
||||
public RuleBasedCollator getInstance(ULocale locale) {
|
||||
return (RuleBasedCollator) ulocale_rules.get(locale);
|
||||
}
|
||||
|
||||
void show() {
|
||||
log.println("Showing Locales");
|
||||
log.println("Unique Collators: " + collation_collation.size());
|
||||
for (Iterator it2 = ulocale_rules.keySet().iterator(); it2.hasNext();) {
|
||||
ULocale locale = (ULocale) it2.next();
|
||||
RuleBasedCollator col = (RuleBasedCollator) ulocale_rules.get(locale);
|
||||
log.println("\t" + locale + ", " + col.getRules());
|
||||
}
|
||||
}
|
||||
|
||||
CldrCollations(String sourceDir, String localeRegex) throws Exception {
|
||||
this.sourceDir = sourceDir;
|
||||
Set s = getMatchingXMLFiles(sourceDir, localeRegex);
|
||||
for (Iterator it = s.iterator(); it.hasNext();) {
|
||||
getCollationRules((String) it.next());
|
||||
}
|
||||
|
||||
// now fixup the validLocales, adding in what they inherit
|
||||
// TODO, add check: validSubLocales are masked by intervening locales.
|
||||
for (Iterator it = validLocales.iterator(); it.hasNext(); ) {
|
||||
String locale = (String) it.next();
|
||||
Map types_rules = (Map) locale_types_rules.get(locale);
|
||||
if (types_rules != null) log.println("Weird: overlap in validLocales: " + locale);
|
||||
else {
|
||||
for (String parentlocale = getParent(locale); parentlocale != null; locale = getParent(parentlocale)) {
|
||||
types_rules = (Map) locale_types_rules.get(parentlocale);
|
||||
if (types_rules != null) {
|
||||
locale_types_rules.put(locale, types_rules);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// now generate the @-style locales
|
||||
for (Iterator it = locale_types_rules.keySet().iterator(); it.hasNext(); ) {
|
||||
String locale = (String) it.next();
|
||||
Map types_rules = (Map) locale_types_rules.get(locale);
|
||||
for (Iterator it2 = types_rules.keySet().iterator(); it2.hasNext(); ) {
|
||||
String type = (String) it2.next();
|
||||
RuleBasedCollator col = (RuleBasedCollator) types_rules.get(type);
|
||||
String name = type.equals("standard") ? locale : locale + "@collation=" + type;
|
||||
ulocale_rules.put(new ULocale(name), col);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static String replace(String source, String pattern, String replacement) {
|
||||
// dumb code for now
|
||||
for (int pos = source.indexOf(pattern, 0); pos >= 0; pos = source.indexOf(pattern, pos + 1)) {
|
||||
source = source.substring(0, pos) + replacement + source.substring(pos+pattern.length());
|
||||
}
|
||||
return source;
|
||||
}
|
||||
|
||||
private void getCollationRules(String locale) throws Exception {
|
||||
System.out.println(locale);
|
||||
Document doc = LDMLUtilities.getFullyResolvedLDML(sourceDir, locale, false, false, false);
|
||||
Node node = LDMLUtilities.getNode(doc, "//ldml/collations");
|
||||
LDML2ICUConverter cnv = new LDML2ICUConverter();
|
||||
StringBuffer stringBuffer = new StringBuffer();
|
||||
ICUResourceWriter.ResourceTable resource = (ICUResourceWriter.ResourceTable) cnv.parseCollations(node, stringBuffer);
|
||||
Map types_rules = new TreeMap();
|
||||
locale_types_rules.put(locale, types_rules);
|
||||
for (Resource current = resource.first; current != null; current = current.next) {
|
||||
//System.out.println(current.name);
|
||||
if (current instanceof ICUResourceWriter.ResourceTable) {
|
||||
ICUResourceWriter.ResourceTable table = (ICUResourceWriter.ResourceTable) current;
|
||||
for (Resource current2 = table.first; current2 != null; current2 = current2.next) {
|
||||
if (current2 instanceof ICUResourceWriter.ResourceString) {
|
||||
ICUResourceWriter.ResourceString foo = (ICUResourceWriter.ResourceString) current2;
|
||||
//System.out.println("\t" + foo.name + ", " + foo.val);
|
||||
/* skip since the utilities have the wrong value
|
||||
if (current.name.equals("validSubLocales")) {
|
||||
// skip since it is wrong
|
||||
log.println("Valid Sub Locale: " + foo.name);
|
||||
validLocales.add(foo.name);
|
||||
} else
|
||||
*/
|
||||
if (foo.name.equals("Sequence")) {
|
||||
String rules = foo.val;
|
||||
RuleBasedCollator fixed = generateCollator(locale, current.name, foo.name, foo.val);
|
||||
if (fixed != null) types_rules.put(current.name, fixed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//current.write(System.out,0,false);
|
||||
}
|
||||
// now get the valid sublocales
|
||||
Document doc2 = LDMLUtilities.parse(sourceDir + locale + ".xml", false);
|
||||
Node colls = LDMLUtilities.getNode(doc2,"//ldml/collations");
|
||||
String validSubLocales = LDMLUtilities.getAttributeValue(colls, "validSubLocales");
|
||||
if (validSubLocales != null) {
|
||||
String items[] = new String[100]; // allocate plenty
|
||||
Utility.split(validSubLocales, ' ', items);
|
||||
for (int i = 0; items[i].length() != 0; ++i) {
|
||||
log.println("Valid Sub Locale: " + items[i]);
|
||||
validLocales.add(items[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param locale
|
||||
* @param current
|
||||
* @param foo
|
||||
* @param rules
|
||||
*/
|
||||
private RuleBasedCollator generateCollator(String locale, String current, String foo, String rules) {
|
||||
RuleBasedCollator fixed = null;
|
||||
try {
|
||||
if (rules.equals("")) fixed = emptyCollator;
|
||||
else {
|
||||
rules = replace(rules, "[optimize[", "[optimize [");
|
||||
rules = replace(rules, "[suppressContractions[", "[suppressContractions [");
|
||||
RuleBasedCollator col = new RuleBasedCollator(rules);
|
||||
fixed = (RuleBasedCollator) collation_collation.get(col);
|
||||
if (fixed == null) {
|
||||
collation_collation.put(col, col);
|
||||
fixed = col;
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.println("***Cannot create collator from: " + locale + ", " + current + ", " + foo + ", " + rules);
|
||||
e.printStackTrace(log);
|
||||
RuleBasedCollator coll = (RuleBasedCollator)Collator.getInstance(new ULocale(locale));
|
||||
String oldrules = coll.getRules();
|
||||
log.println("Old ICU4J: " + oldrules);
|
||||
log.println("Equal?: " + oldrules.equals(rules));
|
||||
}
|
||||
return fixed;
|
||||
}
|
||||
}
|
||||
// ========== UNICODESET UTILITIES ==========
|
||||
|
||||
public static interface Apply {
|
||||
|
|
|
@ -42,8 +42,12 @@ import com.ibm.icu.dev.test.util.BagFormatter;
|
|||
import com.ibm.icu.dev.tool.UOption;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.Normalizer;
|
||||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import com.ibm.icu.util.UResourceBundle;
|
||||
|
||||
|
@ -63,7 +67,8 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
static final boolean DEBUG = false;
|
||||
static final boolean DEBUG2 = false;
|
||||
static final boolean DEBUG_SHOW_ADD = false;
|
||||
static final boolean DEBUG_ELEMENT = true;
|
||||
static final boolean DEBUG_ELEMENT = false;
|
||||
static final boolean DEBUG_SHOW_BAT = false;
|
||||
|
||||
static final boolean FIX_ZONE_ALIASES = true;
|
||||
|
||||
|
@ -117,6 +122,7 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
// if (baseName.equals("root")) temp.addMissing();
|
||||
|
||||
temp.writeTo(options[DESTDIR].value, baseName);
|
||||
generateBat(options[SOURCEDIR].value, baseName + ".xml", options[DESTDIR].value, baseName + ".xml");
|
||||
sidewaysView.putData(temp.data, baseName);
|
||||
log.flush();
|
||||
}
|
||||
|
@ -298,9 +304,9 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
EndNode value = (EndNode) data.get(key);
|
||||
GenerateSidewaysView parent = (GenerateSidewaysView) toRemove.get(key);
|
||||
EndNode parentValue = (EndNode) parent.data.get(key);
|
||||
log.println("Removing " + key.toString(true, 0) + "\t" + value);
|
||||
log.println("Removing " + key.toString(true, 0, Integer.MAX_VALUE) + "\t" + value);
|
||||
ElementChain parentKey = (ElementChain) parent.data.getKeyFor(key);
|
||||
log.println("\tIn " + parent.filename + ":\t" + parentKey.toString(true, 0) + "\t"+ parentValue);
|
||||
log.println("\tIn " + parent.filename + ":\t" + parentKey.toString(true, 0, Integer.MAX_VALUE) + "\t"+ parentValue);
|
||||
data.remove(key);
|
||||
}
|
||||
}
|
||||
|
@ -434,6 +440,68 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
return buffer.toString();
|
||||
}
|
||||
|
||||
static void generateBat(String sourceDir, String sourceFile, String targetDir, String targetFile) {
|
||||
boolean needBat = true;
|
||||
try {
|
||||
BufferedReader b1 = BagFormatter.openUTF8Reader(sourceDir, sourceFile);
|
||||
BufferedReader b2 = BagFormatter.openUTF8Reader(targetDir, targetFile);
|
||||
while (true) {
|
||||
String line1 = b1.readLine();
|
||||
String line2 = b2.readLine();
|
||||
if (line1 == null && line2 == null) {
|
||||
needBat = false;
|
||||
break;
|
||||
}
|
||||
if (line1 == null || line2 == null) {
|
||||
if (DEBUG_SHOW_BAT) System.out.println("*File line counts differ: ");
|
||||
break;
|
||||
}
|
||||
if (!equalsIgnoringWhitespace(line1, line2)) {
|
||||
if (DEBUG_SHOW_BAT) {
|
||||
System.out.println("*File lines differ: ");
|
||||
System.out.println("\t1\t" + line1);
|
||||
System.out.println("\t2\t" + line2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
b1.close();
|
||||
b2.close();
|
||||
String batDir = targetDir + File.separator + "diff" + File.separator;
|
||||
String batName = targetFile + ".bat";
|
||||
if (needBat) {
|
||||
PrintWriter bat = BagFormatter.openUTF8Writer(batDir, batName);
|
||||
bat.println("\"C:\\Program Files\\Compare It!\\wincmp3.exe\" " +
|
||||
new File(sourceDir + sourceFile).getCanonicalPath() + " " +
|
||||
new File(targetDir + targetFile).getCanonicalPath());
|
||||
bat.close();
|
||||
} else {
|
||||
File f = new File(batDir + batName);
|
||||
if (f.exists()) {
|
||||
if (DEBUG_SHOW_BAT) System.out.println("*Deleting old " + f.getCanonicalPath());
|
||||
f.delete();
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
}
|
||||
|
||||
static boolean equalsIgnoringWhitespace(String a, String b) {
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
char c, d;
|
||||
while (true) { // don't worry about surrogates
|
||||
do {c = i < a.length() ? a.charAt(i++) : 0xFFFF;} while (UCharacter.isUWhiteSpace(c));
|
||||
do {d = j < b.length() ? b.charAt(j++) : 0xFFFF;} while (UCharacter.isUWhiteSpace(d));
|
||||
if (c != d) return false;
|
||||
if (c == 0xFFFF) return true;
|
||||
}
|
||||
}
|
||||
|
||||
static class SimpleAttribute implements Comparable {
|
||||
String name;
|
||||
|
@ -493,8 +561,7 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
|
||||
// hack to removed #IMPLIED
|
||||
if (elementName.equals("ldml")
|
||||
&& name.equals("version")
|
||||
&& value.equals("1.1")) continue;
|
||||
&& name.equals("version")) continue; // skip version
|
||||
if (name.equals("type")
|
||||
&& value.equals("standard")) continue;
|
||||
|
||||
|
@ -588,6 +655,18 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
public void add(SimpleAttributes attributes) {
|
||||
contents.addAll(attributes.contents);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ignorelist
|
||||
*/
|
||||
public void removeAttributes(Set ignorelist) {
|
||||
for (Iterator it = contents.iterator(); it.hasNext();) {
|
||||
SimpleAttribute sa = (SimpleAttribute) it.next();
|
||||
if (ignorelist.contains(sa.name)) {
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class TripleData {
|
||||
|
@ -932,6 +1011,13 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
return;
|
||||
}
|
||||
*/
|
||||
/**
|
||||
* @param ignorelist
|
||||
* @return
|
||||
*/
|
||||
public void removeAttributes(Set ignorelist) {
|
||||
attributes.removeAttributes(ignorelist);
|
||||
}
|
||||
}
|
||||
|
||||
private void writeElementComment(StringBuffer out, String comment, int common) {
|
||||
|
@ -1004,12 +1090,14 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
|
||||
public String toString() {
|
||||
//throw new IllegalArgumentException("Don't use");
|
||||
return toString(true, 0);
|
||||
return toString(true, 0, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
public String toString(boolean path, int startLevel) {
|
||||
public String toString(boolean path, int startLevel, int limitLevel) {
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
for (int i = startLevel; i < contexts.size(); ++i) {
|
||||
if (startLevel < 0) startLevel = 0;
|
||||
if (limitLevel > contexts.size()) limitLevel = contexts.size();
|
||||
for (int i = startLevel; i < limitLevel; ++i) {
|
||||
//if (i != 0) buffer.append(' ');
|
||||
Element e = (Element) contexts.get(i);
|
||||
if (path) buffer.append("/" + e.toString(Element.PATH));
|
||||
|
@ -1095,6 +1183,19 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
return (Element) contexts.get(contexts.size()-1);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ignorelist
|
||||
* @return
|
||||
*/
|
||||
public ElementChain createRemovingAttributes(Set ignorelist) {
|
||||
ElementChain result = new ElementChain(this);
|
||||
for (int i = 0; i < contexts.size(); ++i) {
|
||||
Element e = (Element)contexts.get(i);
|
||||
e.removeAttributes(ignorelist);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param comment
|
||||
*/
|
||||
|
@ -1377,9 +1478,11 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
Map contextCache = new TreeMap();
|
||||
Set fileNames = new TreeSet();
|
||||
Set allTypes = new TreeSet();
|
||||
|
||||
void putData(OrderedMap data, String filename) {
|
||||
for (Iterator it = data.iterator(); it.hasNext();) {
|
||||
ElementChain copy = (ElementChain) it.next();
|
||||
ElementChain original = (ElementChain) it.next();
|
||||
ElementChain copy = original.createRemovingAttributes(IGNORELIST);
|
||||
EndNode endNode = (EndNode)data.get(copy);
|
||||
Map dataToFile = (Map)contextCache.get(copy);
|
||||
if (dataToFile == null) {
|
||||
|
@ -1397,12 +1500,21 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
if (filename.indexOf('_') < 0
|
||||
|| filename.equals("zh_Hant")) fileNames.add(filename); // add all language-only locales
|
||||
}
|
||||
|
||||
String getChainName(ElementChain ec) {
|
||||
Element e = (Element)ec.contexts.get(1);
|
||||
|
||||
int getChainDepth(ElementChain ec) {
|
||||
Element e = (Element)ec.contexts.get(1);
|
||||
String result = e.elementName;
|
||||
if (result.equals("numbers") || result.equals("localeDisplayNames") || result.equals("dates")) {
|
||||
e = (Element)ec.contexts.get(2);
|
||||
return 3;
|
||||
}
|
||||
return 2;
|
||||
}
|
||||
|
||||
String getChainName(ElementChain ec, int limit) {
|
||||
Element e = (Element)ec.contexts.get(1);
|
||||
String result = e.elementName;
|
||||
for (int i = 2; i < limit; ++i) {
|
||||
e = (Element)ec.contexts.get(i);
|
||||
result += "_" + e.elementName;
|
||||
}
|
||||
return result;
|
||||
|
@ -1426,7 +1538,8 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
int lineCounter = 1;
|
||||
for (Iterator it = contextCache.keySet().iterator(); it.hasNext();) {
|
||||
ElementChain stack = (ElementChain) it.next();
|
||||
String chainName = getChainName(stack);
|
||||
int limit = getChainDepth(stack);
|
||||
String chainName = getChainName(stack, limit);
|
||||
if (!chainName.equals(lastChainName)) {
|
||||
if (out != null) {
|
||||
out.println("</table>");
|
||||
|
@ -1435,16 +1548,16 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
allTypes.add(chainName); // add to the list
|
||||
out = openAndDoHeader(chainName);
|
||||
lastChainName = chainName;
|
||||
lineCounter = 1;
|
||||
lineCounter = 0;
|
||||
}
|
||||
String key = stack.toString(true, 1);
|
||||
String key = stack.toString(true, limit, Integer.MAX_VALUE);
|
||||
// strip /ldml@version="1.2"/;
|
||||
|
||||
lineCounter++;
|
||||
out.println("<tr><td colspan='2' class='head'>" +
|
||||
"<a href='#" + lineCounter + "' name='" + lineCounter + "'>"
|
||||
+ lineCounter + "</a> " +
|
||||
BagFormatter.toHTML.transliterate(key) + "</td></tr>");
|
||||
lineCounter++;
|
||||
Map dataToFile = (Map) contextCache.get(stack);
|
||||
// walk through once, and gather all the filenames
|
||||
Set remainingFiles = new TreeSet(fileNames);
|
||||
|
@ -1463,7 +1576,9 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
files.addAll(remainingFiles);
|
||||
dataStyle = " class='nodata'";
|
||||
}
|
||||
out.print("<tr><th" + dataStyle + ">\"" + data + "\"</th><td>");
|
||||
out.print("<tr><th" + dataStyle +
|
||||
(lineCounter == 1 ? " width='20%'" : "")
|
||||
+ ">\"" + data + "\"</th><td>");
|
||||
boolean first = true;
|
||||
for (Iterator it3 = files.iterator(); it3.hasNext();) {
|
||||
if (first) first = false;
|
||||
|
@ -1503,7 +1618,20 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
out.println("<link rel='stylesheet' type='text/css' href='by_type.css'>");
|
||||
out.println("</head>");
|
||||
out.println("<body>");
|
||||
out.println("<ul><li><a href=\"index.html\">Index</a></li></ul>");
|
||||
out.println("<h2>By-Type Chart for " + "//ldml/" + type.replace('_', '/') + "/...</h1>");
|
||||
out.println("<p>" +
|
||||
"<a href=\"index.html\">By-Type Chart Index</a> " +
|
||||
"| <a href='http://www.jtcsv.com/cgibin/cldrwiki.pl?InterimVettingCharts'>Interim Vetting Charts</a>" +
|
||||
"| <a href='http://oss.software.ibm.com/cvs/icu/~checkout~/locale/docs/tr35.html'>LDML Specification</a>" +
|
||||
"| <a href='http://www.unicode.org/cldr/filing_bug_reports.html'>Filing Bug Reports</a>" +
|
||||
"| <a href='http://oss.software.ibm.com/cvs/icu/~checkout~/locale/comparison_charts.html'>Cross Platform Charts</a>" +
|
||||
"</p>");
|
||||
out.println("<p>This chart shows values across locales for different fields. " +
|
||||
"Each value is listed under the field designator (in XML XPath format), " +
|
||||
"followed by all the locales that use it. " +
|
||||
"Locales are omitted if the value would be the same as the parent's. " +
|
||||
"The locales are listed in the format: ·aa· for searching. " +
|
||||
"The value appears in red if it is the same as the root. </p>");
|
||||
out.println("<table>");
|
||||
return out;
|
||||
}
|
||||
|
@ -1526,7 +1654,13 @@ import com.ibm.icu.util.UResourceBundle;
|
|||
out.println("<link rel='stylesheet' type='text/css' href='http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/common.css'>");
|
||||
out.println("<link rel='stylesheet' type='text/css' href='by_type.css'>");
|
||||
out.println("</head>");
|
||||
out.println("<body><ul>");
|
||||
out.println("<body><h1>By Type Chart Index</h1>");
|
||||
out.println("<p>The following are charts for the individual datatypes, " +
|
||||
"that show a comparison across locales for different fields. " +
|
||||
"For example, in the orientation chart, one can see that all locales " +
|
||||
"are left-to-right except ·ar· ·fa· ·he· ·ps· (and their children).</p>" +
|
||||
"<p>Note: these charts do not yet include collations</p>");
|
||||
out.println("<ul>");
|
||||
for(Iterator e = allTypes.iterator();e.hasNext();) {
|
||||
String f = (String)e.next();
|
||||
out.println(" <li><a href=\"" + f + ".html" + "\">" + f + "</a>");
|
||||
|
|
Loading…
Add table
Reference in a new issue