ICU-0 update tools

X-SVN-Rev: 16481
This commit is contained in:
Mark Davis 2004-10-14 17:54:55 +00:00
parent 6d31727dc7
commit 5f191b938e
7 changed files with 323 additions and 106 deletions

View file

@ -8,6 +8,7 @@
*/
package com.ibm.icu.dev.tool.cldr;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
@ -37,6 +38,7 @@ import org.xml.sax.helpers.DefaultHandler;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.tool.UOption;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.Collator;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.ULocale;
@ -103,6 +105,32 @@ import com.ibm.icu.util.UResourceBundle;
log.close();
}
}
/**
 * Lazily loaded lookup table read from timezone_aliases.txt.
 *
 * Each line of the file is split on ';' into two pieces; the trimmed first
 * piece is the key and the trimmed second piece is the value. After the file
 * is read, the empty string is mapped to the marker "EMPTY-REMOVE".
 */
public static class TimeZoneAliases {
    /** Key-to-value table; stays null until a load has completed successfully. */
    static Map map = null;

    /**
     * Reads the alias file and publishes the resulting table in {@link #map}.
     *
     * The table is built in a local variable and assigned to the static field
     * only after the whole file was read, so a failed load leaves the field
     * null and the next call to get() retries instead of silently answering
     * from an empty table.
     *
     * @throws RuntimeException wrapping the IOException if the file cannot be
     *         opened or read
     */
    static void init() {
        Map loaded = new HashMap();
        try {
            // NOTE(review): the directory is hard-coded to one developer's
            // machine layout - confirm before running elsewhere.
            BufferedReader br = BagFormatter.openUTF8Reader("C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\", "timezone_aliases.txt");
            try {
                String[] pieces = new String[2];
                while (true) {
                    String line = br.readLine();
                    if (line == null) break;
                    Utility.split(line, ';', pieces);
                    loaded.put(pieces[0].trim(), pieces[1].trim());
                }
            } finally {
                // Release the file handle even when reading fails part-way.
                br.close();
            }
            loaded.put("", "EMPTY-REMOVE");
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        map = loaded;
    }

    /**
     * Looks up an ID in the table, loading the table on first use.
     *
     * @param id the ID to look up
     * @return the value recorded for id, or null if the table has no entry
     */
    public static String get(String id) {
        if (map == null) init();
        return (String) map.get(id);
    }
}
static MapComparator elementOrdering = new MapComparator();
static MapComparator attributeOrdering = new MapComparator();
@ -185,6 +213,31 @@ import com.ibm.icu.util.UResourceBundle;
GenerateSidewaysView temp = getCLDR(current);
this.removeAll(temp);
}
detectAliases(filename);
}
/**
 * Reports the "type" attribute values of "zone" elements that have an entry
 * in TimeZoneAliases.
 *
 * The matching values are first gathered into a sorted set, so each one is
 * logged once and in sorted order, then one line per value is written to the
 * log together with its replacement.
 *
 * @param filename name included in each log line
 */
private void detectAliases(String filename) {
    Set aliasedIds = new TreeSet();

    // Pass 1: collect every zone type that the alias table knows about.
    Iterator chains = data.iterator();
    while (chains.hasNext()) {
        ElementChain chain = (ElementChain) chains.next();
        for (int level = 0; level < chain.contexts.size(); ++level) {
            Element element = (Element) chain.contexts.get(level);
            if (element.elementName.equals("zone")) {
                Iterator attrs = element.attributes.contents.iterator();
                while (attrs.hasNext()) {
                    SimpleAttribute attribute = (SimpleAttribute) attrs.next();
                    if (attribute.name.equals("type")
                            && TimeZoneAliases.get(attribute.value) != null) {
                        aliasedIds.add(attribute.value);
                    }
                }
            }
        }
    }

    // Pass 2: log each collected value with the replacement from the table.
    Iterator ids = aliasedIds.iterator();
    while (ids.hasNext()) {
        String alias = (String) ids.next();
        String replacement = TimeZoneAliases.get(alias);
        log.println("Fix Timezone Alias: " + filename + "\t" + alias + " => " + replacement);
    }
}
private void removeAll(GenerateSidewaysView temp) {
@ -246,6 +299,7 @@ import com.ibm.icu.util.UResourceBundle;
/**
 * Combines the hash codes of the name and value fields with XOR.
 */
public int hashCode() {
    final int nameHash = name.hashCode();
    final int valueHash = value.hashCode();
    return nameHash ^ valueHash;
}
/** Delegates to toString(boolean) with the path flag set to true. */
public String toString() {return toString(true);}
public String toString(boolean path) {
if (path) {
return "@" + name + "=\"" + BagFormatter.toHTML.transliterate(value) + "\"";
@ -286,6 +340,7 @@ import com.ibm.icu.util.UResourceBundle;
}
}
/** Delegates to toString(boolean) with the path flag set to true. */
public String toString() {return toString(true);}
public String toString(boolean path) {
StringBuffer buffer = new StringBuffer();
for (Iterator it = contents.iterator(); it.hasNext();) {
@ -353,6 +408,7 @@ import com.ibm.icu.util.UResourceBundle;
this.elementName = other.elementName;
this.attributes = new SimpleAttributes(other.attributes, elementName);
}
/** Delegates to toString(boolean) with the path flag set to true. */
public String toString() {return toString(true);}
public String toString(boolean path) {
return toString(START_VALUE, path);
}
@ -422,6 +478,7 @@ import com.ibm.icu.util.UResourceBundle;
contexts.remove(last);
}
/** Delegates to toString(boolean) with the path flag set to true. */
public String toString() {return toString(true);}
public String toString(boolean path) {
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < contexts.size(); ++i) {

View file

@ -0,0 +1,153 @@
ACT; Australia/Darwin
AET; Australia/Sydney
AGT; America/Buenos_Aires
ART; Africa/Cairo
AST; America/Anchorage
America/Atka; America/Adak
America/Ensenada; America/Tijuana
America/Fort_Wayne; America/Indianapolis
America/Indiana/Indianapolis; America/Indianapolis
America/Kentucky/Louisville; America/Louisville
America/Knox_IN; America/Indiana/Knox
America/Porto_Acre; America/Rio_Branco
America/Rosario; America/Cordoba
America/Shiprock; America/Denver
America/Virgin; America/St_Thomas
Antarctica/South_Pole; Antarctica/McMurdo
Arctic/Longyearbyen; Europe/Oslo
Asia/Ashkhabad; Asia/Ashgabat
Asia/Chungking; Asia/Chongqing
Asia/Dacca; Asia/Dhaka
Asia/Istanbul; Europe/Istanbul
Asia/Macao; Asia/Macau
Asia/Tel_Aviv; Asia/Jerusalem
Asia/Thimbu; Asia/Thimphu
Asia/Ujung_Pandang; Asia/Makassar
Asia/Ulan_Bator; Asia/Ulaanbaatar
Australia/ACT; Australia/Sydney
Australia/Canberra; Australia/Sydney
Australia/LHI; Australia/Lord_Howe
Australia/NSW; Australia/Sydney
Australia/North; Australia/Darwin
Australia/Queensland; Australia/Brisbane
Australia/South; Australia/Adelaide
Australia/Tasmania; Australia/Hobart
Australia/Victoria; Australia/Melbourne
Australia/West; Australia/Perth
Australia/Yancowinna; Australia/Broken_Hill
BET; America/Sao_Paulo
BST; Asia/Dhaka
Brazil/Acre; America/Porto_Acre
Brazil/DeNoronha; America/Noronha
Brazil/East; America/Sao_Paulo
Brazil/West; America/Manaus
CAT; Africa/Harare
CNT; America/St_Johns
CST; America/Chicago
CST6CDT; America/Chicago
CTT; Asia/Shanghai
Canada/Atlantic; America/Halifax
Canada/Central; America/Winnipeg
Canada/East-Saskatchewan; America/Regina
Canada/Eastern; America/Toronto
Canada/Mountain; America/Edmonton
Canada/Newfoundland; America/St_Johns
Canada/Pacific; America/Vancouver
Canada/Saskatchewan; America/Regina
Canada/Yukon; America/Whitehorse
Chile/Continental; America/Santiago
Chile/EasterIsland; Pacific/Easter
Cuba; America/Havana
EAT; Africa/Addis_Ababa
ECT; Europe/Paris
EST; America/Indianapolis
EST5EDT; America/New_York
Egypt; Africa/Cairo
Eire; Europe/Dublin
Etc/GMT+0; Etc/GMT
Etc/GMT-0; Etc/GMT
Etc/GMT0; Etc/GMT
Etc/Greenwich; Etc/GMT
Etc/Universal; Etc/UTC
Etc/Zulu; Etc/UTC
Europe/Nicosia; Asia/Nicosia
Europe/Tiraspol; Europe/Chisinau
GB; Europe/London
GB-Eire; Europe/London
GMT; Etc/GMT
GMT+0; Etc/GMT+0
GMT-0; Etc/GMT-0
GMT0; Etc/GMT0
Greenwich; Etc/Greenwich
HST; Pacific/Honolulu
Hongkong; Asia/Hong_Kong
IET; America/Indianapolis
IST; Asia/Calcutta
Iceland; Atlantic/Reykjavik
Iran; Asia/Tehran
Israel; Asia/Jerusalem
JST; Asia/Tokyo
Jamaica; America/Jamaica
Japan; Asia/Tokyo
Kwajalein; Pacific/Kwajalein
Libya; Africa/Tripoli
MIT; Pacific/Apia
MST; America/Phoenix
MST7MDT; America/Denver
Mexico/BajaNorte; America/Tijuana
Mexico/BajaSur; America/Mazatlan
Mexico/General; America/Mexico_City
Mideast/Riyadh87; Asia/Riyadh87
Mideast/Riyadh88; Asia/Riyadh88
Mideast/Riyadh89; Asia/Riyadh89
NET; Asia/Yerevan
NST; Pacific/Auckland
NZ; Pacific/Auckland
NZ-CHAT; Pacific/Chatham
Navajo; America/Denver
PLT; Asia/Karachi
PNT; America/Phoenix
PRC; Asia/Shanghai
PRT; America/Puerto_Rico
PST; America/Los_Angeles
PST8PDT; America/Los_Angeles
Pacific/Samoa; Pacific/Pago_Pago
Poland; Europe/Warsaw
Portugal; Europe/Lisbon
ROC; Asia/Taipei
ROK; Asia/Seoul
SST; Pacific/Guadalcanal
Singapore; Asia/Singapore
SystemV/AST4; America/Puerto_Rico
SystemV/AST4ADT; America/Halifax
SystemV/CST6; America/Regina
SystemV/CST6CDT; America/Chicago
SystemV/EST5; America/Indianapolis
SystemV/EST5EDT; America/New_York
SystemV/HST10; Pacific/Honolulu
SystemV/MST7; America/Phoenix
SystemV/MST7MDT; America/Denver
SystemV/PST8; Pacific/Pitcairn
SystemV/PST8PDT; America/Los_Angeles
SystemV/YST9; Pacific/Gambier
SystemV/YST9YDT; America/Anchorage
Turkey; Europe/Istanbul
UCT; Etc/UCT
US/Alaska; America/Anchorage
US/Aleutian; America/Adak
US/Arizona; America/Phoenix
US/Central; America/Chicago
US/East-Indiana; America/Indianapolis
US/Eastern; America/New_York
US/Hawaii; Pacific/Honolulu
US/Indiana-Starke; America/Indiana/Knox
US/Michigan; America/Detroit
US/Mountain; America/Denver
US/Pacific; America/Los_Angeles
US/Pacific-New; America/Los_Angeles
US/Samoa; Pacific/Pago_Pago
UTC; Etc/UTC
Universal; Etc/Universal
VST; Asia/Saigon
W-SU; Europe/Moscow
Zulu; Etc/Zulu

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
* $Date: 2004/02/07 01:01:14 $
* $Revision: 1.34 $
* $Date: 2004/10/14 17:54:56 $
* $Revision: 1.35 $
*
*******************************************************************************
*/
@ -169,7 +169,7 @@ public final class Main implements UCD_Types {
else if (arg.equalsIgnoreCase("JavascriptProperties")) WriteJavaScriptInfo.assigned();
else if (arg.equalsIgnoreCase("TestDirectoryIterator")) DirectoryIterator.test();
else if (arg.equalsIgnoreCase("checkIdentical")) GenerateData.handleIdentical();
else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.test();
else if (arg.equalsIgnoreCase("testnameuniqueness")) TestNameUniqueness.checkNameList();
else if (arg.equalsIgnoreCase("checkDifferences")) GenerateData.checkDifferences("3.2.0");
else if (arg.equalsIgnoreCase("Compare14652")) Compare14652.main(null);

View file

@ -1188,9 +1188,11 @@ public class MakeUnicodeFiles {
}
}
static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[\\= \\! \\? \\< \\> \u2264 \u2265 \u2282 \u2286 \u2283 \u2287]");
static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[\\~ \\= \\! \\? \\< \\> \u2264 \u2265 \u2282 \u2286 \u2283 \u2287]");
public static void testInvariants() throws IOException {
String[][] variables = new String[100][2];
int variableCount = 0;
PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt");
out.write('\uFEFF'); // BOM
BufferedReader in = BagFormatter.openUTF8Reader("", "UnicodeInvariants.txt");
@ -1202,8 +1204,6 @@ public class MakeUnicodeFiles {
int parseErrorCount = 0;
int testFailureCount = 0;
while (true) {
String rightSide = null;
String leftSide = null;
String line = in.readLine();
if (line == null) break;
if (line.startsWith("\uFEFF")) line = line.substring(1);
@ -1213,7 +1213,24 @@ public class MakeUnicodeFiles {
if (pos >= 0) line = line.substring(0,pos).trim();
if (line.length() == 0) continue;
// fix all the variables
String oldLine = line;
line = Utility.replace(line, variables, variableCount);
// detect variables
if (line.startsWith("Let")) {
int x = line.indexOf('=');
variables[variableCount][0] = line.substring(3,x).trim();
variables[variableCount][1] = line.substring(x+1).trim();
variableCount++;
System.out.println("Added variable: <" + variables[variableCount-1][0] + "><"
+ variables[variableCount-1][1] + ">");
continue;
}
char relation = 0;
String rightSide = null;
String leftSide = null;
UnicodeSet leftSet = null;
UnicodeSet rightSet = null;
try {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java,v $
* $Date: 2004/02/07 01:01:13 $
* $Revision: 1.2 $
* $Date: 2004/10/14 17:54:56 $
* $Revision: 1.3 $
*
*******************************************************************************
*/
@ -19,19 +19,97 @@ import java.text.DateFormat;
import java.text.SimpleDateFormat;
import com.ibm.text.utility.*;
import com.ibm.icu.dev.test.util.BagFormatter;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.text.UnicodeSet;
public class TestNameUniqueness implements UCD_Types {
public static void test() throws IOException {
new TestNameUniqueness().checkNames();
/**
 * Command-line entry point: runs checkNameList().
 *
 * @param args not used
 */
public static void main(String[] args) throws Exception {
checkNameList();
// Alternative check via checkNames(), currently disabled:
// new TestNameUniqueness().checkNames();
}
Map names = new HashMap();
int[] charCount = new int[128];
int[] samples = new int[128];
/**
 * Walks the lines of the files listed in {@link #files}, one file after the
 * other, and returns the name field extracted from each usable line.
 *
 * The raw text of the line most recently read is kept in {@link #line} so
 * callers can show it in diagnostics.
 */
public static class NameIterator {
    /** Index into files of the file currently being read; -1 before the first. */
    int fileCount = -1;
    /** Raw text of the line most recently read; null before the start and after the end. */
    String line;
    /** Reader for the current file; null before the first file and after the last. */
    BufferedReader br;
    /** Scratch array that receives the fields produced by Utility.split. */
    String[] pieces = new String[3];

    /**
     * Directory/file-name pairs, read in order. The first entry is parsed as
     * a tab-separated list, every later entry as a ';'-separated list.
     * NOTE(review): the paths are hard-coded to one machine's layout.
     */
    static String[][] files = {
        {"C:\\DATA\\", "pdam1040630.lst"},
        {"C:\\DATA\\UCD\\4.1.0-Update\\", "NamedCompositeEntities-4.1.0d2.txt"}
    };

    /**
     * Advances to the next usable line and extracts its name.
     *
     * Empty lines are skipped. In the first file, lines that do not start
     * with a digit or an upper-case A-F are skipped as well, and the name is
     * the part of the second tab-separated field that precedes any '(' or
     * '*'. In the other files the name is the second ';'-separated field.
     *
     * @return the extracted name, or null when every file has been read;
     *         calling again after that keeps returning null
     * @throws RuntimeException wrapping the IOException if a file cannot be
     *         opened or read
     */
    public String next() {
        while (true) {
            try {
                if (br != null) line = br.readLine();
                // Move on until some file yields a line. Looping (rather than
                // a single step) lets an empty file be passed over, and the
                // bounds check ends the iteration instead of indexing past
                // the end of the file table.
                while (line == null) {
                    if (br != null) {
                        br.close();
                        br = null;
                    }
                    if (fileCount + 1 >= files.length) return null;
                    fileCount++;
                    br = BagFormatter.openReader(files[fileCount][0], files[fileCount][1], "ISO-8859-1");
                    line = br.readLine();
                }
            } catch (IOException e) {
                // Fail loudly: continuing would either re-use a stale line or
                // end the iteration as if the input had been fully checked.
                throw new RuntimeException(e);
            }
            if (line.length() == 0) continue;
            if (fileCount == 0) {
                char c = line.charAt(0);
                // skip if doesn't start with hex digit
                if (!(('0' <= c && c <= '9') || ('A' <= c && c <= 'F'))) continue;
                Utility.split(line, '\t', pieces, true);
                Utility.split(pieces[1], '(', pieces, true);
                Utility.split(pieces[0], '*', pieces, true);
                return pieces[0];
            } else {
                Utility.split(line, ';', pieces, true);
                return pieces[1];
            }
        }
    }
}
/**
 * Reads every name supplied by a NameIterator and reports names whose
 * skeletons collide.
 *
 * Names that start with "<" are used verbatim as their own key and are never
 * reported as collisions; every other name is keyed by
 * UnicodeProperty.toNameSkeleton(name). When a key has been seen before, both
 * source lines are printed to System.out. A sample of the entries (see the
 * conditions below) is echoed as progress/diagnostic output.
 *
 * @throws IOException declared for callers; nothing in this body throws it
 *         directly
 */
public static void checkNameList() throws IOException {
    Map map = new HashMap();
    NameIterator nameIterator = new NameIterator();
    int lineCount = 0;
    while (true) {
        String name = nameIterator.next();
        if (name == null) break;

        String key;
        try {
            if (name.startsWith("<")) key = name;
            else key = UnicodeProperty.toNameSkeleton(name);
        } catch (RuntimeException e) {
            // Show which input line could not be converted before giving up.
            System.out.println("Error on " + nameIterator.line);
            throw e;
        }

        Object value = map.get(key);
        if (value != null && !key.startsWith("<")) {
            System.out.println("*!*!*!* Collision at " + key + " between: ");
            System.out.println("\t" + value);
            System.out.println("\t" + nameIterator.line);
            //throw new IllegalArgumentException();
        }
        map.put(key, nameIterator.line);

        // Count every entry exactly once. The counter used to be bumped at
        // the end of a short-circuit '||' chain, so entries matched by an
        // earlier condition were never counted.
        boolean periodicSample = (lineCount % 1000) == 0;
        lineCount++;

        if (periodicSample
                || nameIterator.line.startsWith("116C")
                || nameIterator.line.startsWith("1180")
                || name.indexOf('-') >= 0) {
            System.out.println("[" + lineCount + "]\t" + nameIterator.line + "\t" + name);
            System.out.println("\t" + name);
            System.out.println("\t" + key);
        }
    }
}
void checkNames() throws IOException {
PrintWriter out = Utility.openPrintWriter("name_uniqueness.txt", Utility.LATIN1_WINDOWS);
try {

View file

@ -1,92 +0,0 @@
# Invariance tests
# Each line indicates an invariant set relationship to be tested,
# and is of the form:
#
# line := set relation set
#
# relation := '=' // has identical contents to
# := ('>' | '⊃') // is proper superset of
# := ('≥' | '⊇') // is superset of
# := ('<' | '⊂') // is proper subset of
# := ('≤' | '⊆') // is subset of
# := '!' // has no intersection
# := '?' // none of the above (they overlap, and neither contains the other)
#
# A set is a standard UnicodeSet, but where $pv can be used to express properties
#
# pv := '$' '×'? prop (('=' | ':') value)?
#
# The × indicates that the property is the previous released version.
# That is, if the version is 4.0.1, then the × version is 4.0.0
# If the value is missing, it is defaulted to true
# If the value is of the form «...», then the ... is interpreted as a regular expression
# The property can be the short or long form as in the PropertyAliases.txt
# The value (if enumerated) can be the short or long form as in PropertyValueAliases.txt
#
# A UnicodeSet is a boolean combination of properties and character ranges, as you would see in
# Perl or other regular-expression languages. Examples:
# [$General_Category:Unassigned-[a-zA-Z]]
# For details, see http://oss.software.ibm.com/icu/userguide/unicodeSet.html
#
# WARNING: do not use \p{...} or [:...:] syntax, since those will be
# ICU's current version of properties, not the current snapshot's.
# Use the $ notation for properties (listed above) instead.
#
# When this file is parsed, an error message may contain <@>
# to indicate the location of an error in the input line.
# The following are not very interesting, but show examples of use
#$GC:Zs ! $GC:Zp
#$East_Asian_Width:Neutral ? $GC:Uppercase_Letter
$GC:Zs ? $Name:«.*SPACE.*»
[$script:greek&$gc:«.*letter.*»] = [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126]
# Examples of parsing errors
# $LBA:Neutral = $GC:Zp # example of nonexistent property
# $LB:foo = $GC:Zp # example of nonexistent value
# $GC:Zs @ $GC:Zp # example of unknown relation
# The following should be real invariants
# For illustration, different alias styles are used
$Line_Break:Unknown = [$General_Category:Unassigned $GeneralCategory:PrivateUse]
$LB:OP = $GC:Ps
$General_Category:Decimal_Number = $Numeric_Type:Decimal
$Whitespace ⊃ [$GC:Zs $GC:Zp $GC:Zl]
# Comparisons across versions
$ID_Start ⊇ $×ID_Start
$ID_Continue ⊇ $×ID_Continue
#$age:4.0.1 = $age4.0.0
# Derivations
$Math = [$GC:Sm $Other_Math]
$Alphabetic = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_Alphabetic]
$Lowercase = [$GC:Ll $Other_Lowercase]
$Uppercase = [$GC:Lu $Other_Uppercase]
$ID_Start = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_ID_Start]
$ID_Continue = [$ID_Start $GC:Mn $GC:Mc $GC:Nd $GC:Pc]
$Default_Ignorable_Code_Point = [[$Other_Default_Ignorable_Code_Point $GC:Cf $GC:Cc $GC:Cs $Variation_Selector $Noncharacter_Code_Point] - [$White_Space\uFFF9-\uFFFB]]
$Grapheme_Extend = [$GC:Me $GC:Mn $Other_Grapheme_Extend]
$Grapheme_Base = [^$GC:Cc $GC:Cf $GC:Cs $GC:Co $GC:Cn $GC:Zl $GC:Zp $Grapheme_Extend]
# "Minimal" Other_: NOT hard requirements; just if we want to be minimal
$Other_Math = [$Math - $GC:Sm]
$Other_Alphabetic = [$Alphabetic - [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl]]
$Other_Lowercase = [$Lowercase - $GC:Ll]
$Other_Uppercase = [$Uppercase - $GC:Lu]
$Other_ID_Start = [$ID_Start - [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl]]
$Other_Default_Ignorable_Code_Point = [$Default_Ignorable_Code_Point - [[$GC:Cf $GC:Cc $GC:Cs $Variation_Selector $Noncharacter_Code_Point] - [$White_Space\uFFF9-\uFFFB]]]
$Other_Grapheme_Extend = [$Grapheme_Extend - [$GC:Me $GC:Mn]]
# Testing
$script:greek = $×script:greek
$gc:lm = $script:inherited

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
* $Date: 2004/06/26 00:26:16 $
* $Revision: 1.43 $
* $Date: 2004/10/14 17:54:56 $
* $Revision: 1.44 $
*
*******************************************************************************
*/
@ -1083,7 +1083,11 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
}
public static String replace(String source, String[][] replacements) {
for (int i = 0; i < replacements.length; ++i) {
return replace(source, replacements, replacements.length);
}
public static String replace(String source, String[][] replacements, int count) {
for (int i = 0; i < count; ++i) {
source = replace(source, replacements[i][0], replacements[i][1]);
}
return source;